Compare commits

..

3 Commits

Author SHA1 Message Date
Christopher Phillips
9609ce2b36
chore: remove test-binary
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-11-13 00:46:38 -05:00
Christopher Phillips
56761cee6f
fix: raise model version on package
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-11-13 00:44:19 -05:00
Christopher Phillips
b80592f735
chore: pr comments
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-11-13 00:32:08 -05:00
10 changed files with 67 additions and 100 deletions

View File

@ -1443,10 +1443,6 @@
"type": "string", "type": "string",
"description": "ModelName is the name of the model (from general.name or filename)" "description": "ModelName is the name of the model (from general.name or filename)"
}, },
"modelVersion": {
"type": "string",
"description": "ModelVersion is the version of the model (if available in header, else \"unknown\")"
},
"fileSize": { "fileSize": {
"type": "integer", "type": "integer",
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)" "description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"

View File

@ -1443,10 +1443,6 @@
"type": "string", "type": "string",
"description": "ModelName is the name of the model (from general.name or filename)" "description": "ModelName is the name of the model (from general.name or filename)"
}, },
"modelVersion": {
"type": "string",
"description": "ModelVersion is the version of the model (if available in header, else \"unknown\")"
},
"fileSize": { "fileSize": {
"type": "integer", "type": "integer",
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)" "description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"

BIN
syft-test

Binary file not shown.

View File

@ -6,7 +6,6 @@ import (
"testing" "testing"
"github.com/google/go-cmp/cmp/cmpopts" "github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/assert"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
@ -14,6 +13,28 @@ import (
) )
func TestGGUFCataloger_Globs(t *testing.T) { func TestGGUFCataloger_Globs(t *testing.T) {
tests := []struct {
name string
fixture string
expected []string
}{
{
name: "obtain gguf files",
fixture: "test-fixtures/glob-paths",
expected: []string{
"models/model.gguf",
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
pkgtest.NewCatalogTester().
FromDirectory(t, test.fixture).
ExpectsResolverContentQueries(test.expected).
TestCataloger(t, NewGGUFCataloger())
})
}
} }
func TestGGUFCataloger_Integration(t *testing.T) { func TestGGUFCataloger_Integration(t *testing.T) {
@ -50,15 +71,14 @@ func TestGGUFCataloger_Integration(t *testing.T) {
pkg.NewLicenseFromFields("Apache-2.0", "", nil), pkg.NewLicenseFromFields("Apache-2.0", "", nil),
), ),
Metadata: pkg.GGUFFileHeader{ Metadata: pkg.GGUFFileHeader{
ModelName: "llama3-8b", ModelName: "llama3-8b",
ModelVersion: "3.0", License: "Apache-2.0",
License: "Apache-2.0", Architecture: "llama",
Architecture: "llama", Quantization: "Unknown",
Quantization: "Unknown", Parameters: 0,
Parameters: 0, GGUFVersion: 3,
GGUFVersion: 3, TensorCount: 0,
TensorCount: 0, Header: map[string]interface{}{},
Header: map[string]interface{}{},
}, },
}, },
}, },
@ -77,16 +97,11 @@ func TestGGUFCataloger_Integration(t *testing.T) {
IgnoreLocationLayer(). IgnoreLocationLayer().
IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger
WithCompareOptions( WithCompareOptions(
// Ignore Hash as it's computed dynamically // Ignore MetadataHash as it's computed dynamically
cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "Hash"), cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "MetadataHash"),
) )
tester.TestCataloger(t, NewGGUFCataloger()) tester.TestCataloger(t, NewGGUFCataloger())
}) })
} }
} }
func TestGGUFCataloger_Name(t *testing.T) {
cataloger := NewGGUFCataloger()
assert.Equal(t, "gguf-cataloger", cataloger.Name())
}

View File

@ -1,25 +1,14 @@
package ai package ai
import ( import (
"encoding/json"
"fmt"
"github.com/cespare/xxhash/v2"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
) )
func newGGUFPackage(metadata *pkg.GGUFFileHeader, locations ...file.Location) pkg.Package { func newGGUFPackage(metadata *pkg.GGUFFileHeader, version string, locations ...file.Location) pkg.Package {
// Compute hash if not already set
if metadata.Hash == "" {
metadata.Hash = computeMetadataHash(metadata)
}
p := pkg.Package{ p := pkg.Package{
Name: metadata.ModelName, Name: metadata.ModelName,
Version: metadata.ModelVersion, Version: version,
Locations: file.NewLocationSet(locations...), Locations: file.NewLocationSet(locations...),
Type: pkg.ModelPkg, Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(), Licenses: pkg.NewLicenseSet(),
@ -37,33 +26,3 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, locations ...file.Location) pk
return p return p
} }
// computeMetadataHash computes a stable hash of the metadata for use as a global identifier
func computeMetadataHash(metadata *pkg.GGUFFileHeader) string {
// Create a stable representation of the metadata
hashData := struct {
Format string
Name string
Version string
Architecture string
GGUFVersion uint32
TensorCount uint64
}{
Name: metadata.ModelName,
Version: metadata.ModelVersion,
Architecture: metadata.Architecture,
GGUFVersion: metadata.GGUFVersion,
TensorCount: metadata.TensorCount,
}
// Marshal to JSON for stable hashing
jsonBytes, err := json.Marshal(hashData)
if err != nil {
log.Debugf("failed to marshal metadata for hashing: %v", err)
return ""
}
// Compute xxhash
hash := xxhash.Sum64(jsonBytes)
return fmt.Sprintf("%016x", hash) // 16 hex chars (64 bits)
}

View File

@ -15,21 +15,22 @@ func TestNewGGUFPackage(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
metadata *pkg.GGUFFileHeader metadata *pkg.GGUFFileHeader
version string
locations []file.Location locations []file.Location
checkFunc func(t *testing.T, p pkg.Package) checkFunc func(t *testing.T, p pkg.Package)
}{ }{
{ {
name: "complete GGUF package with all fields", name: "complete GGUF package with all fields",
version: "3.0",
metadata: &pkg.GGUFFileHeader{ metadata: &pkg.GGUFFileHeader{
ModelName: "llama3-8b-instruct", ModelName: "llama3-8b-instruct",
ModelVersion: "3.0", License: "Apache-2.0",
License: "Apache-2.0", Architecture: "llama",
Architecture: "llama", Quantization: "Q4_K_M",
Quantization: "Q4_K_M", Parameters: 8030000000,
Parameters: 8030000000, GGUFVersion: 3,
GGUFVersion: 3, TensorCount: 291,
TensorCount: 291, Header: map[string]any{},
Header: map[string]any{},
}, },
locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")}, locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
checkFunc: func(t *testing.T, p pkg.Package) { checkFunc: func(t *testing.T, p pkg.Package) {
@ -51,10 +52,10 @@ func TestNewGGUFPackage(t *testing.T) {
}, },
}, },
{ {
name: "minimal GGUF package", name: "minimal GGUF package",
version: "1.0",
metadata: &pkg.GGUFFileHeader{ metadata: &pkg.GGUFFileHeader{
ModelName: "simple-model", ModelName: "simple-model",
ModelVersion: "1.0",
Architecture: "gpt2", Architecture: "gpt2",
GGUFVersion: 3, GGUFVersion: 3,
TensorCount: 50, TensorCount: 50,
@ -75,10 +76,10 @@ func TestNewGGUFPackage(t *testing.T) {
}, },
}, },
{ {
name: "GGUF package with multiple locations", name: "GGUF package with multiple locations",
version: "1.5",
metadata: &pkg.GGUFFileHeader{ metadata: &pkg.GGUFFileHeader{
ModelName: "multi-location-model", ModelName: "multi-location-model",
ModelVersion: "1.5",
Architecture: "llama", Architecture: "llama",
GGUFVersion: 3, GGUFVersion: 3,
TensorCount: 150, TensorCount: 150,
@ -95,12 +96,12 @@ func TestNewGGUFPackage(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
p := newGGUFPackage(tt.metadata, tt.locations...) p := newGGUFPackage(tt.metadata, tt.version, tt.locations...)
if d := cmp.Diff(tt.metadata.ModelName, p.Name); d != "" { if d := cmp.Diff(tt.metadata.ModelName, p.Name); d != "" {
t.Errorf("Name mismatch (-want +got):\n%s", d) t.Errorf("Name mismatch (-want +got):\n%s", d)
} }
if d := cmp.Diff(tt.metadata.ModelVersion, p.Version); d != "" { if d := cmp.Diff(tt.version, p.Version); d != "" {
t.Errorf("Version mismatch (-want +got):\n%s", d) t.Errorf("Version mismatch (-want +got):\n%s", d)
} }
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" { if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {

View File

@ -62,15 +62,17 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
// Extract metadata // Extract metadata
metadata := ggufFile.Metadata() metadata := ggufFile.Metadata()
// Extract version separately (will be set on Package.Version)
modelVersion := extractVersion(ggufFile.Header.MetadataKV)
// Convert to syft metadata structure // Convert to syft metadata structure
syftMetadata := &pkg.GGUFFileHeader{ syftMetadata := &pkg.GGUFFileHeader{
ModelName: metadata.Name, ModelName: metadata.Name,
ModelVersion: extractVersion(ggufFile.Header.MetadataKV), License: metadata.License,
License: metadata.License, Architecture: metadata.Architecture,
Architecture: metadata.Architecture, Quantization: metadata.FileTypeDescriptor,
Quantization: metadata.FileTypeDescriptor, Parameters: uint64(metadata.Parameters),
Parameters: uint64(metadata.Parameters), GGUFVersion: uint32(ggufFile.Header.Version),
GGUFVersion: uint32(ggufFile.Header.Version),
TensorCount: ggufFile.Header.TensorCount, TensorCount: ggufFile.Header.TensorCount,
Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV), Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
MetadataHash: computeKVMetadataHash(ggufFile.Header.MetadataKV), MetadataHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
@ -84,6 +86,7 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
// Create package from metadata // Create package from metadata
p := newGGUFPackage( p := newGGUFPackage(
syftMetadata, syftMetadata,
modelVersion,
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
) )

View File

@ -3,7 +3,7 @@ package ai
import ( import (
"fmt" "fmt"
"os" "os"
gguf_parser "github.com/gpustack/gguf-parser-go" gguf_parser "github.com/gpustack/gguf-parser-go"
) )
@ -14,21 +14,21 @@ func main() {
withStringKV("general.architecture", "llama"). withStringKV("general.architecture", "llama").
withStringKV("general.name", "test-model"). withStringKV("general.name", "test-model").
build() build()
// Write to temp file // Write to temp file
tempFile, err := os.CreateTemp("", "test-*.gguf") tempFile, err := os.CreateTemp("", "test-*.gguf")
if err != nil { if err != nil {
panic(err) panic(err)
} }
defer os.Remove(tempFile.Name()) defer os.Remove(tempFile.Name())
if _, err := tempFile.Write(data); err != nil { if _, err := tempFile.Write(data); err != nil {
panic(err) panic(err)
} }
tempFile.Close() tempFile.Close()
fmt.Printf("Wrote %d bytes to %s\n", len(data), tempFile.Name()) fmt.Printf("Wrote %d bytes to %s\n", len(data), tempFile.Name())
// Try to parse it // Try to parse it
fmt.Println("Attempting to parse...") fmt.Println("Attempting to parse...")
gf, err := gguf_parser.ParseGGUFFile(tempFile.Name(), gguf_parser.SkipLargeMetadata()) gf, err := gguf_parser.ParseGGUFFile(tempFile.Name(), gguf_parser.SkipLargeMetadata())
@ -36,6 +36,6 @@ func main() {
fmt.Printf("Parse error: %v\n", err) fmt.Printf("Parse error: %v\n", err)
return return
} }
fmt.Printf("Success! Model: %s\n", gf.Metadata().Name) fmt.Printf("Success! Model: %s\n", gf.Metadata().Name)
} }

View File

@ -10,9 +10,6 @@ type GGUFFileHeader struct {
// ModelName is the name of the model (from general.name or filename) // ModelName is the name of the model (from general.name or filename)
ModelName string `json:"modelName" cyclonedx:"modelName"` ModelName string `json:"modelName" cyclonedx:"modelName"`
// ModelVersion is the version of the model (if available in header, else "unknown")
ModelVersion string `json:"modelVersion,omitempty" cyclonedx:"modelVersion"`
// FileSize is the size of the GGUF file in bytes (best-effort if available from resolver) // FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)
FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"` FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`