From b80592f73563d57720a8dc234125f11dc9743fd3 Mon Sep 17 00:00:00 2001 From: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> Date: Thu, 13 Nov 2025 00:32:08 -0500 Subject: [PATCH] chore: pr comments Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> --- syft/pkg/cataloger/ai/cataloger_test.go | 44 ++++++++++++++----- syft/pkg/cataloger/ai/package.go | 41 ----------------- syft/pkg/cataloger/ai/package_test.go | 18 ++++---- syft/pkg/cataloger/ai/parse_gguf_model.go | 14 +++--- .../glob-paths/models/model.gguf | 0 syft/pkg/cataloger/ai/test_builder_test.go | 12 ++--- 6 files changed, 55 insertions(+), 74 deletions(-) create mode 100644 syft/pkg/cataloger/ai/test-fixtures/glob-paths/models/model.gguf diff --git a/syft/pkg/cataloger/ai/cataloger_test.go b/syft/pkg/cataloger/ai/cataloger_test.go index c89203878..80b270fa9 100644 --- a/syft/pkg/cataloger/ai/cataloger_test.go +++ b/syft/pkg/cataloger/ai/cataloger_test.go @@ -14,6 +14,28 @@ import ( ) func TestGGUFCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain gguf files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "models/model.gguf", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewGGUFCataloger()) + }) + } } func TestGGUFCataloger_Integration(t *testing.T) { @@ -50,15 +72,15 @@ func TestGGUFCataloger_Integration(t *testing.T) { pkg.NewLicenseFromFields("Apache-2.0", "", nil), ), Metadata: pkg.GGUFFileHeader{ - ModelName: "llama3-8b", - ModelVersion: "3.0", - License: "Apache-2.0", - Architecture: "llama", - Quantization: "Unknown", - Parameters: 0, - GGUFVersion: 3, - TensorCount: 0, - Header: map[string]interface{}{}, + ModelName: "llama3-8b", + ModelVersion: "3.0", + License: "Apache-2.0", + Architecture: "llama", + Quantization: "Unknown", + Parameters: 0, + GGUFVersion: 3, + TensorCount: 0, + Header: map[string]interface{}{}, }, }, }, @@ -77,8 +99,8 @@ func TestGGUFCataloger_Integration(t *testing.T) { IgnoreLocationLayer(). IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger WithCompareOptions( - // Ignore Hash as it's computed dynamically - cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "Hash"), + // Ignore MetadataHash as it's computed dynamically + cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "MetadataHash"), ) tester.TestCataloger(t, NewGGUFCataloger()) diff --git a/syft/pkg/cataloger/ai/package.go b/syft/pkg/cataloger/ai/package.go index dfc93adb8..0200df08a 100644 --- a/syft/pkg/cataloger/ai/package.go +++ b/syft/pkg/cataloger/ai/package.go @@ -1,22 +1,11 @@ package ai import ( - "encoding/json" - "fmt" - - "github.com/cespare/xxhash/v2" - - "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/pkg" ) func newGGUFPackage(metadata *pkg.GGUFFileHeader, locations ...file.Location) pkg.Package { - // Compute hash if not already set - if metadata.Hash == "" { - metadata.Hash = computeMetadataHash(metadata) - } - p := pkg.Package{ Name: metadata.ModelName, Version: metadata.ModelVersion, @@ -37,33 +26,3 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, locations ...file.Location) pk return p } - -// computeMetadataHash computes a stable hash of the metadata for use as a global identifier -func computeMetadataHash(metadata *pkg.GGUFFileHeader) string { - // Create a stable representation of the metadata - hashData := struct { - Format string - Name string - Version string - Architecture string - GGUFVersion uint32 - TensorCount uint64 - }{ - Name: metadata.ModelName, - Version: metadata.ModelVersion, - Architecture: metadata.Architecture, - GGUFVersion: metadata.GGUFVersion, - TensorCount: metadata.TensorCount, - } - - // Marshal to JSON for stable hashing - jsonBytes, err := json.Marshal(hashData) - if err != nil { - log.Debugf("failed to marshal metadata for hashing: %v", err) - return "" - } - - // Compute xxhash - hash := xxhash.Sum64(jsonBytes) - return fmt.Sprintf("%016x", hash) // 16 hex chars (64 bits) -} diff --git a/syft/pkg/cataloger/ai/package_test.go b/syft/pkg/cataloger/ai/package_test.go index 6633bcbe2..b5918431e 100644 --- a/syft/pkg/cataloger/ai/package_test.go +++ b/syft/pkg/cataloger/ai/package_test.go @@ -21,15 +21,15 @@ func TestNewGGUFPackage(t *testing.T) { { name: "complete GGUF package with all fields", metadata: &pkg.GGUFFileHeader{ - ModelName: "llama3-8b-instruct", - ModelVersion: "3.0", - License: "Apache-2.0", - Architecture: "llama", - Quantization: "Q4_K_M", - Parameters: 8030000000, - GGUFVersion: 3, - TensorCount: 291, - Header: map[string]any{}, + ModelName: "llama3-8b-instruct", + ModelVersion: "3.0", + License: "Apache-2.0", + Architecture: "llama", + Quantization: "Q4_K_M", + Parameters: 8030000000, + GGUFVersion: 3, + TensorCount: 291, + Header: map[string]any{}, }, locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")}, checkFunc: func(t *testing.T, p pkg.Package) { diff --git a/syft/pkg/cataloger/ai/parse_gguf_model.go b/syft/pkg/cataloger/ai/parse_gguf_model.go index fc22a5cc3..3d449e76d 100644 --- a/syft/pkg/cataloger/ai/parse_gguf_model.go +++ b/syft/pkg/cataloger/ai/parse_gguf_model.go @@ -64,13 +64,13 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, // Convert to syft metadata structure syftMetadata := &pkg.GGUFFileHeader{ - ModelName: metadata.Name, - ModelVersion: extractVersion(ggufFile.Header.MetadataKV), - License: metadata.License, - Architecture: metadata.Architecture, - Quantization: metadata.FileTypeDescriptor, - Parameters: uint64(metadata.Parameters), - GGUFVersion: uint32(ggufFile.Header.Version), + ModelName: metadata.Name, + ModelVersion: extractVersion(ggufFile.Header.MetadataKV), + License: metadata.License, + Architecture: metadata.Architecture, + Quantization: metadata.FileTypeDescriptor, + Parameters: uint64(metadata.Parameters), + GGUFVersion: uint32(ggufFile.Header.Version), TensorCount: ggufFile.Header.TensorCount, Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV), MetadataHash: computeKVMetadataHash(ggufFile.Header.MetadataKV), diff --git a/syft/pkg/cataloger/ai/test-fixtures/glob-paths/models/model.gguf b/syft/pkg/cataloger/ai/test-fixtures/glob-paths/models/model.gguf new file mode 100644 index 000000000..e69de29bb diff --git a/syft/pkg/cataloger/ai/test_builder_test.go b/syft/pkg/cataloger/ai/test_builder_test.go index 62c9cfe7d..304f6acf3 100644 --- a/syft/pkg/cataloger/ai/test_builder_test.go +++ b/syft/pkg/cataloger/ai/test_builder_test.go @@ -3,7 +3,7 @@ package ai import ( "fmt" "os" - + gguf_parser "github.com/gpustack/gguf-parser-go" ) @@ -14,21 +14,21 @@ func main() { withStringKV("general.architecture", "llama"). withStringKV("general.name", "test-model"). build() - + // Write to temp file tempFile, err := os.CreateTemp("", "test-*.gguf") if err != nil { panic(err) } defer os.Remove(tempFile.Name()) - + if _, err := tempFile.Write(data); err != nil { panic(err) } tempFile.Close() - + fmt.Printf("Wrote %d bytes to %s\n", len(data), tempFile.Name()) - + // Try to parse it fmt.Println("Attempting to parse...") gf, err := gguf_parser.ParseGGUFFile(tempFile.Name(), gguf_parser.SkipLargeMetadata()) @@ -36,6 +36,6 @@ func main() { fmt.Printf("Parse error: %v\n", err) return } - + fmt.Printf("Success! Model: %s\n", gf.Metadata().Name) }