chore: refactor to use gguf-parser-go; 50mb limit

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
Christopher Phillips 2025-10-22 02:37:01 -04:00
parent f664f9eaf2
commit c689dcfeef
15 changed files with 338 additions and 1620 deletions

go.mod

@ -286,6 +286,8 @@ require (
	modernc.org/memory v1.11.0 // indirect
)
+require github.com/gpustack/gguf-parser-go v0.22.1
require (
	cyphar.com/go-pathrs v0.2.1 // indirect
	github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect
@ -310,7 +312,13 @@ require (
	github.com/clipperhouse/stringish v0.1.1 // indirect
	github.com/clipperhouse/uax29/v2 v2.2.0 // indirect
	github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect
+	github.com/henvic/httpretty v0.1.4 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
	github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect
+	github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
+	gonum.org/v1/gonum v0.15.1 // indirect
)
retract (

go.sum

@ -549,6 +549,8 @@ github.com/gookit/assert v0.1.1/go.mod h1:jS5bmIVQZTIwk42uXl4lyj4iaaxx32tqH16CFj
github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg=
github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA=
github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs=
+github.com/gpustack/gguf-parser-go v0.22.1 h1:FRnEDWqT0Rcplr/R9ctCRSN2+3DhVsf6dnR5/i9JA4E=
+github.com/gpustack/gguf-parser-go v0.22.1/go.mod h1:y4TwTtDqFWTK+xvprOjRUh+dowgU2TKCX37vRKvGiZ0=
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
@ -598,6 +600,8 @@ github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
+github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
+github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
@ -625,6 +629,7 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
@ -730,9 +735,11 @@ github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcY
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA=
github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
@ -860,6 +867,8 @@ github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yf
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
+github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
+github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik=
github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU=
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
@ -1313,6 +1322,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
+gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
+gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=


@ -3,7 +3,7 @@ package task
import (
	"github.com/anchore/syft/syft/cataloging/pkgcataloging"
	"github.com/anchore/syft/syft/pkg"
-	"github.com/anchore/syft/syft/pkg/cataloger/aiartifact"
+	"github.com/anchore/syft/syft/pkg/cataloger/ai"
	"github.com/anchore/syft/syft/pkg/cataloger/alpine"
	"github.com/anchore/syft/syft/pkg/cataloger/arch"
	"github.com/anchore/syft/syft/pkg/cataloger/binary"
@ -179,7 +179,7 @@ func DefaultPackageTaskFactories() Factories {
		newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"),
		newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"),
		newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"),
-		newSimplePackageTaskFactory(aiartifact.NewGGUFCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "gguf", "ml"),
+		newSimplePackageTaskFactory(ai.NewGGUFCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "gguf", "ml"),
		// deprecated catalogers ////////////////////////////////////////
		// these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible)


@ -1,8 +1,8 @@
/*
-Package aiartifact provides concrete Cataloger implementations for AI artifacts and machine learning models,
+Package ai provides concrete Cataloger implementations for AI artifacts and machine learning models,
including support for GGUF (GPT-Generated Unified Format) model files.
*/
-package aiartifact
+package ai
import (
	"github.com/anchore/syft/syft/pkg"
@ -11,6 +11,6 @@ import (
// NewGGUFCataloger returns a new cataloger instance for GGUF model files.
func NewGGUFCataloger() pkg.Cataloger {
-	return generic.NewCataloger("model-gguf-cataloger").
+	return generic.NewCataloger("gguf-cataloger").
		WithParserByGlobs(parseGGUFModel, "**/*.gguf")
}
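
For orientation, a hedged usage sketch (not part of this commit): driving the new cataloger directly from Go. The catalogGGUF helper and the resolver wiring are illustrative, and the Catalog(ctx, resolver) signature on pkg.Cataloger is assumed rather than taken from this diff; a resolver would normally come from a syft source.

package example

import (
	"context"

	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/pkg/cataloger/ai"
)

// catalogGGUF runs the GGUF cataloger against a resolver obtained elsewhere (hypothetical helper).
func catalogGGUF(ctx context.Context, resolver file.Resolver) ([]pkg.Package, error) {
	cataloger := ai.NewGGUFCataloger() // matches "**/*.gguf" via parseGGUFModel
	pkgs, _, err := cataloger.Catalog(ctx, resolver) // assumed Catalog(ctx, resolver) signature
	return pkgs, err
}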


@ -1,4 +1,4 @@
-package aiartifact
+package ai
import (
	"os"
@ -171,7 +171,7 @@ func TestGGUFCataloger_Integration(t *testing.T) {
	ModelName: "model1",
	ModelVersion: "1.0",
	Architecture: "llama",
-	Quantization: unkownGGUFData,
+	Quantization: unknownGGUFData,
	GGUFVersion: 3,
	TensorCount: 100,
	Header: map[string]interface{}{},
@ -187,7 +187,7 @@ func TestGGUFCataloger_Integration(t *testing.T) {
	ModelName: "model2",
	ModelVersion: "2.0",
	Architecture: "mistral",
-	Quantization: unkownGGUFData,
+	Quantization: unknownGGUFData,
	GGUFVersion: 3,
	TensorCount: 200,
	Header: map[string]interface{}{},
@ -217,14 +217,14 @@ func TestGGUFCataloger_Integration(t *testing.T) {
	expectedPackages: []pkg.Package{
		{
			Name: "qwen-nested",
-			Version: unkownGGUFData,
+			Version: unknownGGUFData,
			Type: pkg.ModelPkg,
			Metadata: pkg.GGUFFileMetadata{
				ModelFormat: "gguf",
				ModelName: "qwen-nested",
-				ModelVersion: unkownGGUFData,
+				ModelVersion: unknownGGUFData,
				Architecture: "qwen",
-				Quantization: unkownGGUFData,
+				Quantization: unknownGGUFData,
				GGUFVersion: 3,
				TensorCount: 150,
				Header: map[string]interface{}{},


@ -0,0 +1,69 @@
package ai
import (
"crypto/sha256"
"encoding/json"
"fmt"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
func newGGUFPackage(metadata *pkg.GGUFFileMetadata, locations ...file.Location) pkg.Package {
// Compute hash if not already set
if metadata.Hash == "" {
metadata.Hash = computeMetadataHash(metadata)
}
p := pkg.Package{
Name: metadata.ModelName,
Version: metadata.ModelVersion,
Locations: file.NewLocationSet(locations...),
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(),
Metadata: *metadata,
// NOTE: PURL is intentionally not set as the package-url spec
// has not yet finalized support for ML model packages
}
// Add license to the package if present in metadata
if metadata.License != "" {
p.Licenses.Add(pkg.NewLicenseFromFields(metadata.License, "", nil))
}
p.SetID()
return p
}
// computeMetadataHash computes a stable hash of the metadata for use as a global identifier
func computeMetadataHash(metadata *pkg.GGUFFileMetadata) string {
// Create a stable representation of the metadata
hashData := struct {
Format string
Name string
Version string
Architecture string
GGUFVersion uint32
TensorCount uint64
}{
Format: metadata.ModelFormat,
Name: metadata.ModelName,
Version: metadata.ModelVersion,
Architecture: metadata.Architecture,
GGUFVersion: metadata.GGUFVersion,
TensorCount: metadata.TensorCount,
}
// Marshal to JSON for stable hashing
jsonBytes, err := json.Marshal(hashData)
if err != nil {
log.Warnf("failed to marshal metadata for hashing: %v", err)
return ""
}
// Compute SHA256 hash
hash := sha256.Sum256(jsonBytes)
return fmt.Sprintf("%x", hash[:8]) // Use first 8 bytes (16 hex chars)
}
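
As a side note, a minimal standalone sketch of the hash scheme above (the field values are illustrative): marshal the same fixed subset of fields to JSON, take the SHA-256 of the bytes, and keep the first 8 bytes as a 16-character hex identifier.

package main

import (
	"crypto/sha256"
	"encoding/json"
	"fmt"
)

func main() {
	// Same shape as the hashData struct in computeMetadataHash above;
	// the values below are made up for illustration.
	hashData := struct {
		Format       string
		Name         string
		Version      string
		Architecture string
		GGUFVersion  uint32
		TensorCount  uint64
	}{
		Format:       "gguf",
		Name:         "example-model",
		Version:      "1.0",
		Architecture: "llama",
		GGUFVersion:  3,
		TensorCount:  291,
	}

	jsonBytes, err := json.Marshal(hashData)
	if err != nil {
		panic(err)
	}
	sum := sha256.Sum256(jsonBytes)
	fmt.Printf("%x\n", sum[:8]) // 16 hex characters, used as Metadata.Hash
}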


@ -1,4 +1,4 @@
-package aiartifact
+package ai
import (
	"testing"


@ -0,0 +1,92 @@
package ai
import (
"encoding/binary"
"fmt"
"io"
gguf_parser "github.com/gpustack/gguf-parser-go"
)
// GGUF file format constants
const (
ggufMagicNumber = 0x46554747 // "GGUF" in little-endian
maxHeaderSize = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies
)
// ggufHeaderReader reads just the header portion of a GGUF file efficiently
type ggufHeaderReader struct {
reader io.Reader
}
// readHeader reads only the GGUF header (metadata) without reading tensor data
// This is much more efficient than reading the entire file
func (r *ggufHeaderReader) readHeader() ([]byte, error) {
// Read initial chunk to determine header size
// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
initialBuf := make([]byte, 24) // Enough for magic, version, tensor count, and kv count
if _, err := io.ReadFull(r.reader, initialBuf); err != nil {
return nil, fmt.Errorf("failed to read GGUF header prefix: %w", err)
}
// Verify magic number
magic := binary.LittleEndian.Uint32(initialBuf[0:4])
if magic != ggufMagicNumber {
return nil, fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
}
// We need to read the metadata KV pairs to know the full header size
// For efficiency, we'll read incrementally up to maxHeaderSize
headerData := make([]byte, 0, 1024*1024) // Start with 1MB capacity
headerData = append(headerData, initialBuf...)
// Read the rest of the header in larger chunks for efficiency
buf := make([]byte, 64*1024) // 64KB chunks
for len(headerData) < maxHeaderSize {
n, err := r.reader.Read(buf)
if n > 0 {
headerData = append(headerData, buf[:n]...)
}
if err == io.EOF {
// Reached end of file, we have all the data
break
}
if err != nil {
return nil, fmt.Errorf("failed to read GGUF header: %w", err)
}
}
if len(headerData) > maxHeaderSize {
// Truncate if we somehow read too much
headerData = headerData[:maxHeaderSize]
}
return headerData, nil
}
// Helper to convert gguf_parser metadata to simpler types
func convertGGUFMetadataKVs(kvs gguf_parser.GGUFMetadataKVs) map[string]interface{} {
result := make(map[string]interface{})
// Limit KV pairs to avoid bloat
const maxKVPairs = 200
count := 0
for _, kv := range kvs {
if count >= maxKVPairs {
break
}
// Skip standard fields that are extracted separately
switch kv.Key {
case "general.architecture", "general.name", "general.license",
"general.version", "general.parameter_count", "general.quantization":
continue
}
result[kv.Key] = kv.Value
count++
}
return result
}
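
A package-internal test sketch (hypothetical, not part of this commit) of the reader above: write a minimal 24-byte GGUF prefix into memory and check that readHeader accepts the magic number and returns everything up to EOF.

package ai

import (
	"bytes"
	"encoding/binary"
	"testing"
)

func TestReadHeaderMinimalPrefix(t *testing.T) {
	// magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) = 24 bytes
	prefix := &bytes.Buffer{}
	binary.Write(prefix, binary.LittleEndian, uint32(ggufMagicNumber)) // "GGUF"
	binary.Write(prefix, binary.LittleEndian, uint32(3))               // version
	binary.Write(prefix, binary.LittleEndian, uint64(0))               // tensor count
	binary.Write(prefix, binary.LittleEndian, uint64(0))               // metadata KV count

	r := &ggufHeaderReader{reader: bytes.NewReader(prefix.Bytes())}
	header, err := r.readHeader()
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(header) != 24 {
		t.Fatalf("expected 24 header bytes, got %d", len(header))
	}
}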


@ -0,0 +1,127 @@
package ai
import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
gguf_parser "github.com/gpustack/gguf-parser-go"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
const unknownGGUFData = "unknown"
// parseGGUFModel parses a GGUF model file and returns the discovered package.
// This implementation only reads the header portion of the file, not the entire model.
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
defer internal.CloseAndLogError(reader, reader.Path())
// Read only the header portion (not the entire file)
headerReader := &ggufHeaderReader{reader: reader}
headerData, err := headerReader.readHeader()
if err != nil {
return nil, nil, fmt.Errorf("failed to read GGUF header: %w", err)
}
// Create a temporary file with just the header for the library to parse
// The library requires a file path, so we create a minimal temp file
tempFile, err := os.CreateTemp("", "syft-gguf-header-*.gguf")
if err != nil {
return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
}
tempPath := tempFile.Name()
defer os.Remove(tempPath)
// Write header data to temp file
if _, err := tempFile.Write(headerData); err != nil {
tempFile.Close()
return nil, nil, fmt.Errorf("failed to write header to temp file: %w", err)
}
tempFile.Close()
// Parse using gguf-parser-go with options to skip unnecessary data
ggufFile, err := gguf_parser.ParseGGUFFile(tempPath,
gguf_parser.SkipLargeMetadata(),
)
if err != nil {
return nil, nil, fmt.Errorf("failed to parse GGUF file: %w", err)
}
// Extract metadata
metadata := ggufFile.Metadata()
// Convert to syft metadata structure
syftMetadata := &pkg.GGUFFileMetadata{
ModelFormat: "gguf",
ModelName: metadata.Name,
ModelVersion: extractVersion(ggufFile.Header.MetadataKV),
License: metadata.License,
Architecture: metadata.Architecture,
Quantization: metadata.FileTypeDescriptor,
Parameters: uint64(metadata.Parameters),
GGUFVersion: uint32(ggufFile.Header.Version),
TensorCount: ggufFile.Header.TensorCount,
Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
TruncatedHeader: false, // We read the full header
Hash: "", // Will be computed in newGGUFPackage
}
// If model name is not in metadata, use filename
if syftMetadata.ModelName == "" {
syftMetadata.ModelName = extractModelNameFromPath(reader.Path())
}
// If version is still unknown, try to infer from name
if syftMetadata.ModelVersion == unknownGGUFData {
syftMetadata.ModelVersion = extractVersionFromName(syftMetadata.ModelName)
}
// Create package from metadata
p := newGGUFPackage(
syftMetadata,
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
)
return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse GGUF file")
}
// extractVersion attempts to extract version from metadata KV pairs
func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string {
for _, kv := range kvs {
if kv.Key == "general.version" {
if v, ok := kv.Value.(string); ok && v != "" {
return v
}
}
}
return unknownGGUFData
}
// extractVersionFromName tries to extract version from model name
func extractVersionFromName(_ string) string {
// Look for version patterns like "v1.0", "1.5b", "3.0", etc.
// For now, return unknown - this could be enhanced with regex
return unknownGGUFData
}
// extractModelNameFromPath extracts the model name from the file path
func extractModelNameFromPath(path string) string {
// Get the base filename
base := filepath.Base(path)
// Remove .gguf extension
name := strings.TrimSuffix(base, ".gguf")
return name
}
// integrity check
var _ generic.Parser = parseGGUFModel
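
For comparison, a hedged standalone sketch that uses gguf-parser-go directly on a model path, limited to the calls the parser above already relies on (ParseGGUFFile, SkipLargeMetadata, Metadata, Header); the file path is illustrative.

package main

import (
	"fmt"
	"log"

	gguf_parser "github.com/gpustack/gguf-parser-go"
)

func main() {
	// Path is illustrative; point this at any local GGUF model file.
	ggufFile, err := gguf_parser.ParseGGUFFile("model.gguf", gguf_parser.SkipLargeMetadata())
	if err != nil {
		log.Fatal(err)
	}

	md := ggufFile.Metadata()
	fmt.Println("name:        ", md.Name)
	fmt.Println("architecture:", md.Architecture)
	fmt.Println("quantization:", md.FileTypeDescriptor)
	fmt.Println("gguf version:", ggufFile.Header.Version)
	fmt.Println("tensor count:", ggufFile.Header.TensorCount)
}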


@ -1,10 +1,28 @@
-package aiartifact
+package ai
import (
	"bytes"
	"encoding/binary"
)
+// GGUF type constants for test builder
+const (
+	ggufMagic       = 0x46554747 // "GGUF" in little-endian
+	ggufTypeUint8   = 0
+	ggufTypeInt8    = 1
+	ggufTypeUint16  = 2
+	ggufTypeInt16   = 3
+	ggufTypeUint32  = 4
+	ggufTypeInt32   = 5
+	ggufTypeFloat32 = 6
+	ggufTypeUint64  = 7
+	ggufTypeInt64   = 8
+	ggufTypeFloat64 = 9
+	ggufTypeBool    = 10
+	ggufTypeString  = 11
+	ggufTypeArray   = 12
+)
// testGGUFBuilder helps build GGUF files for testing
type testGGUFBuilder struct {
	buf *bytes.Buffer
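
A hedged sketch of how a builder like this might append one string key/value pair; the method name is illustrative, and the byte layout (key length as u64 LE, key bytes, value type as u32 LE, string length as u64 LE, string bytes) mirrors what the removed manual reader further below expected.

// Illustrative method, not part of this commit.
func (b *testGGUFBuilder) writeStringKV(key, value string) {
	binary.Write(b.buf, binary.LittleEndian, uint64(len(key)))       // key length
	b.buf.WriteString(key)                                           // key bytes
	binary.Write(b.buf, binary.LittleEndian, uint32(ggufTypeString)) // value type 11
	binary.Write(b.buf, binary.LittleEndian, uint64(len(value)))     // string length
	b.buf.WriteString(value)                                         // string bytes
}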


@ -1,28 +0,0 @@
package aiartifact
import (
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
func newGGUFPackage(metadata *pkg.GGUFFileMetadata, locations ...file.Location) pkg.Package {
p := pkg.Package{
Name: metadata.ModelName,
Version: metadata.ModelVersion,
Locations: file.NewLocationSet(locations...),
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(),
Metadata: *metadata,
// NOTE: PURL is intentionally not set as the package-url spec
// has not yet finalized support for ML model packages
}
// Add license to the package if present in metadata
if metadata.License != "" {
p.Licenses.Add(pkg.NewLicenseFromFields(metadata.License, "", nil))
}
p.SetID()
return p
}


@ -1,345 +0,0 @@
package aiartifact
import (
"bytes"
"crypto/sha256"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"path/filepath"
"regexp"
"strings"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg"
)
// GGUF file format constants
const (
ggufMagic = 0x46554747 // "GGUF" in little-endian
maxKVPairs = 10000 // Safety limit for KV pairs
maxKeyLen = 65535 // Maximum key length
maxTensors = 100000 // Safety limit for tensors
maxHeaderKV = 200 // Maximum KV pairs to include in Header map (to avoid bloat)
)
// GGUF value types (from GGUF spec)
const (
ggufTypeUint8 = 0
ggufTypeInt8 = 1
ggufTypeUint16 = 2
ggufTypeInt16 = 3
ggufTypeUint32 = 4
ggufTypeInt32 = 5
ggufTypeFloat32 = 6
ggufTypeUint64 = 7
ggufTypeInt64 = 8
ggufTypeFloat64 = 9
ggufTypeBool = 10
ggufTypeString = 11
ggufTypeArray = 12
)
const unknownGGUFData = "unknown"
//nolint:funlen
func parseGGUFHeader(data []byte, location string) (*pkg.GGUFFileMetadata, error) {
reader := bytes.NewReader(data)
// Read magic number
var magic uint32
if err := binary.Read(reader, binary.LittleEndian, &magic); err != nil {
return nil, fmt.Errorf("failed to read magic number: %w", err)
}
if magic != ggufMagic {
return nil, fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
}
// Read version
var version uint32
if err := binary.Read(reader, binary.LittleEndian, &version); err != nil {
return nil, fmt.Errorf("failed to read version: %w", err)
}
// Read tensor count
var tensorCount uint64
if err := binary.Read(reader, binary.LittleEndian, &tensorCount); err != nil {
return nil, fmt.Errorf("failed to read tensor count: %w", err)
}
if tensorCount > maxTensors {
log.Warnf("GGUF file has suspicious tensor count: %d (max: %d)", tensorCount, maxTensors)
tensorCount = maxTensors
}
// Read metadata KV count
var kvCount uint64
if err := binary.Read(reader, binary.LittleEndian, &kvCount); err != nil {
return nil, fmt.Errorf("failed to read KV count: %w", err)
}
if kvCount > maxKVPairs {
log.Warnf("GGUF file has suspicious KV count: %d (max: %d)", kvCount, maxKVPairs)
return nil, fmt.Errorf("KV count exceeds safety limit: %d", kvCount)
}
// Parse metadata key-value pairs
kvMap := make(map[string]any)
truncated := false
for i := uint64(0); i < kvCount; i++ {
key, value, err := readKVPair(reader)
if err != nil {
log.Warnf("failed to read KV pair %d: %v", i, err)
truncated = true
break
}
if len(kvMap) < maxHeaderKV {
kvMap[key] = value
} else {
truncated = true
}
}
// Extract common metadata fields
metadata := &pkg.GGUFFileMetadata{
ModelFormat: "gguf",
GGUFVersion: version,
TensorCount: tensorCount,
Header: kvMap,
TruncatedHeader: truncated,
}
// Extract known fields from KV map and remove them to avoid duplication in Header
if arch, ok := kvMap["general.architecture"].(string); ok {
metadata.Architecture = arch
delete(kvMap, "general.architecture")
}
if name, ok := kvMap["general.name"].(string); ok {
metadata.ModelName = name
delete(kvMap, "general.name")
} else {
// Fall back to filename if general.name not present
filename := filepath.Base(location)
metadata.ModelName = strings.TrimSuffix(filename, filepath.Ext(filename))
}
if license, ok := kvMap["general.license"].(string); ok {
metadata.License = license
delete(kvMap, "general.license")
}
if version, ok := kvMap["general.version"].(string); ok {
metadata.ModelVersion = version
delete(kvMap, "general.version")
} else {
metadata.ModelVersion = unknownGGUFData
}
// Extract parameters count if present
if params, ok := kvMap["general.parameter_count"].(uint64); ok {
metadata.Parameters = params
delete(kvMap, "general.parameter_count")
}
// Try to infer quantization from general.quantization or from filename
if quant, ok := kvMap["general.quantization"].(string); ok {
metadata.Quantization = quant
delete(kvMap, "general.quantization")
} else if quantizedBy, ok := kvMap["general.quantized_by"].(string); ok && quantizedBy != "" {
// If quantized but no explicit quantization field, try to extract from filename
metadata.Quantization = inferQuantizationFromFilename(location)
// Note: we keep general.quantized_by in Header since it's not directly mapped to a field
} else {
metadata.Quantization = unknownGGUFData
}
// Compute hash of metadata for stable identifier
metadata.Hash = computeMetadataHash(metadata)
return metadata, nil
}
// readKVPair reads a single key-value pair from the GGUF header
func readKVPair(reader io.Reader) (string, interface{}, error) {
// Read key length
var keyLen uint64
if err := binary.Read(reader, binary.LittleEndian, &keyLen); err != nil {
return "", nil, fmt.Errorf("failed to read key length: %w", err)
}
if keyLen > maxKeyLen {
return "", nil, fmt.Errorf("key length exceeds maximum: %d", keyLen)
}
// Read key
keyBytes := make([]byte, keyLen)
if _, err := io.ReadFull(reader, keyBytes); err != nil {
return "", nil, fmt.Errorf("failed to read key: %w", err)
}
key := string(keyBytes)
// Read value type
var valueType uint32
if err := binary.Read(reader, binary.LittleEndian, &valueType); err != nil {
return "", nil, fmt.Errorf("failed to read value type: %w", err)
}
// Read value based on type
value, err := readValue(reader, valueType)
if err != nil {
return "", nil, fmt.Errorf("failed to read value for key %s: %w", key, err)
}
return key, value, nil
}
//nolint:funlen
func readValue(reader io.Reader, valueType uint32) (any, error) {
switch valueType {
case ggufTypeUint8:
var v uint8
err := binary.Read(reader, binary.LittleEndian, &v)
return v, err
case ggufTypeInt8:
var v int8
err := binary.Read(reader, binary.LittleEndian, &v)
return v, err
case ggufTypeUint16:
var v uint16
err := binary.Read(reader, binary.LittleEndian, &v)
return v, err
case ggufTypeInt16:
var v int16
err := binary.Read(reader, binary.LittleEndian, &v)
return v, err
case ggufTypeUint32:
var v uint32
err := binary.Read(reader, binary.LittleEndian, &v)
return v, err
case ggufTypeInt32:
var v int32
err := binary.Read(reader, binary.LittleEndian, &v)
return v, err
case ggufTypeFloat32:
var v float32
err := binary.Read(reader, binary.LittleEndian, &v)
return v, err
case ggufTypeUint64:
var v uint64
err := binary.Read(reader, binary.LittleEndian, &v)
return v, err
case ggufTypeInt64:
var v int64
err := binary.Read(reader, binary.LittleEndian, &v)
return v, err
case ggufTypeFloat64:
var v float64
err := binary.Read(reader, binary.LittleEndian, &v)
return v, err
case ggufTypeBool:
var v uint8
err := binary.Read(reader, binary.LittleEndian, &v)
return v != 0, err
case ggufTypeString:
return readString(reader)
case ggufTypeArray:
return readArray(reader)
default:
return nil, fmt.Errorf("unknown value type: %d", valueType)
}
}
// readString reads a length-prefixed UTF-8 string
func readString(reader io.Reader) (string, error) {
var length uint64
if err := binary.Read(reader, binary.LittleEndian, &length); err != nil {
return "", fmt.Errorf("failed to read string length: %w", err)
}
if length > maxKeyLen {
return "", fmt.Errorf("string length exceeds maximum: %d", length)
}
strBytes := make([]byte, length)
if _, err := io.ReadFull(reader, strBytes); err != nil {
return "", fmt.Errorf("failed to read string: %w", err)
}
return string(strBytes), nil
}
// readArray reads an array value
func readArray(reader io.Reader) (interface{}, error) {
// Read array element type
var elemType uint32
if err := binary.Read(reader, binary.LittleEndian, &elemType); err != nil {
return nil, fmt.Errorf("failed to read array element type: %w", err)
}
// Read array length
var length uint64
if err := binary.Read(reader, binary.LittleEndian, &length); err != nil {
return nil, fmt.Errorf("failed to read array length: %w", err)
}
if length > 1000 {
// Limit array size to avoid memory issues
return nil, fmt.Errorf("array length too large: %d", length)
}
// Read array elements
var elements []interface{}
for i := uint64(0); i < length; i++ {
value, err := readValue(reader, elemType)
if err != nil {
return nil, fmt.Errorf("failed to read array element %d: %w", i, err)
}
elements = append(elements, value)
}
return elements, nil
}
// inferQuantizationFromFilename attempts to extract quantization info from filename
func inferQuantizationFromFilename(filename string) string {
// Common quantization patterns: Q4_K_M, IQ4_NL, Q5_K_S, etc.
quantPattern := regexp.MustCompile(`[IQ]\d+_[A-Z_]+`)
if match := quantPattern.FindString(filename); match != "" {
return match
}
return unknownGGUFData
}
// computeMetadataHash computes a stable hash of the metadata for use as a global identifier
func computeMetadataHash(metadata *pkg.GGUFFileMetadata) string {
// Create a stable representation of the metadata
hashData := struct {
Format string
Name string
Version string
Architecture string
GGUFVersion uint32
TensorCount uint64
}{
Format: metadata.ModelFormat,
Name: metadata.ModelName,
Version: metadata.ModelVersion,
Architecture: metadata.Architecture,
GGUFVersion: metadata.GGUFVersion,
TensorCount: metadata.TensorCount,
}
// Marshal to JSON for stable hashing
jsonBytes, err := json.Marshal(hashData)
if err != nil {
log.Warnf("failed to marshal metadata for hashing: %v", err)
return ""
}
// Compute SHA256 hash
hash := sha256.Sum256(jsonBytes)
return fmt.Sprintf("%x", hash[:8]) // Use first 8 bytes (16 hex chars)
}


@ -1,68 +0,0 @@
package aiartifact
import (
"context"
"fmt"
"io"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
// parseGGUFModel parses a GGUF model file and returns the discovered package.
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
defer internal.CloseAndLogError(reader, reader.Path())
// Read header (we'll read a reasonable amount to parse the header without reading entire file)
// GGUF headers are typically < 1MB, but we'll use a 10MB limit to be safe
const maxHeaderSize = 10 * 1024 * 1024
limitedReader := io.LimitReader(reader, maxHeaderSize)
// We need to buffer the data because we need to check magic and parse
headerData := make([]byte, 0, 8192) // Start with 8KB buffer
buf := make([]byte, 8192)
for {
n, err := limitedReader.Read(buf)
if n > 0 {
headerData = append(headerData, buf[:n]...)
}
if err == io.EOF {
break
}
if err != nil {
return nil, nil, fmt.Errorf("error reading file: %w", err)
}
// Stop if we've read enough for a reasonable header
if len(headerData) > maxHeaderSize {
log.Warnf("GGUF header at %s exceeds max size, truncating", reader.Path())
break
}
}
// Check if this is actually a GGUF file
if len(headerData) < 4 {
return nil, nil, fmt.Errorf("file too small to be a valid GGUF file")
}
// Parse the GGUF header
metadata, err := parseGGUFHeader(headerData, reader.Path())
if err != nil {
return nil, nil, fmt.Errorf("failed to parse GGUF file: %w", err)
}
// Create package from metadata
p := newGGUFPackage(
metadata,
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
)
return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse GGUF file")
}
// integrity check
var _ generic.Parser = parseGGUFModel

File diff suppressed because it is too large


@ -266,8 +266,6 @@ func TypeByName(name string) Type {
		return WordpressPluginPkg
	case "homebrew":
		return HomebrewPkg
-	case "model":
-		return ModelPkg
	default:
		return UnknownPkg
	}