Compare commits

...

13 Commits

Author SHA1 Message Date
Christopher Phillips
9c5279cb99
chore: pr feedback
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-22 13:33:33 -04:00
Christopher Phillips
f7a19db98b
chore: warn -> debug
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-22 13:24:47 -04:00
Christopher Phillips
13756ec768
fix: role schema forward
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-22 13:22:58 -04:00
Christopher Phillips
3326ae44fa
Merge branch 'main' into 4184-gguf-parser
* main:
  chore(deps): update tools to latest versions (#4302)
  chore(deps): bump github.com/github/go-spdx/v2 from 2.3.3 to 2.3.4 (#4301)
  chore(deps): bump github/codeql-action from 4.30.8 to 4.30.9 (#4299)
  support universal (fat) mach-o binary files (#4278)
  chore(deps): bump sigstore/cosign-installer from 3.10.0 to 4.0.0 (#4296)
  chore(deps): bump anchore/sbom-action from 0.20.7 to 0.20.8 (#4297)
  convert posix path back to windows (#4285)
  Remove duplicate image source providers (#4289)
  chore(deps): bump anchore/sbom-action from 0.20.6 to 0.20.7 (#4293)
  feat: add option to fetch remote licenses for pnpm-lock.yaml files (#4286)
  Add PDM parser (#4234)
  chore(deps): update tools to latest versions (#4291)
  fix: panic during java archive maven resolution (#4290)
  Extract zip archive with multiple entries (#4283)
  chore: update to use old configuration on new cosign (#4287)
  chore(deps): update anchore dependencies (#4282)
  chore(deps): bump github.com/mholt/archives from 0.1.3 to 0.1.5 (#4280)
  add docs to configs (#4281)
2025-10-22 13:21:59 -04:00
Christopher Phillips
a08d5b78d9
fix: update gguf data to be GGUFFileHeader
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-22 13:20:23 -04:00
Christopher Phillips
ce74ed0309
chore: refactor to use gguf-parser-go; 50mb limit
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-22 02:37:01 -04:00
Christopher Phillips
0ff6a1af58
fix: first pass pr fixes
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-17 10:46:19 -04:00
Christopher Phillips
cd4d0ce062
test: fix local flake
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-14 05:45:11 -04:00
Christopher Phillips
a721a854a9
tests: account for epoch in dedupe test
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-14 05:27:31 -04:00
Christopher Phillips
c715e01cc2
chore: schema and test additions
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-14 05:02:15 -04:00
Christopher Phillips
165611d2e4
test: migrate gguf tests over
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-14 04:56:07 -04:00
Christopher Phillips
746f00ad68
chore: lint-fix
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-14 03:59:42 -04:00
Christopher Phillips
3f117a3eb5
feat: migrate gguf parser to separate PR from oci
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-14 02:46:34 -04:00
25 changed files with 5178 additions and 4 deletions

2
.gitignore vendored
View File

@ -73,3 +73,5 @@ cosign.pub
__pycache__/
*.py[cod]
*$py.class

View File

@ -88,6 +88,7 @@ func TestPkgCoverageImage(t *testing.T) {
definedPkgs.Remove(string(pkg.TerraformPkg))
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // we have coverage for pear instead
definedPkgs.Remove(string(pkg.CondaPkg))
definedPkgs.Remove(string(pkg.ModelPkg))
var cases []testCase
cases = append(cases, commonTestCases...)
@ -162,6 +163,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
definedPkgs.Remove(string(pkg.UnknownPkg))
definedPkgs.Remove(string(pkg.CondaPkg))
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // this is covered as pear packages
definedPkgs.Remove(string(pkg.ModelPkg))
// for directory scans we should not expect to see any of the following package types
definedPkgs.Remove(string(pkg.KbPkg))

8
go.mod
View File

@ -286,6 +286,8 @@ require (
modernc.org/memory v1.11.0 // indirect
)
require github.com/gpustack/gguf-parser-go v0.22.1
require (
github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect
@ -306,6 +308,12 @@ require (
github.com/aws/aws-sdk-go-v2/service/sts v1.34.0 // indirect
github.com/aws/smithy-go v1.22.4 // indirect
github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect
github.com/henvic/httpretty v0.1.4 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
gonum.org/v1/gonum v0.15.1 // indirect
)
retract (

11
go.sum
View File

@ -541,6 +541,8 @@ github.com/gookit/assert v0.1.1/go.mod h1:jS5bmIVQZTIwk42uXl4lyj4iaaxx32tqH16CFj
github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg=
github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA=
github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs=
github.com/gpustack/gguf-parser-go v0.22.1 h1:FRnEDWqT0Rcplr/R9ctCRSN2+3DhVsf6dnR5/i9JA4E=
github.com/gpustack/gguf-parser-go v0.22.1/go.mod h1:y4TwTtDqFWTK+xvprOjRUh+dowgU2TKCX37vRKvGiZ0=
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
@ -590,6 +592,8 @@ github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
@ -617,6 +621,7 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
@ -722,9 +727,11 @@ github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcY
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA=
github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
@ -851,6 +858,8 @@ github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yf
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik=
github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU=
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
@ -1304,6 +1313,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=

View File

@ -3,5 +3,5 @@ package internal
const (
// JSONSchemaVersion is the current schema version output by the JSON encoder
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
JSONSchemaVersion = "16.0.41" JSONSchemaVersion = "16.0.42"
)

View File

@ -27,6 +27,7 @@ func AllTypes() []any {
pkg.ELFBinaryPackageNoteJSONPayload{},
pkg.ElixirMixLockEntry{},
pkg.ErlangRebarLockEntry{},
pkg.GGUFFileHeader{},
pkg.GitHubActionsUseStatement{},
pkg.GolangBinaryBuildinfoEntry{},
pkg.GolangModuleEntry{},

View File

@ -123,6 +123,7 @@ var jsonTypes = makeJSONTypes(
jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"),
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
jsonNames(pkg.GGUFFileHeader{}, "gguf-file-metadata"),
)
func expandLegacyNameVariants(names ...string) []string {

View File

@ -3,6 +3,7 @@ package task
import (
"github.com/anchore/syft/syft/cataloging/pkgcataloging"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/ai"
"github.com/anchore/syft/syft/pkg/cataloger/alpine"
"github.com/anchore/syft/syft/pkg/cataloger/arch"
"github.com/anchore/syft/syft/pkg/cataloger/binary"
@ -175,6 +176,7 @@ func DefaultPackageTaskFactories() Factories {
newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"),
newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"),
newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"),
newSimplePackageTaskFactory(ai.NewGGUFCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "gguf", "ml"),
// deprecated catalogers ////////////////////////////////////////
// these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible)

File diff suppressed because it is too large

View File

@ -1,6 +1,6 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "anchore.io/schema/syft/json/16.0.41/document", "$id": "anchore.io/schema/syft/json/16.0.42/document",
"$ref": "#/$defs/Document",
"$defs": {
"AlpmDbEntry": {
@ -1399,6 +1399,70 @@
"size" "size"
] ]
}, },
"GgufFileMetadata": {
"properties": {
"modelFormat": {
"type": "string",
"description": "ModelFormat is always \"gguf\""
},
"modelName": {
"type": "string",
"description": "ModelName is the name of the model (from general.name or filename)"
},
"modelVersion": {
"type": "string",
"description": "ModelVersion is the version of the model (if available in header, else \"unknown\")"
},
"fileSize": {
"type": "integer",
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
},
"hash": {
"type": "string",
"description": "Hash is a content hash of the metadata (for stable global identifiers across remotes)"
},
"license": {
"type": "string",
"description": "License is the license identifier (from general.license if present)"
},
"ggufVersion": {
"type": "integer",
"description": "GGUFVersion is the GGUF format version (e.g., 3)"
},
"architecture": {
"type": "string",
"description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")"
},
"quantization": {
"type": "string",
"description": "Quantization is the quantization type (e.g., \"IQ4_NL\", \"Q4_K_M\")"
},
"parameters": {
"type": "integer",
"description": "Parameters is the number of model parameters (if present in header)"
},
"tensorCount": {
"type": "integer",
"description": "TensorCount is the number of tensors in the model"
},
"header": {
"type": "object",
"description": "Header contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
},
"truncatedHeader": {
"type": "boolean",
"description": "TruncatedHeader indicates if the header was truncated during parsing (for very large headers)"
}
},
"type": "object",
"required": [
"modelFormat",
"modelName",
"ggufVersion",
"tensorCount"
],
"description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file."
},
"GithubActionsUseStatement": { "GithubActionsUseStatement": {
"properties": { "properties": {
"value": { "value": {
@ -2474,6 +2538,9 @@
{ {
"$ref": "#/$defs/ErlangRebarLockEntry" "$ref": "#/$defs/ErlangRebarLockEntry"
}, },
{
"$ref": "#/$defs/GgufFileMetadata"
},
{ {
"$ref": "#/$defs/GithubActionsUseStatement" "$ref": "#/$defs/GithubActionsUseStatement"
}, },

View File

@ -40,8 +40,11 @@ func EncodeComponent(p pkg.Package, supplier string, locationSorter func(a, b fi
}
componentType := cyclonedx.ComponentTypeLibrary
if p.Type == pkg.BinaryPkg { switch p.Type {
case pkg.BinaryPkg:
componentType = cyclonedx.ComponentTypeApplication
case pkg.ModelPkg:
componentType = cyclonedx.ComponentTypeMachineLearningModel
}
return cyclonedx.Component{
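
As a minimal sketch (not part of the diff above; assumes the cyclonedx-go library already vendored in go.mod), the constant selected for pkg.ModelPkg packages serializes as the CycloneDX "machine-learning-model" component type:

package main

import (
	"fmt"

	"github.com/CycloneDX/cyclonedx-go"
)

func main() {
	// ComponentTypeMachineLearningModel is what the switch above chooses for
	// pkg.ModelPkg when encoding a CycloneDX SBOM.
	fmt.Println(cyclonedx.ComponentTypeMachineLearningModel) // machine-learning-model
}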

View File

@ -62,7 +62,7 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str
switch component.Type {
case cyclonedx.ComponentTypeOS:
case cyclonedx.ComponentTypeContainer:
case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary: case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary, cyclonedx.ComponentTypeMachineLearningModel:
p := decodeComponent(component)
idMap[component.BOMRef] = p
if component.BOMRef != "" {

View File

@ -54,6 +54,7 @@ func Test_OriginatorSupplier(t *testing.T) {
pkg.OpamPackage{},
pkg.YarnLockEntry{},
pkg.TerraformLockProviderEntry{},
pkg.GGUFFileHeader{},
)
tests := []struct {
name string

View File

@ -82,6 +82,8 @@ func SourceInfo(p pkg.Package) string {
answer = "acquired package info from Homebrew formula" answer = "acquired package info from Homebrew formula"
case pkg.TerraformPkg: case pkg.TerraformPkg:
answer = "acquired package info from Terraform dependency lock file" answer = "acquired package info from Terraform dependency lock file"
case pkg.ModelPkg:
answer = "acquired package info from AI artifact (e.g. GGUF File"
default: default:
answer = "acquired package info from the following paths" answer = "acquired package info from the following paths"
} }

View File

@ -351,6 +351,14 @@ func Test_SourceInfo(t *testing.T) {
"acquired package info from Terraform dependency lock file", "acquired package info from Terraform dependency lock file",
}, },
}, },
{
input: pkg.Package{
Type: pkg.ModelPkg,
},
expected: []string{
"",
},
},
} }
var pkgTypes []pkg.Type var pkgTypes []pkg.Type
for _, test := range tests { for _, test := range tests {

View File

@ -0,0 +1,16 @@
/*
Package ai provides concrete Cataloger implementations for AI artifacts and machine learning models,
including support for GGUF (GPT-Generated Unified Format) model files.
*/
package ai
import (
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
// NewGGUFCataloger returns a new cataloger instance for GGUF model files.
func NewGGUFCataloger() pkg.Cataloger {
return generic.NewCataloger("gguf-cataloger").
WithParserByGlobs(parseGGUFModel, "**/*.gguf")
}

View File

@ -0,0 +1,381 @@
package ai
import (
"os"
"path/filepath"
"testing"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
)
func TestGGUFCataloger_Globs(t *testing.T) {
tests := []struct {
name string
setup func(t *testing.T) string // returns fixture directory
expected []string
}{
{
name: "finds GGUF files in root",
setup: func(t *testing.T) string {
dir := t.TempDir()
createTestGGUFInDir(t, dir, "model1.gguf")
createTestGGUFInDir(t, dir, "model2.gguf")
return dir
},
expected: []string{
"model1.gguf",
"model2.gguf",
},
},
{
name: "finds GGUF files in subdirectories",
setup: func(t *testing.T) string {
dir := t.TempDir()
modelsDir := filepath.Join(dir, "models")
os.MkdirAll(modelsDir, 0755)
createTestGGUFInDir(t, modelsDir, "llama.gguf")
deepDir := filepath.Join(dir, "deep", "nested", "path")
os.MkdirAll(deepDir, 0755)
createTestGGUFInDir(t, deepDir, "mistral.gguf")
return dir
},
expected: []string{
"models/llama.gguf",
"deep/nested/path/mistral.gguf",
},
},
{
name: "ignores non-GGUF files",
setup: func(t *testing.T) string {
dir := t.TempDir()
createTestGGUFInDir(t, dir, "model.gguf")
// Create non-GGUF files
os.WriteFile(filepath.Join(dir, "readme.txt"), []byte("readme"), 0644)
os.WriteFile(filepath.Join(dir, "model.bin"), []byte("binary"), 0644)
os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644)
return dir
},
expected: []string{
"model.gguf",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
fixtureDir := tt.setup(t)
pkgtest.NewCatalogTester().
FromDirectory(t, fixtureDir).
ExpectsResolverContentQueries(tt.expected).
TestCataloger(t, NewGGUFCataloger())
})
}
}
func TestGGUFCataloger_Integration(t *testing.T) {
tests := []struct {
name string
setup func(t *testing.T) string
expectedPackages []pkg.Package
expectedRelationships []artifact.Relationship
}{
{
name: "catalog single GGUF file",
setup: func(t *testing.T) string {
dir := t.TempDir()
data := newTestGGUFBuilder().
withVersion(3).
withTensorCount(291).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "llama3-8b").
withStringKV("general.version", "3.0").
withStringKV("general.license", "Apache-2.0").
withStringKV("general.quantization", "Q4_K_M").
withUint64KV("general.parameter_count", 8030000000).
build()
path := filepath.Join(dir, "llama3-8b.gguf")
os.WriteFile(path, data, 0644)
return dir
},
expectedPackages: []pkg.Package{
{
Name: "llama3-8b",
Version: "3.0",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
),
Metadata: pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: "llama3-8b",
ModelVersion: "3.0",
License: "Apache-2.0",
Architecture: "llama",
Quantization: "Q4_K_M",
Parameters: 8030000000,
GGUFVersion: 3,
TensorCount: 291,
Header: map[string]interface{}{},
TruncatedHeader: false,
},
},
},
expectedRelationships: nil,
},
{
name: "catalog multiple GGUF files",
setup: func(t *testing.T) string {
dir := t.TempDir()
// Create first model
data1 := newTestGGUFBuilder().
withVersion(3).
withTensorCount(100).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "model1").
withStringKV("general.version", "1.0").
build()
os.WriteFile(filepath.Join(dir, "model1.gguf"), data1, 0644)
// Create second model
data2 := newTestGGUFBuilder().
withVersion(3).
withTensorCount(200).
withStringKV("general.architecture", "mistral").
withStringKV("general.name", "model2").
withStringKV("general.version", "2.0").
build()
os.WriteFile(filepath.Join(dir, "model2.gguf"), data2, 0644)
return dir
},
expectedPackages: []pkg.Package{
{
Name: "model1",
Version: "1.0",
Type: pkg.ModelPkg,
Metadata: pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: "model1",
ModelVersion: "1.0",
Architecture: "llama",
Quantization: unknownGGUFData,
GGUFVersion: 3,
TensorCount: 100,
Header: map[string]interface{}{},
TruncatedHeader: false,
},
},
{
Name: "model2",
Version: "2.0",
Type: pkg.ModelPkg,
Metadata: pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: "model2",
ModelVersion: "2.0",
Architecture: "mistral",
Quantization: unknownGGUFData,
GGUFVersion: 3,
TensorCount: 200,
Header: map[string]interface{}{},
TruncatedHeader: false,
},
},
},
expectedRelationships: nil,
},
{
name: "catalog GGUF in nested directories",
setup: func(t *testing.T) string {
dir := t.TempDir()
nestedDir := filepath.Join(dir, "models", "quantized")
os.MkdirAll(nestedDir, 0755)
data := newTestGGUFBuilder().
withVersion(3).
withTensorCount(150).
withStringKV("general.architecture", "qwen").
withStringKV("general.name", "qwen-nested").
build()
os.WriteFile(filepath.Join(nestedDir, "qwen.gguf"), data, 0644)
return dir
},
expectedPackages: []pkg.Package{
{
Name: "qwen-nested",
Version: unknownGGUFData,
Type: pkg.ModelPkg,
Metadata: pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: "qwen-nested",
ModelVersion: unknownGGUFData,
Architecture: "qwen",
Quantization: unknownGGUFData,
GGUFVersion: 3,
TensorCount: 150,
Header: map[string]interface{}{},
TruncatedHeader: false,
},
},
},
expectedRelationships: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
fixtureDir := tt.setup(t)
// Use pkgtest to catalog and compare
tester := pkgtest.NewCatalogTester().
FromDirectory(t, fixtureDir).
Expects(tt.expectedPackages, tt.expectedRelationships).
IgnoreLocationLayer().
IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger
WithCompareOptions(
// Ignore Hash as it's computed dynamically
cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "Hash"),
)
tester.TestCataloger(t, NewGGUFCataloger())
})
}
}
func TestGGUFCataloger_SkipsInvalidFiles(t *testing.T) {
dir := t.TempDir()
// Create a valid GGUF
validData := newTestGGUFBuilder().
withVersion(3).
withTensorCount(100).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "valid-model").
build()
os.WriteFile(filepath.Join(dir, "valid.gguf"), validData, 0644)
// Create an invalid GGUF (wrong magic)
invalidData := newTestGGUFBuilder().buildInvalidMagic()
os.WriteFile(filepath.Join(dir, "invalid.gguf"), invalidData, 0644)
// Create a truncated GGUF
os.WriteFile(filepath.Join(dir, "truncated.gguf"), []byte{0x47}, 0644)
// Catalog should succeed and only return the valid package
tester := pkgtest.NewCatalogTester().
FromDirectory(t, dir).
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
// Should only find the valid model
require.Len(t, pkgs, 1)
assert.Equal(t, "valid-model", pkgs[0].Name)
})
tester.TestCataloger(t, NewGGUFCataloger())
}
func TestGGUFCataloger_Name(t *testing.T) {
cataloger := NewGGUFCataloger()
assert.Equal(t, "gguf-cataloger", cataloger.Name())
}
func TestGGUFCataloger_EmptyDirectory(t *testing.T) {
dir := t.TempDir()
// Create a subdirectory to ensure glob still runs
os.MkdirAll(filepath.Join(dir, "models"), 0755)
tester := pkgtest.NewCatalogTester().
FromDirectory(t, dir).
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, rels []artifact.Relationship) {
assert.Empty(t, pkgs)
assert.Empty(t, rels)
})
tester.TestCataloger(t, NewGGUFCataloger())
}
func TestGGUFCataloger_MixedFiles(t *testing.T) {
dir := t.TempDir()
// Create GGUF file
ggufData := newTestGGUFBuilder().
withVersion(3).
withTensorCount(100).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "test-model").
build()
os.WriteFile(filepath.Join(dir, "model.gguf"), ggufData, 0644)
// Create other file types
os.WriteFile(filepath.Join(dir, "README.md"), []byte("# Models"), 0644)
os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644)
os.WriteFile(filepath.Join(dir, "weights.bin"), []byte("weights"), 0644)
os.MkdirAll(filepath.Join(dir, "subdir"), 0755)
tester := pkgtest.NewCatalogTester().
FromDirectory(t, dir).
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
// Should only find the GGUF model
require.Len(t, pkgs, 1)
assert.Equal(t, "test-model", pkgs[0].Name)
assert.Equal(t, pkg.ModelPkg, pkgs[0].Type)
})
tester.TestCataloger(t, NewGGUFCataloger())
}
func TestGGUFCataloger_CaseInsensitiveGlob(t *testing.T) {
// Test that the glob pattern is case-sensitive (as expected for **/*.gguf)
dir := t.TempDir()
// Create lowercase .gguf
data := newTestGGUFBuilder().
withVersion(3).
withTensorCount(100).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "lowercase").
build()
os.WriteFile(filepath.Join(dir, "model.gguf"), data, 0644)
// Create uppercase .GGUF (should not match the glob)
os.WriteFile(filepath.Join(dir, "MODEL.GGUF"), data, 0644)
tester := pkgtest.NewCatalogTester().
FromDirectory(t, dir).
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
// Depending on filesystem case-sensitivity, we may get 1 or 2 packages
// On case-insensitive filesystems (macOS), both might match
// On case-sensitive filesystems (Linux), only lowercase matches
assert.GreaterOrEqual(t, len(pkgs), 1, "should find at least the lowercase file")
})
tester.TestCataloger(t, NewGGUFCataloger())
}
// createTestGGUFInDir creates a minimal test GGUF file in the specified directory
func createTestGGUFInDir(t *testing.T, dir, filename string) {
t.Helper()
data := newTestGGUFBuilder().
withVersion(3).
withTensorCount(100).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "test-model").
build()
path := filepath.Join(dir, filename)
err := os.WriteFile(path, data, 0644)
require.NoError(t, err)
}

View File

@ -0,0 +1,69 @@
package ai
import (
"crypto/sha256"
"encoding/json"
"fmt"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
func newGGUFPackage(metadata *pkg.GGUFFileHeader, locations ...file.Location) pkg.Package {
// Compute hash if not already set
if metadata.Hash == "" {
metadata.Hash = computeMetadataHash(metadata)
}
p := pkg.Package{
Name: metadata.ModelName,
Version: metadata.ModelVersion,
Locations: file.NewLocationSet(locations...),
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(),
Metadata: *metadata,
// NOTE: PURL is intentionally not set as the package-url spec
// has not yet finalized support for ML model packages
}
// Add license to the package if present in metadata
if metadata.License != "" {
p.Licenses.Add(pkg.NewLicenseFromFields(metadata.License, "", nil))
}
p.SetID()
return p
}
// computeMetadataHash computes a stable hash of the metadata for use as a global identifier
func computeMetadataHash(metadata *pkg.GGUFFileHeader) string {
// Create a stable representation of the metadata
hashData := struct {
Format string
Name string
Version string
Architecture string
GGUFVersion uint32
TensorCount uint64
}{
Format: metadata.ModelFormat,
Name: metadata.ModelName,
Version: metadata.ModelVersion,
Architecture: metadata.Architecture,
GGUFVersion: metadata.GGUFVersion,
TensorCount: metadata.TensorCount,
}
// Marshal to JSON for stable hashing
jsonBytes, err := json.Marshal(hashData)
if err != nil {
log.Debugf("failed to marshal metadata for hashing: %v", err)
return ""
}
// Compute SHA256 hash
hash := sha256.Sum256(jsonBytes)
return fmt.Sprintf("%x", hash[:8]) // Use first 8 bytes (16 hex chars)
}
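
A brief usage sketch (illustrative only, not part of the diff; the field values mirror the llama3-8b test fixture elsewhere in this change) showing how newGGUFPackage fills in the hash and license, written as if compiled alongside package ai:

package ai

import (
	"fmt"

	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/pkg"
)

func exampleNewGGUFPackage() {
	header := &pkg.GGUFFileHeader{
		ModelFormat:  "gguf",
		ModelName:    "llama3-8b",
		ModelVersion: "3.0",
		License:      "Apache-2.0",
		Architecture: "llama",
		GGUFVersion:  3,
		TensorCount:  291,
	}
	p := newGGUFPackage(header, file.NewLocation("/models/llama3-8b.gguf"))

	// newGGUFPackage populated header.Hash (16 hex characters taken from the
	// SHA-256 of the stable identity fields) and added the license entry.
	fmt.Println(p.Name, p.Version, p.Type, header.Hash)
}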

View File

@ -0,0 +1,126 @@
package ai
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
func TestNewGGUFPackage(t *testing.T) {
tests := []struct {
name string
metadata *pkg.GGUFFileHeader
locations []file.Location
checkFunc func(t *testing.T, p pkg.Package)
}{
{
name: "complete GGUF package with all fields",
metadata: &pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: "llama3-8b-instruct",
ModelVersion: "3.0",
License: "Apache-2.0",
Architecture: "llama",
Quantization: "Q4_K_M",
Parameters: 8030000000,
GGUFVersion: 3,
TensorCount: 291,
Header: map[string]any{},
TruncatedHeader: false,
},
locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
checkFunc: func(t *testing.T, p pkg.Package) {
if d := cmp.Diff("llama3-8b-instruct", p.Name); d != "" {
t.Errorf("Name mismatch (-want +got):\n%s", d)
}
if d := cmp.Diff("3.0", p.Version); d != "" {
t.Errorf("Version mismatch (-want +got):\n%s", d)
}
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
t.Errorf("Type mismatch (-want +got):\n%s", d)
}
assert.Empty(t, p.PURL, "PURL should not be set for model packages")
assert.Len(t, p.Licenses.ToSlice(), 1)
if d := cmp.Diff("Apache-2.0", p.Licenses.ToSlice()[0].Value); d != "" {
t.Errorf("License value mismatch (-want +got):\n%s", d)
}
assert.NotEmpty(t, p.ID())
},
},
{
name: "minimal GGUF package",
metadata: &pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: "simple-model",
ModelVersion: "1.0",
Architecture: "gpt2",
GGUFVersion: 3,
TensorCount: 50,
},
locations: []file.Location{file.NewLocation("/models/simple.gguf")},
checkFunc: func(t *testing.T, p pkg.Package) {
if d := cmp.Diff("simple-model", p.Name); d != "" {
t.Errorf("Name mismatch (-want +got):\n%s", d)
}
if d := cmp.Diff("1.0", p.Version); d != "" {
t.Errorf("Version mismatch (-want +got):\n%s", d)
}
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
t.Errorf("Type mismatch (-want +got):\n%s", d)
}
assert.Empty(t, p.PURL, "PURL should not be set for model packages")
assert.Empty(t, p.Licenses.ToSlice())
},
},
{
name: "GGUF package with multiple locations",
metadata: &pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: "multi-location-model",
ModelVersion: "1.5",
Architecture: "llama",
GGUFVersion: 3,
TensorCount: 150,
},
locations: []file.Location{
file.NewLocation("/models/model1.gguf"),
file.NewLocation("/models/model2.gguf"),
},
checkFunc: func(t *testing.T, p pkg.Package) {
assert.Len(t, p.Locations.ToSlice(), 2)
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p := newGGUFPackage(tt.metadata, tt.locations...)
if d := cmp.Diff(tt.metadata.ModelName, p.Name); d != "" {
t.Errorf("Name mismatch (-want +got):\n%s", d)
}
if d := cmp.Diff(tt.metadata.ModelVersion, p.Version); d != "" {
t.Errorf("Version mismatch (-want +got):\n%s", d)
}
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
t.Errorf("Type mismatch (-want +got):\n%s", d)
}
// Verify metadata is attached
metadata, ok := p.Metadata.(pkg.GGUFFileHeader)
require.True(t, ok, "metadata should be GGUFFileHeader")
if d := cmp.Diff(*tt.metadata, metadata); d != "" {
t.Errorf("Metadata mismatch (-want +got):\n%s", d)
}
if tt.checkFunc != nil {
tt.checkFunc(t, p)
}
})
}
}

View File

@ -0,0 +1,92 @@
package ai
import (
"encoding/binary"
"fmt"
"io"
gguf_parser "github.com/gpustack/gguf-parser-go"
)
// GGUF file format constants
const (
ggufMagicNumber = 0x46554747 // "GGUF" in little-endian
maxHeaderSize = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies
)
// ggufHeaderReader reads just the header portion of a GGUF file efficiently
type ggufHeaderReader struct {
reader io.Reader
}
// readHeader reads only the GGUF header (metadata) without reading tensor data
// This is much more efficient than reading the entire file
func (r *ggufHeaderReader) readHeader() ([]byte, error) {
// Read initial chunk to determine header size
// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
initialBuf := make([]byte, 24) // Enough for magic, version, tensor count, and kv count
if _, err := io.ReadFull(r.reader, initialBuf); err != nil {
return nil, fmt.Errorf("failed to read GGUF header prefix: %w", err)
}
// Verify magic number
magic := binary.LittleEndian.Uint32(initialBuf[0:4])
if magic != ggufMagicNumber {
return nil, fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
}
// We need to read the metadata KV pairs to know the full header size
// For efficiency, we'll read incrementally up to maxHeaderSize
headerData := make([]byte, 0, 1024*1024) // Start with 1MB capacity
headerData = append(headerData, initialBuf...)
// Read the rest of the header in larger chunks for efficiency
buf := make([]byte, 64*1024) // 64KB chunks
for len(headerData) < maxHeaderSize {
n, err := r.reader.Read(buf)
if n > 0 {
headerData = append(headerData, buf[:n]...)
}
if err == io.EOF {
// Reached end of file, we have all the data
break
}
if err != nil {
return nil, fmt.Errorf("failed to read GGUF header: %w", err)
}
}
if len(headerData) > maxHeaderSize {
// Truncate if we somehow read too much
headerData = headerData[:maxHeaderSize]
}
return headerData, nil
}
// Helper to convert gguf_parser metadata to simpler types
func convertGGUFMetadataKVs(kvs gguf_parser.GGUFMetadataKVs) map[string]interface{} {
result := make(map[string]interface{})
// Limit KV pairs to avoid bloat
const maxKVPairs = 200
count := 0
for _, kv := range kvs {
if count >= maxKVPairs {
break
}
// Skip standard fields that are extracted separately
switch kv.Key {
case "general.architecture", "general.name", "general.license",
"general.version", "general.parameter_count", "general.quantization":
continue
}
result[kv.Key] = kv.Value
count++
}
return result
}
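
For reference, a self-contained sketch (not part of the diff) of decoding the fixed 24-byte prefix described in the comments above — magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8), all little-endian; the prefix bytes here are fabricated for illustration:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	// Build a hypothetical prefix for a v3 file with 291 tensors and 7 KV pairs.
	prefix := make([]byte, 24)
	binary.LittleEndian.PutUint32(prefix[0:4], 0x46554747) // "GGUF"
	binary.LittleEndian.PutUint32(prefix[4:8], 3)
	binary.LittleEndian.PutUint64(prefix[8:16], 291)
	binary.LittleEndian.PutUint64(prefix[16:24], 7)

	// Decode it the same way readHeader validates the magic number.
	magic := binary.LittleEndian.Uint32(prefix[0:4])
	if magic != 0x46554747 {
		fmt.Printf("invalid GGUF magic number: 0x%08X\n", magic)
		return
	}
	fmt.Println("version:", binary.LittleEndian.Uint32(prefix[4:8]))
	fmt.Println("tensor count:", binary.LittleEndian.Uint64(prefix[8:16]))
	fmt.Println("metadata KV count:", binary.LittleEndian.Uint64(prefix[16:24]))
}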

View File

@ -0,0 +1,127 @@
package ai
import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
gguf_parser "github.com/gpustack/gguf-parser-go"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
const unknownGGUFData = "unknown"
// parseGGUFModel parses a GGUF model file and returns the discovered package.
// This implementation only reads the header portion of the file, not the entire model.
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
defer internal.CloseAndLogError(reader, reader.Path())
// Read only the header portion (not the entire file)
headerReader := &ggufHeaderReader{reader: reader}
headerData, err := headerReader.readHeader()
if err != nil {
return nil, nil, fmt.Errorf("failed to read GGUF header: %w", err)
}
// Create a temporary file with just the header for the library to parse
// The library requires a file path, so we create a minimal temp file
tempFile, err := os.CreateTemp("", "syft-gguf-header-*.gguf")
if err != nil {
return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
}
tempPath := tempFile.Name()
defer os.Remove(tempPath)
// Write header data to temp file
if _, err := tempFile.Write(headerData); err != nil {
tempFile.Close()
return nil, nil, fmt.Errorf("failed to write header to temp file: %w", err)
}
tempFile.Close()
// Parse using gguf-parser-go with options to skip unnecessary data
ggufFile, err := gguf_parser.ParseGGUFFile(tempPath,
gguf_parser.SkipLargeMetadata(),
)
if err != nil {
return nil, nil, fmt.Errorf("failed to parse GGUF file: %w", err)
}
// Extract metadata
metadata := ggufFile.Metadata()
// Convert to syft metadata structure
syftMetadata := &pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: metadata.Name,
ModelVersion: extractVersion(ggufFile.Header.MetadataKV),
License: metadata.License,
Architecture: metadata.Architecture,
Quantization: metadata.FileTypeDescriptor,
Parameters: uint64(metadata.Parameters),
GGUFVersion: uint32(ggufFile.Header.Version),
TensorCount: ggufFile.Header.TensorCount,
Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
TruncatedHeader: false, // We read the full header
Hash: "", // Will be computed in newGGUFPackage
}
// If model name is not in metadata, use filename
if syftMetadata.ModelName == "" {
syftMetadata.ModelName = extractModelNameFromPath(reader.Path())
}
// If version is still unknown, try to infer from name
if syftMetadata.ModelVersion == unknownGGUFData {
syftMetadata.ModelVersion = extractVersionFromName(syftMetadata.ModelName)
}
// Create package from metadata
p := newGGUFPackage(
syftMetadata,
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
)
return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse GGUF file")
}
// extractVersion attempts to extract version from metadata KV pairs
func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string {
for _, kv := range kvs {
if kv.Key == "general.version" {
if v, ok := kv.Value.(string); ok && v != "" {
return v
}
}
}
return unknownGGUFData
}
// extractVersionFromName tries to extract version from model name
func extractVersionFromName(_ string) string {
// Look for version patterns like "v1.0", "1.5b", "3.0", etc.
// For now, return unknown - this could be enhanced with regex
return unknownGGUFData
}
// extractModelNameFromPath extracts the model name from the file path
func extractModelNameFromPath(path string) string {
// Get the base filename
base := filepath.Base(path)
// Remove .gguf extension
name := strings.TrimSuffix(base, ".gguf")
return name
}
// integrity check
var _ generic.Parser = parseGGUFModel
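
A small illustration (not part of the diff) of the filename fallback used when general.name is absent from the header, written as if compiled alongside package ai:

package ai

import "fmt"

func exampleModelNameFallback() {
	// extractModelNameFromPath trims the directory and the ".gguf" suffix.
	fmt.Println(extractModelNameFromPath("/models/llama3-8b.gguf"))            // llama3-8b
	fmt.Println(extractModelNameFromPath("mistral-7b-instruct-Q4_K_M.gguf"))   // mistral-7b-instruct-Q4_K_M
}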

View File

@ -0,0 +1,127 @@
package ai
import (
"bytes"
"encoding/binary"
)
// GGUF type constants for test builder
const (
ggufMagic = 0x46554747 // "GGUF" in little-endian
ggufTypeUint8 = 0
ggufTypeInt8 = 1
ggufTypeUint16 = 2
ggufTypeInt16 = 3
ggufTypeUint32 = 4
ggufTypeInt32 = 5
ggufTypeFloat32 = 6
ggufTypeUint64 = 7
ggufTypeInt64 = 8
ggufTypeFloat64 = 9
ggufTypeBool = 10
ggufTypeString = 11
ggufTypeArray = 12
)
// testGGUFBuilder helps build GGUF files for testing
type testGGUFBuilder struct {
buf *bytes.Buffer
version uint32
tensorCount uint64
kvPairs []testKVPair
}
type testKVPair struct {
key string
valueType uint32
value interface{}
}
func newTestGGUFBuilder() *testGGUFBuilder {
return &testGGUFBuilder{
buf: new(bytes.Buffer),
version: 3,
tensorCount: 100,
kvPairs: []testKVPair{},
}
}
func (b *testGGUFBuilder) withVersion(v uint32) *testGGUFBuilder {
b.version = v
return b
}
func (b *testGGUFBuilder) withTensorCount(count uint64) *testGGUFBuilder {
b.tensorCount = count
return b
}
func (b *testGGUFBuilder) withStringKV(key, value string) *testGGUFBuilder {
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeString, value: value})
return b
}
func (b *testGGUFBuilder) withUint64KV(key string, value uint64) *testGGUFBuilder {
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint64, value: value})
return b
}
func (b *testGGUFBuilder) withUint32KV(key string, value uint32) *testGGUFBuilder {
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint32, value: value})
return b
}
func (b *testGGUFBuilder) writeString(s string) {
binary.Write(b.buf, binary.LittleEndian, uint64(len(s)))
b.buf.WriteString(s)
}
func (b *testGGUFBuilder) build() []byte {
// Write magic number "GGUF"
binary.Write(b.buf, binary.LittleEndian, uint32(ggufMagic))
// Write version
binary.Write(b.buf, binary.LittleEndian, b.version)
// Write tensor count
binary.Write(b.buf, binary.LittleEndian, b.tensorCount)
// Write KV count
binary.Write(b.buf, binary.LittleEndian, uint64(len(b.kvPairs)))
// Write KV pairs
for _, kv := range b.kvPairs {
// Write key
b.writeString(kv.key)
// Write value type
binary.Write(b.buf, binary.LittleEndian, kv.valueType)
// Write value based on type
switch kv.valueType {
case ggufTypeString:
b.writeString(kv.value.(string))
case ggufTypeUint32:
binary.Write(b.buf, binary.LittleEndian, kv.value.(uint32))
case ggufTypeUint64:
binary.Write(b.buf, binary.LittleEndian, kv.value.(uint64))
case ggufTypeUint8:
binary.Write(b.buf, binary.LittleEndian, kv.value.(uint8))
case ggufTypeInt32:
binary.Write(b.buf, binary.LittleEndian, kv.value.(int32))
case ggufTypeBool:
var v uint8
if kv.value.(bool) {
v = 1
}
binary.Write(b.buf, binary.LittleEndian, v)
}
}
return b.buf.Bytes()
}
// buildInvalidMagic creates a file with invalid magic number
func (b *testGGUFBuilder) buildInvalidMagic() []byte {
buf := new(bytes.Buffer)
binary.Write(buf, binary.LittleEndian, uint32(0x12345678))
return buf.Bytes()
}

47
syft/pkg/gguf.go Normal file
View File

@ -0,0 +1,47 @@
package pkg
// GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file.
// GGUF is a binary file format used for storing model weights for the GGML library, designed for fast
// loading and saving of models, particularly quantized large language models.
type GGUFFileHeader struct {
// ModelFormat is always "gguf"
ModelFormat string `json:"modelFormat" cyclonedx:"modelFormat"`
// ModelName is the name of the model (from general.name or filename)
ModelName string `json:"modelName" cyclonedx:"modelName"`
// ModelVersion is the version of the model (if available in header, else "unknown")
ModelVersion string `json:"modelVersion,omitempty" cyclonedx:"modelVersion"`
// FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)
FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`
// Hash is a content hash of the metadata (for stable global identifiers across remotes)
Hash string `json:"hash,omitempty" cyclonedx:"hash"`
// License is the license identifier (from general.license if present)
License string `json:"license,omitempty" cyclonedx:"license"`
// GGUFVersion is the GGUF format version (e.g., 3)
GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"`
// Architecture is the model architecture (from general.architecture, e.g., "qwen3moe", "llama")
Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`
// Quantization is the quantization type (e.g., "IQ4_NL", "Q4_K_M")
Quantization string `json:"quantization,omitempty" cyclonedx:"quantization"`
// Parameters is the number of model parameters (if present in header)
Parameters uint64 `json:"parameters,omitempty" cyclonedx:"parameters"`
// TensorCount is the number of tensors in the model
TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`
// Header contains the remaining key-value pairs from the GGUF header that are not already
// represented as typed fields above. This preserves additional metadata fields for reference
// (namespaced with general.*, llama.*, etc.) while avoiding duplication.
Header map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
// TruncatedHeader indicates if the header was truncated during parsing (for very large headers)
TruncatedHeader bool `json:"truncatedHeader,omitempty" cyclonedx:"truncatedHeader"`
}
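
For reference, a sketch (not part of the diff; assumes the struct as defined above) of the JSON shape these tags produce, which is what the GgufFileMetadata definition added to the schema earlier in this diff describes:

package main

import (
	"encoding/json"
	"fmt"

	"github.com/anchore/syft/syft/pkg"
)

func main() {
	b, err := json.MarshalIndent(pkg.GGUFFileHeader{
		ModelFormat: "gguf",
		ModelName:   "llama3-8b",
		GGUFVersion: 3,
		TensorCount: 291,
	}, "", "  ")
	if err != nil {
		panic(err)
	}
	// Only the four required fields remain; the omitempty fields are dropped:
	// {
	//   "modelFormat": "gguf",
	//   "modelName": "llama3-8b",
	//   "ggufVersion": 3,
	//   "tensorCount": 291
	// }
	fmt.Println(string(b))
}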

View File

@ -50,6 +50,7 @@ const (
TerraformPkg Type = "terraform" TerraformPkg Type = "terraform"
WordpressPluginPkg Type = "wordpress-plugin" WordpressPluginPkg Type = "wordpress-plugin"
HomebrewPkg Type = "homebrew" HomebrewPkg Type = "homebrew"
ModelPkg Type = "model"
) )
// AllPkgs represents all supported package types // AllPkgs represents all supported package types
@ -94,6 +95,7 @@ var AllPkgs = []Type{
TerraformPkg, TerraformPkg,
WordpressPluginPkg, WordpressPluginPkg,
HomebrewPkg, HomebrewPkg,
ModelPkg,
} }
// PackageURLType returns the PURL package type for the current package. // PackageURLType returns the PURL package type for the current package.

View File

@ -155,6 +155,7 @@ func TestTypeFromPURL(t *testing.T) {
expectedTypes.Remove(string(HomebrewPkg))
expectedTypes.Remove(string(TerraformPkg))
expectedTypes.Remove(string(GraalVMNativeImagePkg))
expectedTypes.Remove(string(ModelPkg)) // no valid purl for ai artifacts currently
expectedTypes.Remove(string(PhpPeclPkg)) // we should always consider this a pear package
for _, test := range tests {