Compare commits

...

14 Commits

Author SHA1 Message Date
Christopher Phillips
5853129c07
wip: wip no lrg file oci client
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-11-05 11:29:53 -05:00
Christopher Phillips
9c5279cb99
chore: pr feedback
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-22 13:33:33 -04:00
Christopher Phillips
f7a19db98b
chore: warn -> debug
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-22 13:24:47 -04:00
Christopher Phillips
13756ec768
fix: role schema forward
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-22 13:22:58 -04:00
Christopher Phillips
3326ae44fa
Merge branch 'main' into 4184-gguf-parser
* main:
  chore(deps): update tools to latest versions (#4302)
  chore(deps): bump github.com/github/go-spdx/v2 from 2.3.3 to 2.3.4 (#4301)
  chore(deps): bump github/codeql-action from 4.30.8 to 4.30.9 (#4299)
  support universal (fat) mach-o binary files (#4278)
  chore(deps): bump sigstore/cosign-installer from 3.10.0 to 4.0.0 (#4296)
  chore(deps): bump anchore/sbom-action from 0.20.7 to 0.20.8 (#4297)
  convert posix path back to windows (#4285)
  Remove duplicate image source providers (#4289)
  chore(deps): bump anchore/sbom-action from 0.20.6 to 0.20.7 (#4293)
  feat: add option to fetch remote licenses for pnpm-lock.yaml files (#4286)
  Add PDM parser (#4234)
  chore(deps): update tools to latest versions (#4291)
  fix: panic during java archive maven resolution (#4290)
  Extract zip archive with multiple entries (#4283)
  chore: update to use old configuration on new cosign (#4287)
  chore(deps): update anchore dependencies (#4282)
  chore(deps): bump github.com/mholt/archives from 0.1.3 to 0.1.5 (#4280)
  add docs to configs (#4281)
2025-10-22 13:21:59 -04:00
Christopher Phillips
a08d5b78d9
fix: update gguf data to be GGUFFileHeader
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-22 13:20:23 -04:00
Christopher Phillips
ce74ed0309
chore: refactor to use gguf-parser-go; 50mb limit
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-22 02:37:01 -04:00
Christopher Phillips
0ff6a1af58
fix: first pass pr fixes
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-17 10:46:19 -04:00
Christopher Phillips
cd4d0ce062
test: fix local flake
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-14 05:45:11 -04:00
Christopher Phillips
a721a854a9
tests: account for epoch in dedupe test
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-14 05:27:31 -04:00
Christopher Phillips
c715e01cc2
chore: schema and test additions
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-14 05:02:15 -04:00
Christopher Phillips
165611d2e4
test: migrate gguf tests over
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-14 04:56:07 -04:00
Christopher Phillips
746f00ad68
chore: lint-fix
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-14 03:59:42 -04:00
Christopher Phillips
3f117a3eb5
feat: migrate gguf parser to separate PR from oci
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-10-14 02:46:34 -04:00
34 changed files with 6090 additions and 8 deletions

2
.gitignore vendored
View File

@ -73,3 +73,5 @@ cosign.pub
__pycache__/ __pycache__/
*.py[cod] *.py[cod]
*$py.class *$py.class

View File

@ -88,6 +88,7 @@ func TestPkgCoverageImage(t *testing.T) {
definedPkgs.Remove(string(pkg.TerraformPkg)) definedPkgs.Remove(string(pkg.TerraformPkg))
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // we have coverage for pear instead definedPkgs.Remove(string(pkg.PhpPeclPkg)) // we have coverage for pear instead
definedPkgs.Remove(string(pkg.CondaPkg)) definedPkgs.Remove(string(pkg.CondaPkg))
definedPkgs.Remove(string(pkg.ModelPkg))
var cases []testCase var cases []testCase
cases = append(cases, commonTestCases...) cases = append(cases, commonTestCases...)
@ -162,6 +163,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
definedPkgs.Remove(string(pkg.UnknownPkg)) definedPkgs.Remove(string(pkg.UnknownPkg))
definedPkgs.Remove(string(pkg.CondaPkg)) definedPkgs.Remove(string(pkg.CondaPkg))
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // this is covered as pear packages definedPkgs.Remove(string(pkg.PhpPeclPkg)) // this is covered as pear packages
definedPkgs.Remove(string(pkg.ModelPkg))
// for directory scans we should not expect to see any of the following package types // for directory scans we should not expect to see any of the following package types
definedPkgs.Remove(string(pkg.KbPkg)) definedPkgs.Remove(string(pkg.KbPkg))

8
go.mod
View File

@ -286,6 +286,8 @@ require (
modernc.org/memory v1.11.0 // indirect modernc.org/memory v1.11.0 // indirect
) )
require github.com/gpustack/gguf-parser-go v0.22.1
require ( require (
github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect
@ -306,6 +308,12 @@ require (
github.com/aws/aws-sdk-go-v2/service/sts v1.34.0 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.34.0 // indirect
github.com/aws/smithy-go v1.22.4 // indirect github.com/aws/smithy-go v1.22.4 // indirect
github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect
github.com/henvic/httpretty v0.1.4 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
gonum.org/v1/gonum v0.15.1 // indirect
) )
retract ( retract (

11
go.sum
View File

@ -541,6 +541,8 @@ github.com/gookit/assert v0.1.1/go.mod h1:jS5bmIVQZTIwk42uXl4lyj4iaaxx32tqH16CFj
github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg= github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg=
github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA= github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA=
github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs= github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs=
github.com/gpustack/gguf-parser-go v0.22.1 h1:FRnEDWqT0Rcplr/R9ctCRSN2+3DhVsf6dnR5/i9JA4E=
github.com/gpustack/gguf-parser-go v0.22.1/go.mod h1:y4TwTtDqFWTK+xvprOjRUh+dowgU2TKCX37vRKvGiZ0=
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms= github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
@ -590,6 +592,8 @@ github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk= github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4= github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI= github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA= github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
@ -617,6 +621,7 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
@ -722,9 +727,11 @@ github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcY
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA= github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA=
github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
@ -851,6 +858,8 @@ github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yf
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8= github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY= github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik= github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik=
github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU= github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU=
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
@ -1304,6 +1313,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=

View File

@ -3,5 +3,5 @@ package internal
const ( const (
// JSONSchemaVersion is the current schema version output by the JSON encoder // JSONSchemaVersion is the current schema version output by the JSON encoder
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment. // This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
JSONSchemaVersion = "16.0.41" JSONSchemaVersion = "16.0.42"
) )

View File

@ -27,6 +27,7 @@ func AllTypes() []any {
pkg.ELFBinaryPackageNoteJSONPayload{}, pkg.ELFBinaryPackageNoteJSONPayload{},
pkg.ElixirMixLockEntry{}, pkg.ElixirMixLockEntry{},
pkg.ErlangRebarLockEntry{}, pkg.ErlangRebarLockEntry{},
pkg.GGUFFileHeader{},
pkg.GitHubActionsUseStatement{}, pkg.GitHubActionsUseStatement{},
pkg.GolangBinaryBuildinfoEntry{}, pkg.GolangBinaryBuildinfoEntry{},
pkg.GolangModuleEntry{}, pkg.GolangModuleEntry{},

View File

@ -123,6 +123,7 @@ var jsonTypes = makeJSONTypes(
jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"), jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"),
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"), jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"), jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
jsonNames(pkg.GGUFFileHeader{}, "gguf-file-metadata"),
) )
func expandLegacyNameVariants(names ...string) []string { func expandLegacyNameVariants(names ...string) []string {

View File

@ -3,6 +3,7 @@ package task
import ( import (
"github.com/anchore/syft/syft/cataloging/pkgcataloging" "github.com/anchore/syft/syft/cataloging/pkgcataloging"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/ai"
"github.com/anchore/syft/syft/pkg/cataloger/alpine" "github.com/anchore/syft/syft/pkg/cataloger/alpine"
"github.com/anchore/syft/syft/pkg/cataloger/arch" "github.com/anchore/syft/syft/pkg/cataloger/arch"
"github.com/anchore/syft/syft/pkg/cataloger/binary" "github.com/anchore/syft/syft/pkg/cataloger/binary"
@ -175,6 +176,7 @@ func DefaultPackageTaskFactories() Factories {
newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"), newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"),
newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"), newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"),
newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"), newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"),
newSimplePackageTaskFactory(ai.NewGGUFCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "gguf", "ml"),
// deprecated catalogers //////////////////////////////////////// // deprecated catalogers ////////////////////////////////////////
// these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible) // these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible)

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
{ {
"$schema": "https://json-schema.org/draft/2020-12/schema", "$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "anchore.io/schema/syft/json/16.0.41/document", "$id": "anchore.io/schema/syft/json/16.0.42/document",
"$ref": "#/$defs/Document", "$ref": "#/$defs/Document",
"$defs": { "$defs": {
"AlpmDbEntry": { "AlpmDbEntry": {
@ -1399,6 +1399,70 @@
"size" "size"
] ]
}, },
"GgufFileMetadata": {
"properties": {
"modelFormat": {
"type": "string",
"description": "ModelFormat is always \"gguf\""
},
"modelName": {
"type": "string",
"description": "ModelName is the name of the model (from general.name or filename)"
},
"modelVersion": {
"type": "string",
"description": "ModelVersion is the version of the model (if available in header, else \"unknown\")"
},
"fileSize": {
"type": "integer",
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
},
"hash": {
"type": "string",
"description": "Hash is a content hash of the metadata (for stable global identifiers across remotes)"
},
"license": {
"type": "string",
"description": "License is the license identifier (from general.license if present)"
},
"ggufVersion": {
"type": "integer",
"description": "GGUFVersion is the GGUF format version (e.g., 3)"
},
"architecture": {
"type": "string",
"description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")"
},
"quantization": {
"type": "string",
"description": "Quantization is the quantization type (e.g., \"IQ4_NL\", \"Q4_K_M\")"
},
"parameters": {
"type": "integer",
"description": "Parameters is the number of model parameters (if present in header)"
},
"tensorCount": {
"type": "integer",
"description": "TensorCount is the number of tensors in the model"
},
"header": {
"type": "object",
"description": "Header contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
},
"truncatedHeader": {
"type": "boolean",
"description": "TruncatedHeader indicates if the header was truncated during parsing (for very large headers)"
}
},
"type": "object",
"required": [
"modelFormat",
"modelName",
"ggufVersion",
"tensorCount"
],
"description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file."
},
"GithubActionsUseStatement": { "GithubActionsUseStatement": {
"properties": { "properties": {
"value": { "value": {
@ -2474,6 +2538,9 @@
{ {
"$ref": "#/$defs/ErlangRebarLockEntry" "$ref": "#/$defs/ErlangRebarLockEntry"
}, },
{
"$ref": "#/$defs/GgufFileMetadata"
},
{ {
"$ref": "#/$defs/GithubActionsUseStatement" "$ref": "#/$defs/GithubActionsUseStatement"
}, },

View File

@ -15,6 +15,7 @@ import (
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
"github.com/anchore/syft/syft/source/ocimodelsource"
) )
// CreateSBOMConfig specifies all parameters needed for creating an SBOM. // CreateSBOMConfig specifies all parameters needed for creating an SBOM.
@ -483,6 +484,9 @@ func findDefaultTags(src source.Description) ([]string, error) {
return []string{pkgcataloging.DirectoryTag, filecataloging.FileTag}, nil return []string{pkgcataloging.DirectoryTag, filecataloging.FileTag}, nil
case source.SnapMetadata: case source.SnapMetadata:
return []string{pkgcataloging.InstalledTag, filecataloging.FileTag}, nil return []string{pkgcataloging.InstalledTag, filecataloging.FileTag}, nil
case *ocimodelsource.OCIModelMetadata:
// OCI model artifacts should use image-like catalogers since they provide files to scan
return []string{pkgcataloging.ImageTag, filecataloging.FileTag}, nil
default: default:
return nil, fmt.Errorf("unable to determine default cataloger tag for source type=%T", m) return nil, fmt.Errorf("unable to determine default cataloger tag for source type=%T", m)
} }

View File

@ -40,8 +40,11 @@ func EncodeComponent(p pkg.Package, supplier string, locationSorter func(a, b fi
} }
componentType := cyclonedx.ComponentTypeLibrary componentType := cyclonedx.ComponentTypeLibrary
if p.Type == pkg.BinaryPkg { switch p.Type {
case pkg.BinaryPkg:
componentType = cyclonedx.ComponentTypeApplication componentType = cyclonedx.ComponentTypeApplication
case pkg.ModelPkg:
componentType = cyclonedx.ComponentTypeMachineLearningModel
} }
return cyclonedx.Component{ return cyclonedx.Component{

View File

@ -62,7 +62,7 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str
switch component.Type { switch component.Type {
case cyclonedx.ComponentTypeOS: case cyclonedx.ComponentTypeOS:
case cyclonedx.ComponentTypeContainer: case cyclonedx.ComponentTypeContainer:
case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary: case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary, cyclonedx.ComponentTypeMachineLearningModel:
p := decodeComponent(component) p := decodeComponent(component)
idMap[component.BOMRef] = p idMap[component.BOMRef] = p
if component.BOMRef != "" { if component.BOMRef != "" {

View File

@ -54,6 +54,7 @@ func Test_OriginatorSupplier(t *testing.T) {
pkg.OpamPackage{}, pkg.OpamPackage{},
pkg.YarnLockEntry{}, pkg.YarnLockEntry{},
pkg.TerraformLockProviderEntry{}, pkg.TerraformLockProviderEntry{},
pkg.GGUFFileHeader{},
) )
tests := []struct { tests := []struct {
name string name string

View File

@ -82,6 +82,8 @@ func SourceInfo(p pkg.Package) string {
answer = "acquired package info from Homebrew formula" answer = "acquired package info from Homebrew formula"
case pkg.TerraformPkg: case pkg.TerraformPkg:
answer = "acquired package info from Terraform dependency lock file" answer = "acquired package info from Terraform dependency lock file"
case pkg.ModelPkg:
answer = "acquired package info from AI artifact (e.g. GGUF File"
default: default:
answer = "acquired package info from the following paths" answer = "acquired package info from the following paths"
} }

View File

@ -351,6 +351,14 @@ func Test_SourceInfo(t *testing.T) {
"acquired package info from Terraform dependency lock file", "acquired package info from Terraform dependency lock file",
}, },
}, },
{
input: pkg.Package{
Type: pkg.ModelPkg,
},
expected: []string{
"",
},
},
} }
var pkgTypes []pkg.Type var pkgTypes []pkg.Type
for _, test := range tests { for _, test := range tests {

View File

@ -0,0 +1,16 @@
/*
Package ai provides concrete Cataloger implementations for AI artifacts and machine learning models,
including support for GGUF (GPT-Generated Unified Format) model files.
*/
package ai
import (
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
// NewGGUFCataloger builds the generic cataloger named "gguf-cataloger" and
// registers parseGGUFModel as the parser for every path matching "**/*.gguf".
func NewGGUFCataloger() pkg.Cataloger {
	const (
		catalogerName = "gguf-cataloger"
		ggufGlob      = "**/*.gguf"
	)

	c := generic.NewCataloger(catalogerName)
	return c.WithParserByGlobs(parseGGUFModel, ggufGlob)
}

View File

@ -0,0 +1,373 @@
package ai
import (
"os"
"path/filepath"
"testing"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
)
// TestGGUFCataloger_Globs builds temporary fixture directories and checks
// which paths the GGUF cataloger requests content for from the resolver.
// Each case's setup returns the fixture directory; expected lists the paths
// (relative to that directory) the cataloger is expected to query.
func TestGGUFCataloger_Globs(t *testing.T) {
	tests := []struct {
		name     string
		setup    func(t *testing.T) string // returns fixture directory
		expected []string
	}{
		{
			name: "finds GGUF files in root",
			setup: func(t *testing.T) string {
				dir := t.TempDir()
				createTestGGUFInDir(t, dir, "model1.gguf")
				createTestGGUFInDir(t, dir, "model2.gguf")
				return dir
			},
			expected: []string{
				"model1.gguf",
				"model2.gguf",
			},
		},
		{
			name: "finds GGUF files in subdirectories",
			setup: func(t *testing.T) string {
				dir := t.TempDir()
				modelsDir := filepath.Join(dir, "models")
				// fail fast on fixture errors instead of letting them show up
				// later as a confusing cataloger mismatch
				require.NoError(t, os.MkdirAll(modelsDir, 0755))
				createTestGGUFInDir(t, modelsDir, "llama.gguf")
				deepDir := filepath.Join(dir, "deep", "nested", "path")
				require.NoError(t, os.MkdirAll(deepDir, 0755))
				createTestGGUFInDir(t, deepDir, "mistral.gguf")
				return dir
			},
			expected: []string{
				"models/llama.gguf",
				"deep/nested/path/mistral.gguf",
			},
		},
		{
			name: "ignores non-GGUF files",
			setup: func(t *testing.T) string {
				dir := t.TempDir()
				createTestGGUFInDir(t, dir, "model.gguf")
				// Create non-GGUF files that must not match the cataloger glob
				require.NoError(t, os.WriteFile(filepath.Join(dir, "readme.txt"), []byte("readme"), 0644))
				require.NoError(t, os.WriteFile(filepath.Join(dir, "model.bin"), []byte("binary"), 0644))
				require.NoError(t, os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644))
				return dir
			},
			expected: []string{
				"model.gguf",
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			fixtureDir := tt.setup(t)

			pkgtest.NewCatalogTester().
				FromDirectory(t, fixtureDir).
				ExpectsResolverContentQueries(tt.expected).
				TestCataloger(t, NewGGUFCataloger())
		})
	}
}
// TestGGUFCataloger_Integration writes synthetic GGUF files (built with
// newTestGGUFBuilder) into temporary directories, runs the GGUF cataloger
// over each directory, and compares the resulting packages and relationships
// with the expected values. The FoundBy and Locations package fields and the
// Hash metadata field are excluded from the comparison.
func TestGGUFCataloger_Integration(t *testing.T) {
	tests := []struct {
		name                  string
		setup                 func(t *testing.T) string // returns fixture directory
		expectedPackages      []pkg.Package
		expectedRelationships []artifact.Relationship
	}{
		{
			name: "catalog single GGUF file",
			setup: func(t *testing.T) string {
				dir := t.TempDir()
				// NOTE(review): the fixture sets general.quantization and
				// general.parameter_count, yet the expectation below is
				// Quantization "Unknown" and Parameters 0 — presumably the
				// parser does not derive those fields from these keys; confirm.
				data := newTestGGUFBuilder().
					withVersion(3).
					withStringKV("general.architecture", "llama").
					withStringKV("general.name", "llama3-8b").
					withStringKV("general.version", "3.0").
					withStringKV("general.license", "Apache-2.0").
					withStringKV("general.quantization", "Q4_K_M").
					withUint64KV("general.parameter_count", 8030000000).
					build()

				path := filepath.Join(dir, "llama3-8b.gguf")
				// a failed fixture write must abort the test rather than be
				// reported as a missing package
				require.NoError(t, os.WriteFile(path, data, 0644))
				return dir
			},
			expectedPackages: []pkg.Package{
				{
					Name:    "llama3-8b",
					Version: "3.0",
					Type:    pkg.ModelPkg,
					Licenses: pkg.NewLicenseSet(
						pkg.NewLicenseFromFields("Apache-2.0", "", nil),
					),
					Metadata: pkg.GGUFFileHeader{
						ModelFormat:     "gguf",
						ModelName:       "llama3-8b",
						ModelVersion:    "3.0",
						License:         "Apache-2.0",
						Architecture:    "llama",
						Quantization:    "Unknown",
						Parameters:      0,
						GGUFVersion:     3,
						TensorCount:     0,
						Header:          map[string]interface{}{},
						TruncatedHeader: false,
					},
				},
			},
			expectedRelationships: nil,
		},
		{
			name: "catalog multiple GGUF files",
			setup: func(t *testing.T) string {
				dir := t.TempDir()

				// Create first model
				data1 := newTestGGUFBuilder().
					withVersion(3).
					withStringKV("general.architecture", "llama").
					withStringKV("general.name", "model1").
					withStringKV("general.version", "1.0").
					build()
				require.NoError(t, os.WriteFile(filepath.Join(dir, "model1.gguf"), data1, 0644))

				// Create second model
				data2 := newTestGGUFBuilder().
					withVersion(3).
					withStringKV("general.architecture", "mistral").
					withStringKV("general.name", "model2").
					withStringKV("general.version", "2.0").
					build()
				require.NoError(t, os.WriteFile(filepath.Join(dir, "model2.gguf"), data2, 0644))

				return dir
			},
			expectedPackages: []pkg.Package{
				{
					Name:    "model1",
					Version: "1.0",
					Type:    pkg.ModelPkg,
					Metadata: pkg.GGUFFileHeader{
						ModelFormat:     "gguf",
						ModelName:       "model1",
						ModelVersion:    "1.0",
						Architecture:    "llama",
						Quantization:    "Unknown",
						GGUFVersion:     3,
						TensorCount:     0,
						Header:          map[string]interface{}{},
						TruncatedHeader: false,
					},
				},
				{
					Name:    "model2",
					Version: "2.0",
					Type:    pkg.ModelPkg,
					Metadata: pkg.GGUFFileHeader{
						ModelFormat:     "gguf",
						ModelName:       "model2",
						ModelVersion:    "2.0",
						Architecture:    "mistral",
						Quantization:    "Unknown",
						GGUFVersion:     3,
						TensorCount:     0,
						Header:          map[string]interface{}{},
						TruncatedHeader: false,
					},
				},
			},
			expectedRelationships: nil,
		},
		{
			name: "catalog GGUF in nested directories",
			setup: func(t *testing.T) string {
				dir := t.TempDir()
				nestedDir := filepath.Join(dir, "models", "quantized")
				require.NoError(t, os.MkdirAll(nestedDir, 0755))

				// no general.version key is written, so the expected version
				// below is the unknownGGUFData placeholder
				data := newTestGGUFBuilder().
					withVersion(3).
					withStringKV("general.architecture", "qwen").
					withStringKV("general.name", "qwen-nested").
					build()

				require.NoError(t, os.WriteFile(filepath.Join(nestedDir, "qwen.gguf"), data, 0644))
				return dir
			},
			expectedPackages: []pkg.Package{
				{
					Name:    "qwen-nested",
					Version: unknownGGUFData,
					Type:    pkg.ModelPkg,
					Metadata: pkg.GGUFFileHeader{
						ModelFormat:     "gguf",
						ModelName:       "qwen-nested",
						ModelVersion:    unknownGGUFData,
						Architecture:    "qwen",
						Quantization:    "Unknown",
						GGUFVersion:     3,
						TensorCount:     0,
						Header:          map[string]interface{}{},
						TruncatedHeader: false,
					},
				},
			},
			expectedRelationships: nil,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			fixtureDir := tt.setup(t)

			// Use pkgtest to catalog and compare
			tester := pkgtest.NewCatalogTester().
				FromDirectory(t, fixtureDir).
				Expects(tt.expectedPackages, tt.expectedRelationships).
				IgnoreLocationLayer().
				IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger
				WithCompareOptions(
					// Ignore Hash as it's computed dynamically
					cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "Hash"),
				)

			tester.TestCataloger(t, NewGGUFCataloger())
		})
	}
}
// TestGGUFCataloger_SkipsInvalidFiles places one valid GGUF file next to a
// file built with a wrong magic value and a one-byte file, then asserts that
// cataloging yields exactly one package, named after the valid model.
func TestGGUFCataloger_SkipsInvalidFiles(t *testing.T) {
	dir := t.TempDir()

	// Create a valid GGUF
	validData := newTestGGUFBuilder().
		withVersion(3).
		withStringKV("general.architecture", "llama").
		withStringKV("general.name", "valid-model").
		build()
	// fixture writes are checked so a filesystem failure is not mistaken for
	// the cataloger skipping a file
	require.NoError(t, os.WriteFile(filepath.Join(dir, "valid.gguf"), validData, 0644))

	// Create an invalid GGUF (wrong magic)
	invalidData := newTestGGUFBuilder().buildInvalidMagic()
	require.NoError(t, os.WriteFile(filepath.Join(dir, "invalid.gguf"), invalidData, 0644))

	// Create a truncated GGUF (a single byte)
	require.NoError(t, os.WriteFile(filepath.Join(dir, "truncated.gguf"), []byte{0x47}, 0644))

	// Catalog should succeed and only return the valid package
	tester := pkgtest.NewCatalogTester().
		FromDirectory(t, dir).
		ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
			// Should only find the valid model
			require.Len(t, pkgs, 1)
			assert.Equal(t, "valid-model", pkgs[0].Name)
		})

	tester.TestCataloger(t, NewGGUFCataloger())
}
// TestGGUFCataloger_Name pins the name reported by the GGUF cataloger.
func TestGGUFCataloger_Name(t *testing.T) {
	const want = "gguf-cataloger"
	got := NewGGUFCataloger().Name()
	assert.Equal(t, want, got)
}
func TestGGUFCataloger_EmptyDirectory(t *testing.T) {
dir := t.TempDir()
// Create a subdirectory to ensure glob still runs
os.MkdirAll(filepath.Join(dir, "models"), 0755)
tester := pkgtest.NewCatalogTester().
FromDirectory(t, dir).
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, rels []artifact.Relationship) {
assert.Empty(t, pkgs)
assert.Empty(t, rels)
})
tester.TestCataloger(t, NewGGUFCataloger())
}
// TestGGUFCataloger_MixedFiles verifies that only the .gguf file is cataloged when it sits next
// to unrelated files and directories.
func TestGGUFCataloger_MixedFiles(t *testing.T) {
	dir := t.TempDir()

	// Create GGUF file
	ggufData := newTestGGUFBuilder().
		withVersion(3).
		withStringKV("general.architecture", "llama").
		withStringKV("general.name", "test-model").
		build()
	require.NoError(t, os.WriteFile(filepath.Join(dir, "model.gguf"), ggufData, 0644))

	// Create other file types; fixture errors abort the test instead of being silently ignored
	require.NoError(t, os.WriteFile(filepath.Join(dir, "README.md"), []byte("# Models"), 0644))
	require.NoError(t, os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644))
	require.NoError(t, os.WriteFile(filepath.Join(dir, "weights.bin"), []byte("weights"), 0644))
	require.NoError(t, os.MkdirAll(filepath.Join(dir, "subdir"), 0755))

	tester := pkgtest.NewCatalogTester().
		FromDirectory(t, dir).
		ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
			// Should only find the GGUF model
			require.Len(t, pkgs, 1)
			assert.Equal(t, "test-model", pkgs[0].Name)
			assert.Equal(t, pkg.ModelPkg, pkgs[0].Type)
		})
	tester.TestCataloger(t, NewGGUFCataloger())
}
// TestGGUFCataloger_CaseInsensitiveGlob exercises the cataloger against file names that differ
// only in case ("model.gguf" and "MODEL.GGUF"). The assertion is deliberately loose: it only
// requires that the lowercase file is found, because the number of distinct files on disk
// depends on the case sensitivity of the filesystem running the test.
func TestGGUFCataloger_CaseInsensitiveGlob(t *testing.T) {
	dir := t.TempDir()

	// Create lowercase .gguf
	data := newTestGGUFBuilder().
		withVersion(3).
		withStringKV("general.architecture", "llama").
		withStringKV("general.name", "lowercase").
		build()
	require.NoError(t, os.WriteFile(filepath.Join(dir, "model.gguf"), data, 0644))

	// Create uppercase .GGUF; on a case-insensitive filesystem this name may refer to the
	// same file that was just written
	require.NoError(t, os.WriteFile(filepath.Join(dir, "MODEL.GGUF"), data, 0644))

	tester := pkgtest.NewCatalogTester().
		FromDirectory(t, dir).
		ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
			// Depending on filesystem case-sensitivity and glob matching, we may get 1 or 2
			// packages; the lowercase file must always be discovered
			assert.GreaterOrEqual(t, len(pkgs), 1, "should find at least the lowercase file")
		})
	tester.TestCataloger(t, NewGGUFCataloger())
}
// createTestGGUFInDir writes a minimal GGUF fixture (version 3, architecture "llama", name
// "test-model") to dir/filename and fails the calling test if the file cannot be written.
func createTestGGUFInDir(t *testing.T, dir, filename string) {
	t.Helper()

	builder := newTestGGUFBuilder().withVersion(3)
	builder = builder.withStringKV("general.architecture", "llama")
	builder = builder.withStringKV("general.name", "test-model")

	target := filepath.Join(dir, filename)
	require.NoError(t, os.WriteFile(target, builder.build(), 0644))
}

View File

@ -0,0 +1,69 @@
package ai
import (
"crypto/sha256"
"encoding/json"
"fmt"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
// newGGUFPackage assembles a model package from parsed GGUF header metadata and the locations
// the model was found at. The package name and version come from the metadata, and a license
// is attached when the metadata carries one.
//
// If metadata.Hash is empty it is computed and stored through the pointer, so the caller's
// metadata value is updated as well as the copy attached to the returned package.
func newGGUFPackage(metadata *pkg.GGUFFileHeader, locations ...file.Location) pkg.Package {
	if metadata.Hash == "" {
		metadata.Hash = computeMetadataHash(metadata)
	}

	// collect licenses first so the package literal below is complete in one step
	licenses := pkg.NewLicenseSet()
	if metadata.License != "" {
		licenses.Add(pkg.NewLicenseFromFields(metadata.License, "", nil))
	}

	// NOTE: PURL is intentionally not set as the package-url spec
	// has not yet finalized support for ML model packages
	result := pkg.Package{
		Name:      metadata.ModelName,
		Version:   metadata.ModelVersion,
		Locations: file.NewLocationSet(locations...),
		Type:      pkg.ModelPkg,
		Licenses:  licenses,
		Metadata:  *metadata,
	}
	result.SetID()
	return result
}
// computeMetadataHash derives a short, deterministic identifier from a fixed subset of the
// metadata: format, name, version, architecture, GGUF version and tensor count. The subset is
// JSON-encoded, hashed with SHA-256, and the first 8 bytes are returned as 16 hex characters.
// An empty string is returned if the subset cannot be encoded.
func computeMetadataHash(metadata *pkg.GGUFFileHeader) string {
	// field names and order define the encoded form and therefore the resulting hash
	type hashInput struct {
		Format       string
		Name         string
		Version      string
		Architecture string
		GGUFVersion  uint32
		TensorCount  uint64
	}

	encoded, err := json.Marshal(hashInput{
		Format:       metadata.ModelFormat,
		Name:         metadata.ModelName,
		Version:      metadata.ModelVersion,
		Architecture: metadata.Architecture,
		GGUFVersion:  metadata.GGUFVersion,
		TensorCount:  metadata.TensorCount,
	})
	if err != nil {
		log.Debugf("failed to marshal metadata for hashing: %v", err)
		return ""
	}

	digest := sha256.Sum256(encoded)
	// keep only the leading 8 bytes (16 hex chars)
	return fmt.Sprintf("%x", digest[:8])
}

View File

@ -0,0 +1,126 @@
package ai
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
// TestNewGGUFPackage checks that newGGUFPackage maps GGUF header metadata onto the package
// fields (name, version, type, licenses, locations) and attaches the metadata itself.
func TestNewGGUFPackage(t *testing.T) {
	tests := []struct {
		name      string
		metadata  *pkg.GGUFFileHeader
		locations []file.Location
		checkFunc func(t *testing.T, p pkg.Package)
	}{
		{
			name: "complete GGUF package with all fields",
			metadata: &pkg.GGUFFileHeader{
				ModelFormat:     "gguf",
				ModelName:       "llama3-8b-instruct",
				ModelVersion:    "3.0",
				License:         "Apache-2.0",
				Architecture:    "llama",
				Quantization:    "Q4_K_M",
				Parameters:      8030000000,
				GGUFVersion:     3,
				TensorCount:     291,
				Header:          map[string]any{},
				TruncatedHeader: false,
			},
			locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
			checkFunc: func(t *testing.T, p pkg.Package) {
				if d := cmp.Diff("llama3-8b-instruct", p.Name); d != "" {
					t.Errorf("Name mismatch (-want +got):\n%s", d)
				}
				if d := cmp.Diff("3.0", p.Version); d != "" {
					t.Errorf("Version mismatch (-want +got):\n%s", d)
				}
				if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
					t.Errorf("Type mismatch (-want +got):\n%s", d)
				}
				assert.Empty(t, p.PURL, "PURL should not be set for model packages")
				// require (not assert): the next statement indexes element 0 and would panic
				// on an empty slice
				licenses := p.Licenses.ToSlice()
				require.Len(t, licenses, 1)
				if d := cmp.Diff("Apache-2.0", licenses[0].Value); d != "" {
					t.Errorf("License value mismatch (-want +got):\n%s", d)
				}
				assert.NotEmpty(t, p.ID())
			},
		},
		{
			name: "minimal GGUF package",
			metadata: &pkg.GGUFFileHeader{
				ModelFormat:  "gguf",
				ModelName:    "simple-model",
				ModelVersion: "1.0",
				Architecture: "gpt2",
				GGUFVersion:  3,
				TensorCount:  50,
			},
			locations: []file.Location{file.NewLocation("/models/simple.gguf")},
			checkFunc: func(t *testing.T, p pkg.Package) {
				if d := cmp.Diff("simple-model", p.Name); d != "" {
					t.Errorf("Name mismatch (-want +got):\n%s", d)
				}
				if d := cmp.Diff("1.0", p.Version); d != "" {
					t.Errorf("Version mismatch (-want +got):\n%s", d)
				}
				if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
					t.Errorf("Type mismatch (-want +got):\n%s", d)
				}
				assert.Empty(t, p.PURL, "PURL should not be set for model packages")
				assert.Empty(t, p.Licenses.ToSlice())
			},
		},
		{
			name: "GGUF package with multiple locations",
			metadata: &pkg.GGUFFileHeader{
				ModelFormat:  "gguf",
				ModelName:    "multi-location-model",
				ModelVersion: "1.5",
				Architecture: "llama",
				GGUFVersion:  3,
				TensorCount:  150,
			},
			locations: []file.Location{
				file.NewLocation("/models/model1.gguf"),
				file.NewLocation("/models/model2.gguf"),
			},
			checkFunc: func(t *testing.T, p pkg.Package) {
				assert.Len(t, p.Locations.ToSlice(), 2)
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			p := newGGUFPackage(tt.metadata, tt.locations...)

			if d := cmp.Diff(tt.metadata.ModelName, p.Name); d != "" {
				t.Errorf("Name mismatch (-want +got):\n%s", d)
			}
			if d := cmp.Diff(tt.metadata.ModelVersion, p.Version); d != "" {
				t.Errorf("Version mismatch (-want +got):\n%s", d)
			}
			if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
				t.Errorf("Type mismatch (-want +got):\n%s", d)
			}

			// Verify metadata is attached. newGGUFPackage fills in Hash on the metadata it is
			// given, so tt.metadata already carries the computed hash at this point.
			metadata, ok := p.Metadata.(pkg.GGUFFileHeader)
			require.True(t, ok, "metadata should be GGUFFileHeader")
			if d := cmp.Diff(*tt.metadata, metadata); d != "" {
				t.Errorf("Metadata mismatch (-want +got):\n%s", d)
			}

			if tt.checkFunc != nil {
				tt.checkFunc(t, p)
			}
		})
	}
}

View File

@ -0,0 +1,89 @@
package ai
import (
"encoding/binary"
"fmt"
"io"
gguf_parser "github.com/gpustack/gguf-parser-go"
)
// GGUF file format constants
const (
	ggufMagicNumber = 0x46554747       // "GGUF" in little-endian
	maxHeaderSize   = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies
)

// ggufHeaderReader reads the leading portion of a GGUF file from an underlying reader.
type ggufHeaderReader struct {
	reader io.Reader
}

// readHeader validates the GGUF magic number and returns the leading bytes of the stream.
//
// The fixed-size prefix (magic, version, tensor count, metadata KV count) does not say how long
// the metadata section is, so this method does not stop at the end of the header: it returns
// everything available from the reader up to a total of maxHeaderSize bytes. The cap is enforced
// here, so the result is bounded even when the caller has not wrapped the reader in an
// io.LimitedReader; a tighter limit applied by the caller still takes effect.
//
// An error is returned when the prefix cannot be read in full, when the magic number does not
// match, or when the underlying reader fails with anything other than io.EOF.
func (r *ggufHeaderReader) readHeader() ([]byte, error) {
	// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
	const prefixSize = 24

	initialBuf := make([]byte, prefixSize)
	if _, err := io.ReadFull(r.reader, initialBuf); err != nil {
		return nil, fmt.Errorf("failed to read GGUF header prefix: %w", err)
	}

	// Verify magic number
	magic := binary.LittleEndian.Uint32(initialBuf[0:4])
	if magic != ggufMagicNumber {
		return nil, fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
	}

	headerData := make([]byte, 0, 1024*1024) // Start with 1MB capacity
	headerData = append(headerData, initialBuf...)

	// Bound the remaining read so prefix + remainder never exceeds maxHeaderSize; the limited
	// reader reports io.EOF once the budget is used up.
	remaining := io.LimitReader(r.reader, maxHeaderSize-prefixSize)

	buf := make([]byte, 64*1024) // 64KB chunks
	for {
		n, err := remaining.Read(buf)
		if n > 0 {
			headerData = append(headerData, buf[:n]...)
		}
		if err == io.EOF {
			// Reached end of file or the size cap, we have all available data
			break
		}
		if err != nil {
			return nil, fmt.Errorf("failed to read GGUF header: %w", err)
		}
	}
	return headerData, nil
}
// convertGGUFMetadataKVs flattens parsed GGUF metadata entries into a plain key/value map.
// Keys that are already surfaced as dedicated metadata fields are left out, and at most
// maxKVPairs entries are accepted so the resulting map stays small.
func convertGGUFMetadataKVs(kvs gguf_parser.GGUFMetadataKVs) map[string]interface{} {
	// upper bound on the number of entries copied into the result
	const maxKVPairs = 200

	out := make(map[string]interface{})
	accepted := 0
	for _, entry := range kvs {
		if accepted >= maxKVPairs {
			break
		}
		switch entry.Key {
		case "general.architecture", "general.name", "general.license",
			"general.version", "general.parameter_count", "general.quantization":
			// extracted separately; not repeated here
		default:
			out[entry.Key] = entry.Value
			accepted++
		}
	}
	return out
}

View File

@ -0,0 +1,130 @@
package ai
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"strings"
gguf_parser "github.com/gpustack/gguf-parser-go"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
// unknownGGUFData is the placeholder used for the model version when no version can be
// determined from the GGUF metadata.
const unknownGGUFData = "unknown"
// parseGGUFModel parses a GGUF model file and returns the discovered package.
//
// At most maxHeaderSize bytes are read from the file. Those bytes are written to a temporary
// file (the parsing library takes a path), parsed, and converted into a pkg.GGUFFileHeader.
// The model name falls back to the file name when the metadata has none. The temporary file
// is removed before returning. No relationships are produced.
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
	defer internal.CloseAndLogError(reader, reader.Path())

	// Read and validate the GGUF file header using LimitedReader to prevent OOM
	// We use LimitedReader to cap reads at maxHeaderSize (50MB)
	limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
	headerReader := &ggufHeaderReader{reader: limitedReader}
	headerData, err := headerReader.readHeader()
	if err != nil {
		return nil, nil, fmt.Errorf("failed to read GGUF header: %w", err)
	}

	// Create a temporary file for the library to parse
	// The library requires a file path, so we create a temp file
	tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf")
	if err != nil {
		return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
	}
	tempPath := tempFile.Name()
	defer os.Remove(tempPath)

	// Write the validated header data and always close the file. A failed close can mean the
	// data never reached disk, so it is reported instead of parsing a possibly incomplete file.
	_, writeErr := tempFile.Write(headerData)
	closeErr := tempFile.Close()
	if writeErr != nil {
		return nil, nil, fmt.Errorf("failed to write to temp file: %w", writeErr)
	}
	if closeErr != nil {
		return nil, nil, fmt.Errorf("failed to close temp file: %w", closeErr)
	}

	// Parse using gguf-parser-go with options to skip unnecessary data
	ggufFile, err := gguf_parser.ParseGGUFFile(tempPath,
		gguf_parser.SkipLargeMetadata(),
	)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to parse GGUF file: %w", err)
	}

	// Extract metadata
	metadata := ggufFile.Metadata()

	// Convert to syft metadata structure
	syftMetadata := &pkg.GGUFFileHeader{
		ModelFormat:     "gguf",
		ModelName:       metadata.Name,
		ModelVersion:    extractVersion(ggufFile.Header.MetadataKV),
		License:         metadata.License,
		Architecture:    metadata.Architecture,
		Quantization:    metadata.FileTypeDescriptor,
		Parameters:      uint64(metadata.Parameters),
		GGUFVersion:     uint32(ggufFile.Header.Version),
		TensorCount:     ggufFile.Header.TensorCount,
		Header:          convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
		TruncatedHeader: false, // never reported as truncated by this parser
		Hash:            "",    // Will be computed in newGGUFPackage
	}

	// If model name is not in metadata, use filename
	if syftMetadata.ModelName == "" {
		syftMetadata.ModelName = extractModelNameFromPath(reader.Path())
	}

	// If version is still unknown, try to infer from name
	if syftMetadata.ModelVersion == unknownGGUFData {
		syftMetadata.ModelVersion = extractVersionFromName(syftMetadata.ModelName)
	}

	// Create package from metadata
	p := newGGUFPackage(
		syftMetadata,
		reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
	)

	pkgs := []pkg.Package{p}
	return pkgs, nil, unknown.IfEmptyf(pkgs, "unable to parse GGUF file")
}
// extractVersion returns the value of the first "general.version" entry that holds a non-empty
// string, or unknownGGUFData when there is no such entry.
func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string {
	for _, entry := range kvs {
		if entry.Key != "general.version" {
			continue
		}
		version, isString := entry.Value.(string)
		if isString && version != "" {
			return version
		}
	}
	return unknownGGUFData
}
// extractVersionFromName is a placeholder for inferring a version from the model name.
// It currently ignores its argument and always returns unknownGGUFData.
func extractVersionFromName(_ string) string {
// Not implemented yet: a future version could look for patterns such as "v1.0", "1.5b"
// or "3.0" in the name (for example with a regular expression).
return unknownGGUFData
}
// extractModelNameFromPath derives a model name from a file path: the final path element with
// a trailing ".gguf" removed. The suffix match is case-sensitive.
func extractModelNameFromPath(path string) string {
	return strings.TrimSuffix(filepath.Base(path), ".gguf")
}
// compile-time check that parseGGUFModel has the generic.Parser function signature
var _ generic.Parser = parseGGUFModel

View File

@ -0,0 +1,41 @@
package ai
import (
"fmt"
"os"
gguf_parser "github.com/gpustack/gguf-parser-go"
)
// main builds a small GGUF fixture, writes it to a temporary file, parses it with
// gguf-parser-go and prints the outcome. It panics if the temporary file cannot be created
// or written.
//
// NOTE(review): this file declares package ai, so this function is an ordinary unexported
// function rather than a program entry point; it looks like a debugging aid — confirm whether
// it should be kept.
func main() {
	// Create a test GGUF file
	fixture := newTestGGUFBuilder().
		withVersion(3).
		withStringKV("general.architecture", "llama").
		withStringKV("general.name", "test-model").
		build()

	// Write to temp file
	tmp, err := os.CreateTemp("", "test-*.gguf")
	if err != nil {
		panic(err)
	}
	tmpPath := tmp.Name()
	defer os.Remove(tmpPath)

	_, err = tmp.Write(fixture)
	if err != nil {
		panic(err)
	}
	tmp.Close()
	fmt.Printf("Wrote %d bytes to %s\n", len(fixture), tmpPath)

	// Try to parse it
	fmt.Println("Attempting to parse...")
	parsed, err := gguf_parser.ParseGGUFFile(tmpPath, gguf_parser.SkipLargeMetadata())
	if err != nil {
		fmt.Printf("Parse error: %v\n", err)
		return
	}
	fmt.Printf("Success! Model: %s\n", parsed.Metadata().Name)
}

View File

@ -0,0 +1,127 @@
package ai
import (
"bytes"
"encoding/binary"
)
// GGUF type constants for test builder
const (
	ggufMagic       = 0x46554747 // "GGUF" in little-endian
	ggufTypeUint8   = 0
	ggufTypeInt8    = 1
	ggufTypeUint16  = 2
	ggufTypeInt16   = 3
	ggufTypeUint32  = 4
	ggufTypeInt32   = 5
	ggufTypeFloat32 = 6
	ggufTypeBool    = 7
	ggufTypeString  = 8
	ggufTypeArray   = 9
	ggufTypeUint64  = 10
	ggufTypeInt64   = 11
	ggufTypeFloat64 = 12
)

// testGGUFBuilder helps build GGUF files for testing. Configure it with the with* methods and
// call build to obtain the encoded bytes.
type testGGUFBuilder struct {
	buf         *bytes.Buffer
	version     uint32
	tensorCount uint64
	kvPairs     []testKVPair
}

// testKVPair is one metadata entry: a key, the GGUF value type tag, and the value itself.
// The dynamic type of value must match valueType (e.g. string for ggufTypeString).
type testKVPair struct {
	key       string
	valueType uint32
	value     interface{}
}

// newTestGGUFBuilder returns a builder for a version 3 file with no tensors and no metadata.
func newTestGGUFBuilder() *testGGUFBuilder {
	return &testGGUFBuilder{
		buf:         new(bytes.Buffer),
		version:     3,
		tensorCount: 0,
		kvPairs:     []testKVPair{},
	}
}

// withVersion sets the GGUF format version written to the header.
func (b *testGGUFBuilder) withVersion(v uint32) *testGGUFBuilder {
	b.version = v
	return b
}

// withTensorCount sets the tensor count written to the header (no tensor info is emitted).
func (b *testGGUFBuilder) withTensorCount(count uint64) *testGGUFBuilder {
	b.tensorCount = count
	return b
}

// withStringKV appends a string-typed metadata entry.
func (b *testGGUFBuilder) withStringKV(key, value string) *testGGUFBuilder {
	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeString, value: value})
	return b
}

// withUint64KV appends a uint64-typed metadata entry.
func (b *testGGUFBuilder) withUint64KV(key string, value uint64) *testGGUFBuilder {
	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint64, value: value})
	return b
}

// withUint32KV appends a uint32-typed metadata entry.
func (b *testGGUFBuilder) withUint32KV(key string, value uint32) *testGGUFBuilder {
	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint32, value: value})
	return b
}

// writeString writes a length-prefixed string: a little-endian uint64 byte length followed by
// the raw bytes.
func (b *testGGUFBuilder) writeString(s string) {
	binary.Write(b.buf, binary.LittleEndian, uint64(len(s)))
	b.buf.WriteString(s)
}

// build encodes the configured header and metadata entries and returns the bytes.
//
// The internal buffer is cleared first, so calling build more than once yields the same output
// instead of appending a second copy. The returned slice is a copy and stays valid across later
// build calls. Entries whose value type is not handled by the switch below are written with
// key and type tag only (no value bytes).
func (b *testGGUFBuilder) build() []byte {
	b.buf.Reset()

	// Write magic number "GGUF"
	binary.Write(b.buf, binary.LittleEndian, uint32(ggufMagic))
	// Write version
	binary.Write(b.buf, binary.LittleEndian, b.version)
	// Write tensor count
	binary.Write(b.buf, binary.LittleEndian, b.tensorCount)
	// Write KV count
	binary.Write(b.buf, binary.LittleEndian, uint64(len(b.kvPairs)))

	// Write KV pairs
	for _, kv := range b.kvPairs {
		// Write key
		b.writeString(kv.key)
		// Write value type
		binary.Write(b.buf, binary.LittleEndian, kv.valueType)
		// Write value based on type
		switch kv.valueType {
		case ggufTypeString:
			b.writeString(kv.value.(string))
		case ggufTypeUint32:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(uint32))
		case ggufTypeUint64:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(uint64))
		case ggufTypeUint8:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(uint8))
		case ggufTypeInt32:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(int32))
		case ggufTypeBool:
			var v uint8
			if kv.value.(bool) {
				v = 1
			}
			binary.Write(b.buf, binary.LittleEndian, v)
		}
	}

	// copy so the result does not alias the reusable internal buffer
	return append([]byte(nil), b.buf.Bytes()...)
}

// buildInvalidMagic creates a file with invalid magic number. The output is just the four
// magic bytes (0x12345678, little-endian); the builder's configuration is not used.
func (b *testGGUFBuilder) buildInvalidMagic() []byte {
	buf := new(bytes.Buffer)
	binary.Write(buf, binary.LittleEndian, uint32(0x12345678))
	return buf.Bytes()
}

47
syft/pkg/gguf.go Normal file
View File

@ -0,0 +1,47 @@
package pkg
// GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file.
// GGUF is a binary file format used for storing model weights for the GGML library, designed for fast
// loading and saving of models, particularly quantized large language models.
type GGUFFileHeader struct {
// ModelFormat is always "gguf"
ModelFormat string `json:"modelFormat" cyclonedx:"modelFormat"`
// ModelName is the name of the model (from general.name or filename)
ModelName string `json:"modelName" cyclonedx:"modelName"`
// ModelVersion is the version of the model (from general.version if available in the header,
// else the literal "unknown")
ModelVersion string `json:"modelVersion,omitempty" cyclonedx:"modelVersion"`
// FileSize is the size of the GGUF file in bytes (best-effort if available from resolver).
// NOTE(review): the GGUF parser in this change does not populate this field — confirm whether
// another code path sets it.
FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`
// Hash is a short identifier derived from selected metadata fields (format, name, version,
// architecture, GGUF version, tensor count), not from the file content. It is intended as a
// stable global identifier across remotes.
Hash string `json:"hash,omitempty" cyclonedx:"hash"`
// License is the license identifier (from general.license if present)
License string `json:"license,omitempty" cyclonedx:"license"`
// GGUFVersion is the GGUF format version (e.g., 3)
GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"`
// Architecture is the model architecture (from general.architecture, e.g., "qwen3moe", "llama")
Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`
// Quantization is the quantization type (e.g., "IQ4_NL", "Q4_K_M")
Quantization string `json:"quantization,omitempty" cyclonedx:"quantization"`
// Parameters is the number of model parameters (if present in header)
Parameters uint64 `json:"parameters,omitempty" cyclonedx:"parameters"`
// TensorCount is the number of tensors in the model
TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`
// Header contains the remaining key-value pairs from the GGUF header that are not already
// represented as typed fields above. This preserves additional metadata fields for reference
// (namespaced with general.*, llama.*, etc.) while avoiding duplication. The GGUF parser
// keeps at most 200 entries here.
Header map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
// TruncatedHeader indicates if the header was truncated during parsing (for very large headers).
// The GGUF parser in this change always sets it to false.
TruncatedHeader bool `json:"truncatedHeader,omitempty" cyclonedx:"truncatedHeader"`
}

View File

@ -50,6 +50,7 @@ const (
TerraformPkg Type = "terraform" TerraformPkg Type = "terraform"
WordpressPluginPkg Type = "wordpress-plugin" WordpressPluginPkg Type = "wordpress-plugin"
HomebrewPkg Type = "homebrew" HomebrewPkg Type = "homebrew"
ModelPkg Type = "model"
) )
// AllPkgs represents all supported package types // AllPkgs represents all supported package types
@ -94,6 +95,7 @@ var AllPkgs = []Type{
TerraformPkg, TerraformPkg,
WordpressPluginPkg, WordpressPluginPkg,
HomebrewPkg, HomebrewPkg,
ModelPkg,
} }
// PackageURLType returns the PURL package type for the current package. // PackageURLType returns the PURL package type for the current package.

View File

@ -155,6 +155,7 @@ func TestTypeFromPURL(t *testing.T) {
expectedTypes.Remove(string(HomebrewPkg)) expectedTypes.Remove(string(HomebrewPkg))
expectedTypes.Remove(string(TerraformPkg)) expectedTypes.Remove(string(TerraformPkg))
expectedTypes.Remove(string(GraalVMNativeImagePkg)) expectedTypes.Remove(string(GraalVMNativeImagePkg))
expectedTypes.Remove(string(ModelPkg)) // no valid purl for ai artifacts currently
expectedTypes.Remove(string(PhpPeclPkg)) // we should always consider this a pear package expectedTypes.Remove(string(PhpPeclPkg)) // we should always consider this a pear package
for _, test := range tests { for _, test := range tests {

View File

@ -0,0 +1,39 @@
package ocimodelsource
import "github.com/anchore/syft/syft/source"
// OCIModelMetadata represents all static metadata that defines what an OCI model artifact is.
// This is similar to ImageMetadata but includes model-specific fields and OCI artifact annotations.
// It is used as the Metadata of the source description for OCI model sources.
type OCIModelMetadata struct {
// Core OCI artifact metadata (mirrors ImageMetadata)
// UserInput is the reference string the artifact was resolved from.
UserInput string `json:"userInput"`
// ID and ManifestDigest are both populated from the artifact's manifest digest.
ID string `json:"artifactID"`
ManifestDigest string `json:"manifestDigest"`
MediaType string `json:"mediaType"`
Tags []string `json:"tags"`
// Size is the sum of the sizes of all manifest layers.
Size int64 `json:"artifactSize"`
Layers []source.LayerMetadata `json:"layers"`
RawManifest []byte `json:"manifest"`
RawConfig []byte `json:"config"`
// RepoDigests holds "<repository>@<manifest digest>" when a manifest digest is known.
RepoDigests []string `json:"repoDigests"`
Architecture string `json:"architecture"`
Variant string `json:"architectureVariant,omitempty"`
OS string `json:"os"`
Labels map[string]string `json:"labels,omitempty"`
// OCI-specific metadata
Annotations map[string]string `json:"annotations,omitempty"`
// Model-specific metadata
ModelFormat string `json:"modelFormat,omitempty"` // e.g., "gguf"
// GGUFLayers describes each GGUF layer whose header was fetched for this source.
GGUFLayers []GGUFLayerInfo `json:"ggufLayers,omitempty"`
}
// GGUFLayerInfo represents metadata about a GGUF layer in the OCI artifact, including how much
// of the layer was actually downloaded.
type GGUFLayerInfo struct {
Digest string `json:"digest"`
Size int64 `json:"size"` // Full blob size in registry
MediaType string `json:"mediaType"` // Should be "application/vnd.docker.ai.gguf.v3"
Annotations map[string]string `json:"annotations,omitempty"`
FetchedBytes int64 `json:"fetchedBytes"` // How many bytes we actually fetched via range-GET
}

View File

@ -0,0 +1,260 @@
package ocimodelsource
import (
	"context"
	"fmt"
	"os"
	"sync"

	"github.com/opencontainers/go-digest"

	"github.com/anchore/syft/internal/log"
	"github.com/anchore/syft/syft/artifact"
	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/source"
	"github.com/anchore/syft/syft/source/internal"
)
// compile-time check that *ociModelSource implements source.Source
var _ source.Source = (*ociModelSource)(nil)
// Config holds the configuration for an OCI model artifact source.
type Config struct {
// Reference is the string form of the artifact reference.
Reference string
// NOTE(review): Platform is not set by NewFromArtifact — confirm where it is meant to be used.
Platform string
// Alias optionally overrides the name/version/supplier reported for the source.
Alias source.Alias
// Client is the registry client the artifact was fetched with.
Client *RegistryClient
// Metadata is the static description of the artifact.
Metadata *OCIModelMetadata
TempFiles map[string]string // Virtual path -> temp file path
}
// ociModelSource implements the source.Source interface for OCI model artifacts.
type ociModelSource struct {
// id is derived once at construction time
id artifact.ID
config Config
// resolver is created lazily by FileResolver and released by Close
resolver *ociModelResolver
// mutex guards resolver
mutex *sync.Mutex
}
// NewFromArtifact creates a new OCI model source from a fetched model artifact.
//
// For every GGUF layer of the artifact, the first MaxHeaderBytes of the blob are fetched and
// written to a temp file that is registered under a virtual path derived from the layer's
// annotations. The resulting metadata records, per layer, how many bytes were fetched.
//
// On any failure the temp files created so far are removed (best effort) before the error is
// returned, so a failed construction does not leave files behind.
func NewFromArtifact(artifact *ModelArtifact, client *RegistryClient, alias source.Alias) (source.Source, error) {
	// Build metadata
	metadata := buildMetadata(artifact)

	// Fetch GGUF layer headers via range-GET
	tempFiles := make(map[string]string)
	ggufLayers := make([]GGUFLayerInfo, 0, len(artifact.GGUFLayers))

	// cleanup discards every temp file registered so far; removal errors are ignored because
	// the caller is already receiving the primary error
	cleanup := func() {
		for _, path := range tempFiles {
			_ = removeFile(path)
		}
	}

	for idx, layer := range artifact.GGUFLayers {
		log.WithFields("digest", layer.Digest, "size", layer.Size).Debug("fetching GGUF layer header")

		// Fetch header via range-GET
		headerData, err := client.FetchBlobRange(context.Background(), artifact.Reference, layer.Digest, MaxHeaderBytes)
		if err != nil {
			cleanup()
			return nil, fmt.Errorf("failed to fetch GGUF layer header: %w", err)
		}

		// Extract virtual path from annotations
		annotations := extractAnnotations(layer.Annotations)
		virtualPath := extractVirtualPath(idx, annotations)

		// Create temp file
		tempPath, err := createTempFileFromData(headerData, virtualPath)
		if err != nil {
			cleanup()
			return nil, fmt.Errorf("failed to create temp file: %w", err)
		}

		// If two layers map to the same virtual path, the later one wins; drop the earlier
		// temp file so it is not orphaned once its map entry is overwritten.
		if previous, exists := tempFiles[virtualPath]; exists {
			log.WithFields("virtualPath", virtualPath, "tempPath", previous).Debug("replacing temp file for duplicate virtual path")
			_ = removeFile(previous)
		}
		tempFiles[virtualPath] = tempPath

		// Add to GGUF layers metadata
		ggufLayers = append(ggufLayers, GGUFLayerInfo{
			Digest:       layer.Digest.String(),
			Size:         layer.Size,
			MediaType:    string(layer.MediaType),
			Annotations:  annotations,
			FetchedBytes: int64(len(headerData)),
		})
		log.WithFields("virtualPath", virtualPath, "tempPath", tempPath, "bytes", len(headerData)).Debug("created temp file for GGUF header")
	}

	// Update metadata with GGUF layers
	metadata.GGUFLayers = ggufLayers
	metadata.ModelFormat = "gguf"

	// Build config
	config := Config{
		Reference: artifact.Reference.String(),
		Alias:     alias,
		Client:    client,
		Metadata:  metadata,
		TempFiles: tempFiles,
	}

	// Derive artifact ID
	id := deriveIDFromArtifact(config)

	return &ociModelSource{
		id:     id,
		config: config,
		mutex:  &sync.Mutex{},
	}, nil
}
// buildMetadata assembles the static OCIModelMetadata for an artifact from its reference,
// manifest and config. Model-specific fields (ModelFormat, GGUFLayers) are left unset.
func buildMetadata(artifact *ModelArtifact) *OCIModelMetadata {
	// one LayerMetadata per manifest layer, in manifest order
	manifestLayers := artifact.Manifest.Layers
	layers := make([]source.LayerMetadata, 0, len(manifestLayers))
	for _, l := range manifestLayers {
		layers = append(layers, source.LayerMetadata{
			MediaType: string(l.MediaType),
			Digest:    l.Digest.String(),
			Size:      l.Size,
		})
	}

	// a tag is recorded only when the reference exposes a non-empty one
	var tags []string
	if withTag, ok := artifact.Reference.(interface{ TagStr() string }); ok {
		tag := withTag.TagStr()
		if tag != "" {
			tags = []string{tag}
		}
	}

	// repo digest has the form "<repository>@<manifest digest>"
	var repoDigests []string
	if artifact.ManifestDigest != "" {
		repository := artifact.Reference.Context().String()
		repoDigests = []string{repository + "@" + artifact.ManifestDigest}
	}

	md := &OCIModelMetadata{
		UserInput:      artifact.Reference.String(),
		ID:             artifact.ManifestDigest,
		ManifestDigest: artifact.ManifestDigest,
		MediaType:      string(artifact.Manifest.MediaType),
		Tags:           tags,
		Size:           calculateTotalSize(layers),
		Layers:         layers,
		RawManifest:    artifact.RawManifest,
		RawConfig:      artifact.RawConfig,
		RepoDigests:    repoDigests,
		Architecture:   artifact.Config.Architecture,
		Variant:        artifact.Config.Variant,
		OS:             artifact.Config.OS,
		Labels:         artifact.Config.Config.Labels,
		Annotations:    extractManifestAnnotations(artifact.Manifest),
	}
	return md
}
// extractAnnotations guarantees a non-nil annotation map: a non-nil input is returned as-is
// (the same map, not a copy) and a nil input is replaced by a new empty map.
func extractAnnotations(annotations map[string]string) map[string]string {
	if annotations != nil {
		return annotations
	}
	return map[string]string{}
}
// extractManifestAnnotations returns manifest.GetAnnotations() when the value provides that
// method, and a new empty map otherwise.
//
// NOTE(review): a value that only exposes an Annotations field (no GetAnnotations method) takes
// the empty-map path — confirm the manifest type passed by callers has the method.
func extractManifestAnnotations(manifest interface{}) map[string]string {
	type annotated interface {
		GetAnnotations() map[string]string
	}

	provider, ok := manifest.(annotated)
	if !ok {
		return map[string]string{}
	}
	return provider.GetAnnotations()
}
// calculateTotalSize returns the combined Size of the given layers (0 for no layers).
func calculateTotalSize(layers []source.LayerMetadata) int64 {
	sum := int64(0)
	for i := range layers {
		sum += layers[i].Size
	}
	return sum
}
// deriveIDFromArtifact computes the artifact ID for a source configuration by hashing one
// identifying string, chosen in order of preference: "name@version" from the alias, the
// manifest digest, or (with a warning) the reference itself.
func deriveIDFromArtifact(cfg Config) artifact.ID {
	var seed string
	switch {
	case !cfg.Alias.IsEmpty():
		// an explicit alias gives a stable ID independent of the registry content
		seed = fmt.Sprintf("%s@%s", cfg.Alias.Name, cfg.Alias.Version)
	case cfg.Metadata.ManifestDigest != "":
		seed = cfg.Metadata.ManifestDigest
	default:
		log.Warn("no explicit name/version or manifest digest, deriving artifact ID from reference")
		seed = cfg.Reference
	}

	hashed := digest.SHA256.FromString(seed).String()
	return internal.ArtifactIDFromDigest(hashed)
}
// ID returns the artifact ID that was derived when the source was constructed.
func (s *ociModelSource) ID() artifact.ID {
return s.id
}
// Describe returns the source description: the source ID, the OCI model metadata, and a name
// that defaults to the artifact reference. When an alias is configured, its non-empty name
// replaces the reference and its version and supplier are reported as well.
func (s *ociModelSource) Describe() source.Description {
	desc := source.Description{
		ID:       string(s.id),
		Name:     s.config.Reference,
		Metadata: s.config.Metadata,
	}

	alias := s.config.Alias
	if alias.IsEmpty() {
		return desc
	}

	if alias.Name != "" {
		desc.Name = alias.Name
	}
	// version and supplier default to "", so copying them unconditionally is equivalent to
	// copying only non-empty values
	desc.Version = alias.Version
	desc.Supplier = alias.Supplier
	return desc
}
// FileResolver returns the resolver that exposes the fetched GGUF header files. The resolver
// is created on first use from the configured temp files and reused afterwards; the scope
// argument is ignored.
func (s *ociModelSource) FileResolver(_ source.Scope) (file.Resolver, error) {
	s.mutex.Lock()
	defer s.mutex.Unlock()

	if s.resolver != nil {
		return s.resolver, nil
	}

	s.resolver = newOCIModelResolver(s.config.TempFiles)
	return s.resolver, nil
}
// Close cleans up temporary files.
//
// When a resolver has been created, cleanup is delegated to it and the resolver is released.
// When no resolver was ever created (FileResolver was not called), the temp files registered
// in the config are removed directly, since they were written when the source was constructed
// and would otherwise be left behind. The first error encountered is logged and returned.
func (s *ociModelSource) Close() error {
	s.mutex.Lock()
	defer s.mutex.Unlock()

	if s.resolver != nil {
		// presumably the resolver removes the temp files it was constructed with
		if err := s.resolver.cleanup(); err != nil {
			log.WithFields("error", err).Warn("failed to cleanup temp files")
			return err
		}
		s.resolver = nil
		return nil
	}

	// keep going after a failure so one bad path does not prevent removing the rest
	var firstErr error
	for _, path := range s.config.TempFiles {
		if err := removeFile(path); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	if firstErr != nil {
		log.WithFields("error", firstErr).Warn("failed to cleanup temp files")
	}
	return firstErr
}
// removeFile deletes the file at path and returns any error from the removal. A path that
// does not exist is treated as already removed and yields nil, so the call is safe to repeat.
func removeFile(path string) error {
	if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
		return err
	}
	return nil
}

View File

@ -0,0 +1,76 @@
package ocimodelsource
import (
"context"
"fmt"
"github.com/anchore/stereoscope/pkg/image"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/source"
)
// NewSourceProvider returns a source.Provider that resolves the given reference as an OCI
// model artifact, using the supplied registry options and source alias.
func NewSourceProvider(reference string, registryOpts *image.RegistryOptions, alias source.Alias) source.Provider {
	provider := new(ociModelSourceProvider)
	provider.reference = reference
	provider.registryOpts = registryOpts
	provider.alias = alias
	return provider
}
// ociModelSourceProvider is the source.Provider for OCI model artifacts. It holds the inputs
// needed to resolve a reference into a source when Provide is called.
type ociModelSourceProvider struct {
reference string
registryOpts *image.RegistryOptions
alias source.Alias
}
// Name returns the fixed identifier of this provider.
func (p *ociModelSourceProvider) Name() string {
return "oci-model-artifact"
}
// Provide resolves the configured reference into an OCI model source.
//
// It first checks whether the reference points at a model artifact, then fetches the artifact
// and builds a source from its GGUF layers. An error is returned when the registry client
// cannot be created, when the reference is not (or cannot be confirmed to be) a model
// artifact, when the artifact has no GGUF layers, or when fetching or source creation fails.
func (p *ociModelSourceProvider) Provide(ctx context.Context) (source.Source, error) {
	registryClient, err := NewRegistryClient(p.registryOpts)
	if err != nil {
		return nil, fmt.Errorf("failed to create registry client: %w", err)
	}

	// lightweight detection before fetching anything substantial
	log.WithFields("reference", p.reference).Debug("checking if reference is a model artifact")
	isModel, err := registryClient.IsModelArtifactReference(ctx, p.reference)
	switch {
	case err != nil:
		// reported as "not a model artifact" so that other providers can still be tried
		log.WithFields("reference", p.reference, "error", err).Debug("failed to check if reference is a model artifact")
		return nil, fmt.Errorf("not an OCI model artifact: %w", err)
	case !isModel:
		log.WithFields("reference", p.reference).Debug("reference is not a model artifact")
		return nil, fmt.Errorf("not an OCI model artifact")
	}
	log.WithFields("reference", p.reference).Info("detected OCI model artifact, fetching headers")

	// fetch the artifact together with its metadata
	modelArtifact, err := registryClient.FetchModelArtifact(ctx, p.reference)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch model artifact: %w", err)
	}

	layerCount := len(modelArtifact.GGUFLayers)
	if layerCount == 0 {
		log.WithFields("reference", p.reference).Warn("model artifact has no GGUF layers")
		return nil, fmt.Errorf("model artifact has no GGUF layers")
	}
	log.WithFields("reference", p.reference, "ggufLayers", layerCount).Info("found GGUF layers in model artifact")

	modelSource, err := NewFromArtifact(modelArtifact, registryClient, p.alias)
	if err != nil {
		return nil, fmt.Errorf("failed to create OCI model source: %w", err)
	}
	return modelSource, nil
}

View File

@ -0,0 +1,53 @@
package ocimodelsource
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestExtractVirtualPath checks that the title annotation is used as the
// virtual file name when present and that the index-based name is used otherwise.
func TestExtractVirtualPath(t *testing.T) {
	type testCase struct {
		name        string
		layerIndex  int
		annotations map[string]string
		expected    string
	}

	cases := []testCase{
		{
			name:        "with title annotation",
			layerIndex:  0,
			annotations: map[string]string{"org.opencontainers.image.title": "model.gguf"},
			expected:    "/model.gguf",
		},
		{
			name:        "without title annotation",
			layerIndex:  1,
			annotations: map[string]string{},
			expected:    "/model-layer-1.gguf",
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := extractVirtualPath(tc.layerIndex, tc.annotations)
			assert.Equal(t, tc.expected, got)
		})
	}
}
// TestCalculateTotalSize is a placeholder. It builds a local slice of
// layer-shaped structs and only asserts that the slice is non-nil; no size
// calculation is invoked, so it cannot fail on a logic regression.
// TODO: exercise the real total-size helper with source.LayerMetadata values.
func TestCalculateTotalSize(t *testing.T) {
// This is imported from syft/source
// Just a simple test to ensure it works
layers := []struct {
MediaType string
Digest string
Size int64
}{
{"application/vnd.docker.image.rootfs.diff.tar.gzip", "sha256:abc", 100},
{"application/vnd.docker.image.rootfs.diff.tar.gzip", "sha256:def", 200},
}
// We'd need to convert to source.LayerMetadata to test this properly
// For now, just ensure the package compiles
assert.NotNil(t, layers)
}

View File

@ -0,0 +1,227 @@
package ocimodelsource
import (
	"context"
	"crypto/tls"
	"encoding/json"
	"fmt"
	"io"
	"net/http"

	"github.com/google/go-containerregistry/pkg/authn"
	"github.com/google/go-containerregistry/pkg/name"
	v1 "github.com/google/go-containerregistry/pkg/v1"
	"github.com/google/go-containerregistry/pkg/v1/remote"

	"github.com/anchore/stereoscope/pkg/image"
)
const (
// Model artifact media types as per Docker's OCI artifacts for AI model packaging.
// Reference: https://www.docker.com/blog/oci-artifacts-for-ai-model-packaging/
// ModelConfigMediaType is the manifest config media type that marks a manifest as a model artifact.
ModelConfigMediaType = "application/vnd.docker.ai.model.config.v0.1+json"
// GGUFLayerMediaType is the layer media type used to select GGUF layers from a manifest.
GGUFLayerMediaType = "application/vnd.docker.ai.gguf.v3"
// MaxHeaderBytes is the intended upper bound on the bytes fetched per blob when reading GGUF headers.
MaxHeaderBytes = 10 * 1024 * 1024 // 10 MiB
)
// RegistryClient handles OCI registry interactions for model artifacts.
// Construct it with NewRegistryClient.
type RegistryClient struct {
// options are the go-containerregistry remote options (auth, transport)
// passed to every registry call made by this client.
options []remote.Option
}
// NewRegistryClient builds a RegistryClient whose remote options (auth and
// transport) are derived from registryOpts. It returns an error when the
// options cannot be built.
func NewRegistryClient(registryOpts *image.RegistryOptions) (*RegistryClient, error) {
	remoteOpts, err := buildRemoteOptions(registryOpts)
	if err != nil {
		return nil, fmt.Errorf("failed to build remote options: %w", err)
	}

	client := new(RegistryClient)
	client.options = remoteOpts
	return client, nil
}
// buildRemoteOptions converts stereoscope RegistryOptions to go-containerregistry remote.Options.
//
// A nil registryOpts yields no options. Otherwise the result always carries an
// authenticator and at most one transport override:
//   - InsecureSkipTLSVerify: a clone of remote.DefaultTransport with certificate
//     verification disabled;
//   - InsecureUseHTTP (only when TLS verification is not being skipped):
//     http.DefaultTransport.
//
// Only one transport is ever appended because a later remote.WithTransport
// replaces an earlier one, which would silently drop the skip-verify setting.
//
// It returns an error if remote.DefaultTransport is not an *http.Transport and
// therefore cannot be cloned.
func buildRemoteOptions(registryOpts *image.RegistryOptions) ([]remote.Option, error) {
	if registryOpts == nil {
		return nil, nil
	}

	opts := []remote.Option{remote.WithAuth(buildAuthenticator(registryOpts))}

	switch {
	case registryOpts.InsecureSkipTLSVerify:
		base, ok := remote.DefaultTransport.(*http.Transport)
		if !ok {
			return nil, fmt.Errorf("unexpected default transport type %T", remote.DefaultTransport)
		}
		transport := base.Clone()
		// The default transport carries no TLS config, so it must be created
		// before the flag can be set (a nil config would panic here).
		if transport.TLSClientConfig == nil {
			transport.TLSClientConfig = &tls.Config{MinVersion: tls.VersionTLS12}
		}
		// Explicit user opt-in to skip certificate verification.
		transport.TLSClientConfig.InsecureSkipVerify = true
		opts = append(opts, remote.WithTransport(transport))
	case registryOpts.InsecureUseHTTP:
		// NOTE(review): swapping the transport does not by itself select the
		// http scheme; presumably that has to happen when the reference is
		// parsed (e.g. name.Insecure) — confirm.
		opts = append(opts, remote.WithTransport(http.DefaultTransport))
	}

	return opts, nil
}
// buildAuthenticator picks an authn.Authenticator from registryOpts.
//
// Only the first credential entry is considered (no matching by authority):
// a non-empty token yields bearer auth, otherwise a non-empty username or
// password yields basic auth. In every other case the anonymous authenticator
// is returned.
func buildAuthenticator(registryOpts *image.RegistryOptions) authn.Authenticator {
	if len(registryOpts.Credentials) == 0 {
		return authn.Anonymous
	}

	first := registryOpts.Credentials[0]
	switch {
	case first.Token != "":
		return &authn.Bearer{Token: first.Token}
	case first.Username != "", first.Password != "":
		return &authn.Basic{
			Username: first.Username,
			Password: first.Password,
		}
	default:
		return authn.Anonymous
	}
}
// ModelArtifact represents a parsed OCI model artifact.
type ModelArtifact struct {
// Reference is the parsed form of the reference string the artifact was fetched with.
Reference name.Reference
// Manifest is the manifest decoded from RawManifest.
Manifest *v1.Manifest
// Config is the config file as parsed by go-containerregistry.
Config *v1.ConfigFile
// RawManifest holds the manifest bytes as returned by the registry.
RawManifest []byte
// RawConfig holds the undecoded config bytes.
RawConfig []byte
// ManifestDigest is the string form of the manifest descriptor digest.
ManifestDigest string
// GGUFLayers lists the manifest layers whose media type is GGUFLayerMediaType.
GGUFLayers []v1.Descriptor
}
// FetchModelArtifact fetches and parses an OCI model artifact from the registry.
//
// refStr is parsed as an image reference, the manifest is fetched and decoded,
// and the manifest must carry the model config media type. The config is then
// loaded and the GGUF layers are selected from the manifest.
//
// ctx is attached to the registry request so cancellation and deadlines are
// honored. An error is returned when the reference cannot be parsed, any
// registry call fails, the manifest cannot be decoded, or the manifest is not
// a model artifact.
func (c *RegistryClient) FetchModelArtifact(ctx context.Context, refStr string) (*ModelArtifact, error) {
	ref, err := name.ParseReference(refStr)
	if err != nil {
		return nil, fmt.Errorf("failed to parse reference %q: %w", refStr, err)
	}

	// Copy the shared options before adding the per-call context so the
	// client's slice is never mutated.
	opts := make([]remote.Option, 0, len(c.options)+1)
	opts = append(opts, c.options...)
	opts = append(opts, remote.WithContext(ctx))

	desc, err := remote.Get(ref, opts...)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch descriptor: %w", err)
	}

	manifest := &v1.Manifest{}
	if err := json.Unmarshal(desc.Manifest, manifest); err != nil {
		return nil, fmt.Errorf("failed to unmarshal manifest: %w", err)
	}

	if !isModelArtifact(manifest) {
		return nil, fmt.Errorf("not a model artifact (config media type: %s)", manifest.Config.MediaType)
	}

	// The config is read through the image view of the descriptor.
	img, err := desc.Image()
	if err != nil {
		return nil, fmt.Errorf("failed to get image: %w", err)
	}

	configFile, err := img.ConfigFile()
	if err != nil {
		return nil, fmt.Errorf("failed to get config file: %w", err)
	}

	rawConfig, err := img.RawConfigFile()
	if err != nil {
		return nil, fmt.Errorf("failed to get raw config: %w", err)
	}

	return &ModelArtifact{
		Reference:      ref,
		Manifest:       manifest,
		Config:         configFile,
		RawManifest:    desc.Manifest,
		RawConfig:      rawConfig,
		ManifestDigest: desc.Digest.String(),
		GGUFLayers:     extractGGUFLayers(manifest),
	}, nil
}
// isModelArtifact reports whether the manifest's config media type is the
// model config media type.
func isModelArtifact(manifest *v1.Manifest) bool {
	configMediaType := manifest.Config.MediaType
	return configMediaType == ModelConfigMediaType
}
// extractGGUFLayers returns, in manifest order, the layer descriptors whose
// media type is the GGUF layer media type. The result is nil when none match.
func extractGGUFLayers(manifest *v1.Manifest) []v1.Descriptor {
	var matches []v1.Descriptor
	for i := range manifest.Layers {
		if string(manifest.Layers[i].MediaType) != GGUFLayerMediaType {
			continue
		}
		matches = append(matches, manifest.Layers[i])
	}
	return matches
}
// FetchBlobRange returns at most the first maxBytes bytes of the blob with the
// given digest in ref's repository. It is used to read a GGUF header without
// buffering a multi-GB blob in memory.
//
// The blob is read as stored in the registry (the layer's compressed stream)
// and reading stops once maxBytes bytes were consumed. A blob shorter than
// maxBytes, including an empty one, is returned in full without error; a
// non-positive maxBytes yields an empty result.
//
// ctx is attached to the registry request so cancellation and deadlines are
// honored.
//
// NOTE(review): this streams from the start of the blob rather than issuing an
// HTTP Range request; confirm the remaining transfer is actually abandoned
// when the reader is closed.
func (c *RegistryClient) FetchBlobRange(ctx context.Context, ref name.Reference, digest v1.Hash, maxBytes int64) ([]byte, error) {
	// Copy the shared options before adding the per-call context so the
	// client's slice is never mutated.
	opts := make([]remote.Option, 0, len(c.options)+1)
	opts = append(opts, c.options...)
	opts = append(opts, remote.WithContext(ctx))

	repo := ref.Context()
	layer, err := remote.Layer(repo.Digest(digest.String()), opts...)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch layer: %w", err)
	}

	reader, err := layer.Compressed()
	if err != nil {
		return nil, fmt.Errorf("failed to get layer reader: %w", err)
	}
	defer reader.Close()

	// LimitReader caps the read without pre-allocating maxBytes, and a short
	// or empty blob simply ends the read instead of surfacing an EOF error.
	data, err := io.ReadAll(io.LimitReader(reader, maxBytes))
	if err != nil {
		return nil, fmt.Errorf("failed to read layer data: %w", err)
	}
	return data, nil
}
// IsModelArtifactReference checks if a reference points to a model artifact.
// This is a lightweight check that only fetches the manifest.
//
// ctx is attached to the registry request so cancellation and deadlines are
// honored. An error is returned when the reference cannot be parsed, the
// manifest cannot be fetched, or the manifest cannot be decoded.
func (c *RegistryClient) IsModelArtifactReference(ctx context.Context, refStr string) (bool, error) {
	ref, err := name.ParseReference(refStr)
	if err != nil {
		return false, fmt.Errorf("failed to parse reference %q: %w", refStr, err)
	}

	// Copy the shared options before adding the per-call context so the
	// client's slice is never mutated.
	opts := make([]remote.Option, 0, len(c.options)+1)
	opts = append(opts, c.options...)
	opts = append(opts, remote.WithContext(ctx))

	desc, err := remote.Get(ref, opts...)
	if err != nil {
		return false, fmt.Errorf("failed to fetch descriptor: %w", err)
	}

	manifest := &v1.Manifest{}
	if err := json.Unmarshal(desc.Manifest, manifest); err != nil {
		return false, fmt.Errorf("failed to unmarshal manifest: %w", err)
	}

	return isModelArtifact(manifest), nil
}

View File

@ -0,0 +1,211 @@
package ocimodelsource
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/bmatcuk/doublestar/v4"
stereofile "github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/syft/file"
)
// Compile-time check that ociModelResolver satisfies file.Resolver.
var _ file.Resolver = (*ociModelResolver)(nil)
// ociModelResolver is a minimal file.Resolver implementation that provides access to
// GGUF header data fetched from OCI model artifacts. Each header is backed by a
// temporary file on disk and addressed through a virtual path.
type ociModelResolver struct {
// tempFiles maps virtual path -> temporary file path.
tempFiles map[string]string // maps virtual path -> temporary file path
// locations holds one virtual location per tempFiles entry, built at construction time.
locations []file.Location
}
// newOCIModelResolver builds a resolver over tempFiles (virtual path ->
// temporary file path). One virtual location is precomputed per entry, with the
// temp file as the real path and the virtual path as the access path. The map
// is stored as given, not copied.
func newOCIModelResolver(tempFiles map[string]string) *ociModelResolver {
	resolver := &ociModelResolver{
		tempFiles: tempFiles,
		locations: make([]file.Location, 0, len(tempFiles)),
	}
	for accessPath, realPath := range tempFiles {
		resolver.locations = append(resolver.locations, file.NewVirtualLocation(realPath, accessPath))
	}
	return resolver
}
// FileContentsByLocation opens the temp file behind location for reading.
// The location's real path must be one of the temp files managed by this
// resolver; otherwise an error is returned. The caller closes the reader.
func (r *ociModelResolver) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
	target := location.RealPath

	// Only serve files this resolver owns.
	managed := false
	for _, candidate := range r.tempFiles {
		if managed = candidate == target; managed {
			break
		}
	}
	if !managed {
		return nil, fmt.Errorf("location not found in resolver: %s", location.RealPath)
	}

	handle, err := os.Open(target)
	if err != nil {
		return nil, fmt.Errorf("failed to open temp file: %w", err)
	}
	return handle, nil
}
// FileMetadataByLocation stats the file at the location's real path and
// reports it as a regular file whose path is the location's access (virtual)
// path. An error is returned when the stat fails.
func (r *ociModelResolver) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
	info, err := os.Stat(location.RealPath)
	if err != nil {
		return file.Metadata{}, fmt.Errorf("failed to stat temp file: %w", err)
	}

	var meta file.Metadata
	meta.Path = location.AccessPath // virtual path, not the temp file path
	meta.Type = stereofile.TypeRegular
	meta.FileInfo = info
	return meta, nil
}
// HasPath reports whether path is one of the virtual paths known to the resolver.
func (r *ociModelResolver) HasPath(path string) bool {
	if _, known := r.tempFiles[path]; known {
		return true
	}
	return false
}
// FilesByPath returns one virtual location for each requested path that is a
// known virtual path, in the order requested. Unknown paths are skipped, and
// the result is nil when nothing matches.
func (r *ociModelResolver) FilesByPath(paths ...string) ([]file.Location, error) {
	var matches []file.Location
	for _, requested := range paths {
		realPath, known := r.tempFiles[requested]
		if !known {
			continue
		}
		matches = append(matches, file.NewVirtualLocation(realPath, requested))
	}
	return matches, nil
}
// FilesByGlob returns a virtual location for every (pattern, virtual path)
// pair that matches; a path matched by several patterns appears once per
// pattern. A malformed pattern aborts the search with an error.
func (r *ociModelResolver) FilesByGlob(patterns ...string) ([]file.Location, error) {
	var matches []file.Location
	for _, glob := range patterns {
		for accessPath, realPath := range r.tempFiles {
			// Globs are evaluated against the virtual path, never the temp file path.
			ok, err := doublestar.Match(glob, accessPath)
			switch {
			case err != nil:
				return nil, fmt.Errorf("failed to match pattern %q: %w", glob, err)
			case ok:
				matches = append(matches, file.NewVirtualLocation(realPath, accessPath))
			}
		}
	}
	return matches, nil
}
// FilesByMIMEType is a stub: MIME type detection is not implemented for OCI
// model artifacts, so it always returns no locations and no error.
func (r *ociModelResolver) FilesByMIMEType(types ...string) ([]file.Location, error) {
	var none []file.Location
	return none, nil
}
// RelativeFileByPath is a stub: OCI model artifacts have no layer hierarchy to
// resolve against, so it always returns nil.
func (r *ociModelResolver) RelativeFileByPath(_ file.Location, path string) *file.Location {
	var none *file.Location
	return none
}
// AllLocations streams every known location over an unbuffered channel. The
// channel is closed once all locations were sent or ctx is cancelled,
// whichever happens first.
func (r *ociModelResolver) AllLocations(ctx context.Context) <-chan file.Location {
	out := make(chan file.Location)

	emit := func() {
		defer close(out)
		done := ctx.Done()
		pending := r.locations
		for i := range pending {
			select {
			case <-done:
				return
			case out <- pending[i]:
			}
		}
	}
	go emit()

	return out
}
// cleanup removes all temporary files managed by this resolver.
//
// Every file is attempted even if an earlier removal fails. A file that no
// longer exists counts as already cleaned up, so calling cleanup again after a
// partial failure only retries what is still on disk. All remaining failures
// are reported together in a single error.
func (r *ociModelResolver) cleanup() error {
	var errs []error
	for virtualPath, tempPath := range r.tempFiles {
		err := os.Remove(tempPath)
		if err == nil || os.IsNotExist(err) {
			continue
		}
		errs = append(errs, fmt.Errorf("failed to remove temp file for %s: %w", virtualPath, err))
	}
	if len(errs) > 0 {
		return fmt.Errorf("cleanup errors: %v", errs)
	}
	return nil
}
// extractVirtualPath generates the virtual path under which a GGUF layer is
// exposed by the resolver.
//
// The "org.opencontainers.image.title" annotation is used as the file name
// when it is set; leading slashes are stripped so the result always starts
// with exactly one "/". When the annotation is missing, empty, or consists
// only of slashes, a generic name derived from layerIndex is used instead.
func extractVirtualPath(layerIndex int, annotations map[string]string) string {
	// Indexing a nil map is safe and yields "".
	title := strings.TrimLeft(annotations["org.opencontainers.image.title"], "/")
	if title == "" {
		return fmt.Sprintf("/model-layer-%d.gguf", layerIndex)
	}
	return "/" + title
}
// createTempFileFromData writes data to a new temporary file in the default
// temp directory and returns the file's path.
//
// The temp file name is derived from the base name of virtualPath: the name
// without extension becomes the prefix and the extension is kept as the
// suffix, so "/model.gguf" yields something like "model-<random>.gguf".
//
// The file is closed before returning. If writing or closing fails, the
// partially written file is removed and an error is returned.
func createTempFileFromData(data []byte, virtualPath string) (string, error) {
	filename := filepath.Base(virtualPath)
	ext := filepath.Ext(filename)
	prefix := strings.TrimSuffix(filename, ext) + "-"

	tempFile, err := os.CreateTemp("", prefix+"*"+ext)
	if err != nil {
		return "", fmt.Errorf("failed to create temp file: %w", err)
	}
	tempPath := tempFile.Name()

	if _, err := tempFile.Write(data); err != nil {
		// Best-effort cleanup; the write error is the one worth reporting.
		// Close first so the removal also works on platforms that refuse to
		// delete open files.
		_ = tempFile.Close()
		_ = os.Remove(tempPath)
		return "", fmt.Errorf("failed to write to temp file: %w", err)
	}

	// A failed Close can mean the data never reached disk, so it must not be ignored.
	if err := tempFile.Close(); err != nil {
		_ = os.Remove(tempPath)
		return "", fmt.Errorf("failed to close temp file: %w", err)
	}

	return tempPath, nil
}

View File

@ -7,6 +7,7 @@ import (
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
"github.com/anchore/syft/syft/source/directorysource" "github.com/anchore/syft/syft/source/directorysource"
"github.com/anchore/syft/syft/source/filesource" "github.com/anchore/syft/syft/source/filesource"
"github.com/anchore/syft/syft/source/ocimodelsource"
"github.com/anchore/syft/syft/source/snapsource" "github.com/anchore/syft/syft/source/snapsource"
"github.com/anchore/syft/syft/source/stereoscopesource" "github.com/anchore/syft/syft/source/stereoscopesource"
) )
@ -16,6 +17,7 @@ const (
DirTag = stereoscope.DirTag DirTag = stereoscope.DirTag
PullTag = stereoscope.PullTag PullTag = stereoscope.PullTag
SnapTag = "snap" SnapTag = "snap"
OCIModelTag = "oci-model"
) )
// All returns all the configured source providers known to syft // All returns all the configured source providers known to syft
@ -40,6 +42,9 @@ func All(userInput string, cfg *Config) []collections.TaggedValue[source.Provide
// 3. try remote sources after everything else... // 3. try remote sources after everything else...
// --from oci-model (model artifacts with header-only fetching)
Join(tagProvider(ocimodelsource.NewSourceProvider(userInput, cfg.RegistryOptions, cfg.Alias), OCIModelTag)).
// --from docker, registry, etc. // --from docker, registry, etc.
Join(stereoscopeProviders.Select(PullTag)...). Join(stereoscopeProviders.Select(PullTag)...).