mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 08:23:15 +01:00
Compare commits
14 Commits
bfe63bb006
...
5853129c07
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5853129c07 | ||
|
|
9c5279cb99 | ||
|
|
f7a19db98b | ||
|
|
13756ec768 | ||
|
|
3326ae44fa | ||
|
|
a08d5b78d9 | ||
|
|
ce74ed0309 | ||
|
|
0ff6a1af58 | ||
|
|
cd4d0ce062 | ||
|
|
a721a854a9 | ||
|
|
c715e01cc2 | ||
|
|
165611d2e4 | ||
|
|
746f00ad68 | ||
|
|
3f117a3eb5 |
2
.gitignore
vendored
2
.gitignore
vendored
@ -73,3 +73,5 @@ cosign.pub
|
|||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
*$py.class
|
*$py.class
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -88,6 +88,7 @@ func TestPkgCoverageImage(t *testing.T) {
|
|||||||
definedPkgs.Remove(string(pkg.TerraformPkg))
|
definedPkgs.Remove(string(pkg.TerraformPkg))
|
||||||
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // we have coverage for pear instead
|
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // we have coverage for pear instead
|
||||||
definedPkgs.Remove(string(pkg.CondaPkg))
|
definedPkgs.Remove(string(pkg.CondaPkg))
|
||||||
|
definedPkgs.Remove(string(pkg.ModelPkg))
|
||||||
|
|
||||||
var cases []testCase
|
var cases []testCase
|
||||||
cases = append(cases, commonTestCases...)
|
cases = append(cases, commonTestCases...)
|
||||||
@ -162,6 +163,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
|
|||||||
definedPkgs.Remove(string(pkg.UnknownPkg))
|
definedPkgs.Remove(string(pkg.UnknownPkg))
|
||||||
definedPkgs.Remove(string(pkg.CondaPkg))
|
definedPkgs.Remove(string(pkg.CondaPkg))
|
||||||
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // this is covered as pear packages
|
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // this is covered as pear packages
|
||||||
|
definedPkgs.Remove(string(pkg.ModelPkg))
|
||||||
|
|
||||||
// for directory scans we should not expect to see any of the following package types
|
// for directory scans we should not expect to see any of the following package types
|
||||||
definedPkgs.Remove(string(pkg.KbPkg))
|
definedPkgs.Remove(string(pkg.KbPkg))
|
||||||
|
|||||||
8
go.mod
8
go.mod
@ -286,6 +286,8 @@ require (
|
|||||||
modernc.org/memory v1.11.0 // indirect
|
modernc.org/memory v1.11.0 // indirect
|
||||||
)
|
)
|
||||||
|
|
||||||
|
require github.com/gpustack/gguf-parser-go v0.22.1
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect
|
github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect
|
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect
|
||||||
@ -306,6 +308,12 @@ require (
|
|||||||
github.com/aws/aws-sdk-go-v2/service/sts v1.34.0 // indirect
|
github.com/aws/aws-sdk-go-v2/service/sts v1.34.0 // indirect
|
||||||
github.com/aws/smithy-go v1.22.4 // indirect
|
github.com/aws/smithy-go v1.22.4 // indirect
|
||||||
github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect
|
github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect
|
||||||
|
github.com/henvic/httpretty v0.1.4 // indirect
|
||||||
|
github.com/json-iterator/go v1.1.12 // indirect
|
||||||
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||||
|
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||||
|
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
|
||||||
|
gonum.org/v1/gonum v0.15.1 // indirect
|
||||||
)
|
)
|
||||||
|
|
||||||
retract (
|
retract (
|
||||||
|
|||||||
11
go.sum
11
go.sum
@ -541,6 +541,8 @@ github.com/gookit/assert v0.1.1/go.mod h1:jS5bmIVQZTIwk42uXl4lyj4iaaxx32tqH16CFj
|
|||||||
github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg=
|
github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg=
|
||||||
github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA=
|
github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA=
|
||||||
github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs=
|
github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs=
|
||||||
|
github.com/gpustack/gguf-parser-go v0.22.1 h1:FRnEDWqT0Rcplr/R9ctCRSN2+3DhVsf6dnR5/i9JA4E=
|
||||||
|
github.com/gpustack/gguf-parser-go v0.22.1/go.mod h1:y4TwTtDqFWTK+xvprOjRUh+dowgU2TKCX37vRKvGiZ0=
|
||||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
|
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
|
||||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
|
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
|
||||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
|
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
|
||||||
@ -590,6 +592,8 @@ github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn
|
|||||||
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
|
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
|
||||||
github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
|
github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
|
||||||
github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
|
github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
|
||||||
|
github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
|
||||||
|
github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
|
||||||
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
||||||
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||||
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
|
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
|
||||||
@ -617,6 +621,7 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF
|
|||||||
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
||||||
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||||
github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||||
|
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||||
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
||||||
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
|
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
|
||||||
@ -722,9 +727,11 @@ github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcY
|
|||||||
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA=
|
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA=
|
||||||
github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
|
github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
|
||||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||||
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||||
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||||
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||||
|
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
||||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||||
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
|
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
|
||||||
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
|
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
|
||||||
@ -851,6 +858,8 @@ github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yf
|
|||||||
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||||
github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
|
github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
|
||||||
github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
|
github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
|
||||||
|
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
|
||||||
|
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
|
||||||
github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik=
|
github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik=
|
||||||
github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU=
|
github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU=
|
||||||
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
|
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
|
||||||
@ -1304,6 +1313,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
|
|||||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
|
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
|
||||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
|
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
|
||||||
|
gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
|
||||||
|
gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
|
||||||
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
|
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
|
||||||
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
|
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
|
||||||
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
|
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
|
||||||
|
|||||||
@ -3,5 +3,5 @@ package internal
|
|||||||
const (
|
const (
|
||||||
// JSONSchemaVersion is the current schema version output by the JSON encoder
|
// JSONSchemaVersion is the current schema version output by the JSON encoder
|
||||||
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
|
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
|
||||||
JSONSchemaVersion = "16.0.41"
|
JSONSchemaVersion = "16.0.42"
|
||||||
)
|
)
|
||||||
|
|||||||
@ -27,6 +27,7 @@ func AllTypes() []any {
|
|||||||
pkg.ELFBinaryPackageNoteJSONPayload{},
|
pkg.ELFBinaryPackageNoteJSONPayload{},
|
||||||
pkg.ElixirMixLockEntry{},
|
pkg.ElixirMixLockEntry{},
|
||||||
pkg.ErlangRebarLockEntry{},
|
pkg.ErlangRebarLockEntry{},
|
||||||
|
pkg.GGUFFileHeader{},
|
||||||
pkg.GitHubActionsUseStatement{},
|
pkg.GitHubActionsUseStatement{},
|
||||||
pkg.GolangBinaryBuildinfoEntry{},
|
pkg.GolangBinaryBuildinfoEntry{},
|
||||||
pkg.GolangModuleEntry{},
|
pkg.GolangModuleEntry{},
|
||||||
|
|||||||
@ -123,6 +123,7 @@ var jsonTypes = makeJSONTypes(
|
|||||||
jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"),
|
jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"),
|
||||||
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
|
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
|
||||||
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
|
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
|
||||||
|
jsonNames(pkg.GGUFFileHeader{}, "gguf-file-metadata"),
|
||||||
)
|
)
|
||||||
|
|
||||||
func expandLegacyNameVariants(names ...string) []string {
|
func expandLegacyNameVariants(names ...string) []string {
|
||||||
|
|||||||
@ -3,6 +3,7 @@ package task
|
|||||||
import (
|
import (
|
||||||
"github.com/anchore/syft/syft/cataloging/pkgcataloging"
|
"github.com/anchore/syft/syft/cataloging/pkgcataloging"
|
||||||
"github.com/anchore/syft/syft/pkg"
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
"github.com/anchore/syft/syft/pkg/cataloger/ai"
|
||||||
"github.com/anchore/syft/syft/pkg/cataloger/alpine"
|
"github.com/anchore/syft/syft/pkg/cataloger/alpine"
|
||||||
"github.com/anchore/syft/syft/pkg/cataloger/arch"
|
"github.com/anchore/syft/syft/pkg/cataloger/arch"
|
||||||
"github.com/anchore/syft/syft/pkg/cataloger/binary"
|
"github.com/anchore/syft/syft/pkg/cataloger/binary"
|
||||||
@ -175,6 +176,7 @@ func DefaultPackageTaskFactories() Factories {
|
|||||||
newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"),
|
newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"),
|
||||||
newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"),
|
newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"),
|
||||||
newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"),
|
newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"),
|
||||||
|
newSimplePackageTaskFactory(ai.NewGGUFCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "gguf", "ml"),
|
||||||
|
|
||||||
// deprecated catalogers ////////////////////////////////////////
|
// deprecated catalogers ////////////////////////////////////////
|
||||||
// these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible)
|
// these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible)
|
||||||
|
|||||||
4078
schema/json/schema-16.0.42.json
Normal file
4078
schema/json/schema-16.0.42.json
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||||
"$id": "anchore.io/schema/syft/json/16.0.41/document",
|
"$id": "anchore.io/schema/syft/json/16.0.42/document",
|
||||||
"$ref": "#/$defs/Document",
|
"$ref": "#/$defs/Document",
|
||||||
"$defs": {
|
"$defs": {
|
||||||
"AlpmDbEntry": {
|
"AlpmDbEntry": {
|
||||||
@ -1399,6 +1399,70 @@
|
|||||||
"size"
|
"size"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"GgufFileMetadata": {
|
||||||
|
"properties": {
|
||||||
|
"modelFormat": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "ModelFormat is always \"gguf\""
|
||||||
|
},
|
||||||
|
"modelName": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "ModelName is the name of the model (from general.name or filename)"
|
||||||
|
},
|
||||||
|
"modelVersion": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "ModelVersion is the version of the model (if available in header, else \"unknown\")"
|
||||||
|
},
|
||||||
|
"fileSize": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
|
||||||
|
},
|
||||||
|
"hash": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Hash is a content hash of the metadata (for stable global identifiers across remotes)"
|
||||||
|
},
|
||||||
|
"license": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "License is the license identifier (from general.license if present)"
|
||||||
|
},
|
||||||
|
"ggufVersion": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "GGUFVersion is the GGUF format version (e.g., 3)"
|
||||||
|
},
|
||||||
|
"architecture": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")"
|
||||||
|
},
|
||||||
|
"quantization": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Quantization is the quantization type (e.g., \"IQ4_NL\", \"Q4_K_M\")"
|
||||||
|
},
|
||||||
|
"parameters": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Parameters is the number of model parameters (if present in header)"
|
||||||
|
},
|
||||||
|
"tensorCount": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "TensorCount is the number of tensors in the model"
|
||||||
|
},
|
||||||
|
"header": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "Header contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
|
||||||
|
},
|
||||||
|
"truncatedHeader": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "TruncatedHeader indicates if the header was truncated during parsing (for very large headers)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": "object",
|
||||||
|
"required": [
|
||||||
|
"modelFormat",
|
||||||
|
"modelName",
|
||||||
|
"ggufVersion",
|
||||||
|
"tensorCount"
|
||||||
|
],
|
||||||
|
"description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file."
|
||||||
|
},
|
||||||
"GithubActionsUseStatement": {
|
"GithubActionsUseStatement": {
|
||||||
"properties": {
|
"properties": {
|
||||||
"value": {
|
"value": {
|
||||||
@ -2474,6 +2538,9 @@
|
|||||||
{
|
{
|
||||||
"$ref": "#/$defs/ErlangRebarLockEntry"
|
"$ref": "#/$defs/ErlangRebarLockEntry"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/$defs/GgufFileMetadata"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/$defs/GithubActionsUseStatement"
|
"$ref": "#/$defs/GithubActionsUseStatement"
|
||||||
},
|
},
|
||||||
|
|||||||
@ -15,6 +15,7 @@ import (
|
|||||||
"github.com/anchore/syft/syft/file"
|
"github.com/anchore/syft/syft/file"
|
||||||
"github.com/anchore/syft/syft/sbom"
|
"github.com/anchore/syft/syft/sbom"
|
||||||
"github.com/anchore/syft/syft/source"
|
"github.com/anchore/syft/syft/source"
|
||||||
|
"github.com/anchore/syft/syft/source/ocimodelsource"
|
||||||
)
|
)
|
||||||
|
|
||||||
// CreateSBOMConfig specifies all parameters needed for creating an SBOM.
|
// CreateSBOMConfig specifies all parameters needed for creating an SBOM.
|
||||||
@ -483,6 +484,9 @@ func findDefaultTags(src source.Description) ([]string, error) {
|
|||||||
return []string{pkgcataloging.DirectoryTag, filecataloging.FileTag}, nil
|
return []string{pkgcataloging.DirectoryTag, filecataloging.FileTag}, nil
|
||||||
case source.SnapMetadata:
|
case source.SnapMetadata:
|
||||||
return []string{pkgcataloging.InstalledTag, filecataloging.FileTag}, nil
|
return []string{pkgcataloging.InstalledTag, filecataloging.FileTag}, nil
|
||||||
|
case *ocimodelsource.OCIModelMetadata:
|
||||||
|
// OCI model artifacts should use image-like catalogers since they provide files to scan
|
||||||
|
return []string{pkgcataloging.ImageTag, filecataloging.FileTag}, nil
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unable to determine default cataloger tag for source type=%T", m)
|
return nil, fmt.Errorf("unable to determine default cataloger tag for source type=%T", m)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -40,8 +40,11 @@ func EncodeComponent(p pkg.Package, supplier string, locationSorter func(a, b fi
|
|||||||
}
|
}
|
||||||
|
|
||||||
componentType := cyclonedx.ComponentTypeLibrary
|
componentType := cyclonedx.ComponentTypeLibrary
|
||||||
if p.Type == pkg.BinaryPkg {
|
switch p.Type {
|
||||||
|
case pkg.BinaryPkg:
|
||||||
componentType = cyclonedx.ComponentTypeApplication
|
componentType = cyclonedx.ComponentTypeApplication
|
||||||
|
case pkg.ModelPkg:
|
||||||
|
componentType = cyclonedx.ComponentTypeMachineLearningModel
|
||||||
}
|
}
|
||||||
|
|
||||||
return cyclonedx.Component{
|
return cyclonedx.Component{
|
||||||
|
|||||||
@ -62,7 +62,7 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str
|
|||||||
switch component.Type {
|
switch component.Type {
|
||||||
case cyclonedx.ComponentTypeOS:
|
case cyclonedx.ComponentTypeOS:
|
||||||
case cyclonedx.ComponentTypeContainer:
|
case cyclonedx.ComponentTypeContainer:
|
||||||
case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary:
|
case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary, cyclonedx.ComponentTypeMachineLearningModel:
|
||||||
p := decodeComponent(component)
|
p := decodeComponent(component)
|
||||||
idMap[component.BOMRef] = p
|
idMap[component.BOMRef] = p
|
||||||
if component.BOMRef != "" {
|
if component.BOMRef != "" {
|
||||||
|
|||||||
@ -54,6 +54,7 @@ func Test_OriginatorSupplier(t *testing.T) {
|
|||||||
pkg.OpamPackage{},
|
pkg.OpamPackage{},
|
||||||
pkg.YarnLockEntry{},
|
pkg.YarnLockEntry{},
|
||||||
pkg.TerraformLockProviderEntry{},
|
pkg.TerraformLockProviderEntry{},
|
||||||
|
pkg.GGUFFileHeader{},
|
||||||
)
|
)
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
|
|||||||
@ -82,6 +82,8 @@ func SourceInfo(p pkg.Package) string {
|
|||||||
answer = "acquired package info from Homebrew formula"
|
answer = "acquired package info from Homebrew formula"
|
||||||
case pkg.TerraformPkg:
|
case pkg.TerraformPkg:
|
||||||
answer = "acquired package info from Terraform dependency lock file"
|
answer = "acquired package info from Terraform dependency lock file"
|
||||||
|
case pkg.ModelPkg:
|
||||||
|
answer = "acquired package info from AI artifact (e.g. GGUF File"
|
||||||
default:
|
default:
|
||||||
answer = "acquired package info from the following paths"
|
answer = "acquired package info from the following paths"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -351,6 +351,14 @@ func Test_SourceInfo(t *testing.T) {
|
|||||||
"acquired package info from Terraform dependency lock file",
|
"acquired package info from Terraform dependency lock file",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
input: pkg.Package{
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
},
|
||||||
|
expected: []string{
|
||||||
|
"",
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
var pkgTypes []pkg.Type
|
var pkgTypes []pkg.Type
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
|
|||||||
16
syft/pkg/cataloger/ai/cataloger.go
Normal file
16
syft/pkg/cataloger/ai/cataloger.go
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
/*
|
||||||
|
Package ai provides concrete Cataloger implementations for AI artifacts and machine learning models,
|
||||||
|
including support for GGUF (GPT-Generated Unified Format) model files.
|
||||||
|
*/
|
||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewGGUFCataloger returns a new cataloger instance for GGUF model files.
|
||||||
|
func NewGGUFCataloger() pkg.Cataloger {
|
||||||
|
return generic.NewCataloger("gguf-cataloger").
|
||||||
|
WithParserByGlobs(parseGGUFModel, "**/*.gguf")
|
||||||
|
}
|
||||||
373
syft/pkg/cataloger/ai/cataloger_test.go
Normal file
373
syft/pkg/cataloger/ai/cataloger_test.go
Normal file
@ -0,0 +1,373 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/google/go-cmp/cmp/cmpopts"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/artifact"
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGGUFCataloger_Globs(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
setup func(t *testing.T) string // returns fixture directory
|
||||||
|
expected []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "finds GGUF files in root",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
createTestGGUFInDir(t, dir, "model1.gguf")
|
||||||
|
createTestGGUFInDir(t, dir, "model2.gguf")
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expected: []string{
|
||||||
|
"model1.gguf",
|
||||||
|
"model2.gguf",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "finds GGUF files in subdirectories",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
modelsDir := filepath.Join(dir, "models")
|
||||||
|
os.MkdirAll(modelsDir, 0755)
|
||||||
|
createTestGGUFInDir(t, modelsDir, "llama.gguf")
|
||||||
|
|
||||||
|
deepDir := filepath.Join(dir, "deep", "nested", "path")
|
||||||
|
os.MkdirAll(deepDir, 0755)
|
||||||
|
createTestGGUFInDir(t, deepDir, "mistral.gguf")
|
||||||
|
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expected: []string{
|
||||||
|
"models/llama.gguf",
|
||||||
|
"deep/nested/path/mistral.gguf",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ignores non-GGUF files",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
createTestGGUFInDir(t, dir, "model.gguf")
|
||||||
|
|
||||||
|
// Create non-GGUF files
|
||||||
|
os.WriteFile(filepath.Join(dir, "readme.txt"), []byte("readme"), 0644)
|
||||||
|
os.WriteFile(filepath.Join(dir, "model.bin"), []byte("binary"), 0644)
|
||||||
|
os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644)
|
||||||
|
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expected: []string{
|
||||||
|
"model.gguf",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
fixtureDir := tt.setup(t)
|
||||||
|
|
||||||
|
pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, fixtureDir).
|
||||||
|
ExpectsResolverContentQueries(tt.expected).
|
||||||
|
TestCataloger(t, NewGGUFCataloger())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_Integration(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
setup func(t *testing.T) string
|
||||||
|
expectedPackages []pkg.Package
|
||||||
|
expectedRelationships []artifact.Relationship
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "catalog single GGUF file",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
data := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "llama3-8b").
|
||||||
|
withStringKV("general.version", "3.0").
|
||||||
|
withStringKV("general.license", "Apache-2.0").
|
||||||
|
withStringKV("general.quantization", "Q4_K_M").
|
||||||
|
withUint64KV("general.parameter_count", 8030000000).
|
||||||
|
build()
|
||||||
|
|
||||||
|
path := filepath.Join(dir, "llama3-8b.gguf")
|
||||||
|
os.WriteFile(path, data, 0644)
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: []pkg.Package{
|
||||||
|
{
|
||||||
|
Name: "llama3-8b",
|
||||||
|
Version: "3.0",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Licenses: pkg.NewLicenseSet(
|
||||||
|
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
|
||||||
|
),
|
||||||
|
Metadata: pkg.GGUFFileHeader{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "llama3-8b",
|
||||||
|
ModelVersion: "3.0",
|
||||||
|
License: "Apache-2.0",
|
||||||
|
Architecture: "llama",
|
||||||
|
Quantization: "Unknown",
|
||||||
|
Parameters: 0,
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 0,
|
||||||
|
Header: map[string]interface{}{},
|
||||||
|
TruncatedHeader: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedRelationships: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "catalog multiple GGUF files",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create first model
|
||||||
|
data1 := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "model1").
|
||||||
|
withStringKV("general.version", "1.0").
|
||||||
|
build()
|
||||||
|
os.WriteFile(filepath.Join(dir, "model1.gguf"), data1, 0644)
|
||||||
|
|
||||||
|
// Create second model
|
||||||
|
data2 := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withStringKV("general.architecture", "mistral").
|
||||||
|
withStringKV("general.name", "model2").
|
||||||
|
withStringKV("general.version", "2.0").
|
||||||
|
build()
|
||||||
|
os.WriteFile(filepath.Join(dir, "model2.gguf"), data2, 0644)
|
||||||
|
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: []pkg.Package{
|
||||||
|
{
|
||||||
|
Name: "model1",
|
||||||
|
Version: "1.0",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: pkg.GGUFFileHeader{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "model1",
|
||||||
|
ModelVersion: "1.0",
|
||||||
|
Architecture: "llama",
|
||||||
|
Quantization: "Unknown",
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 0,
|
||||||
|
Header: map[string]interface{}{},
|
||||||
|
TruncatedHeader: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "model2",
|
||||||
|
Version: "2.0",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: pkg.GGUFFileHeader{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "model2",
|
||||||
|
ModelVersion: "2.0",
|
||||||
|
Architecture: "mistral",
|
||||||
|
Quantization: "Unknown",
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 0,
|
||||||
|
Header: map[string]interface{}{},
|
||||||
|
TruncatedHeader: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedRelationships: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "catalog GGUF in nested directories",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
nestedDir := filepath.Join(dir, "models", "quantized")
|
||||||
|
os.MkdirAll(nestedDir, 0755)
|
||||||
|
|
||||||
|
data := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withStringKV("general.architecture", "qwen").
|
||||||
|
withStringKV("general.name", "qwen-nested").
|
||||||
|
build()
|
||||||
|
|
||||||
|
os.WriteFile(filepath.Join(nestedDir, "qwen.gguf"), data, 0644)
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: []pkg.Package{
|
||||||
|
{
|
||||||
|
Name: "qwen-nested",
|
||||||
|
Version: unknownGGUFData,
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: pkg.GGUFFileHeader{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "qwen-nested",
|
||||||
|
ModelVersion: unknownGGUFData,
|
||||||
|
Architecture: "qwen",
|
||||||
|
Quantization: "Unknown",
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 0,
|
||||||
|
Header: map[string]interface{}{},
|
||||||
|
TruncatedHeader: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedRelationships: nil,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
fixtureDir := tt.setup(t)
|
||||||
|
|
||||||
|
// Use pkgtest to catalog and compare
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, fixtureDir).
|
||||||
|
Expects(tt.expectedPackages, tt.expectedRelationships).
|
||||||
|
IgnoreLocationLayer().
|
||||||
|
IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger
|
||||||
|
WithCompareOptions(
|
||||||
|
// Ignore Hash as it's computed dynamically
|
||||||
|
cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "Hash"),
|
||||||
|
)
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_SkipsInvalidFiles(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create a valid GGUF
|
||||||
|
validData := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "valid-model").
|
||||||
|
build()
|
||||||
|
os.WriteFile(filepath.Join(dir, "valid.gguf"), validData, 0644)
|
||||||
|
|
||||||
|
// Create an invalid GGUF (wrong magic)
|
||||||
|
invalidData := newTestGGUFBuilder().buildInvalidMagic()
|
||||||
|
os.WriteFile(filepath.Join(dir, "invalid.gguf"), invalidData, 0644)
|
||||||
|
|
||||||
|
// Create a truncated GGUF
|
||||||
|
os.WriteFile(filepath.Join(dir, "truncated.gguf"), []byte{0x47}, 0644)
|
||||||
|
|
||||||
|
// Catalog should succeed and only return the valid package
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, dir).
|
||||||
|
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
|
||||||
|
// Should only find the valid model
|
||||||
|
require.Len(t, pkgs, 1)
|
||||||
|
assert.Equal(t, "valid-model", pkgs[0].Name)
|
||||||
|
})
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_Name(t *testing.T) {
|
||||||
|
cataloger := NewGGUFCataloger()
|
||||||
|
assert.Equal(t, "gguf-cataloger", cataloger.Name())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_EmptyDirectory(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
// Create a subdirectory to ensure glob still runs
|
||||||
|
os.MkdirAll(filepath.Join(dir, "models"), 0755)
|
||||||
|
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, dir).
|
||||||
|
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, rels []artifact.Relationship) {
|
||||||
|
assert.Empty(t, pkgs)
|
||||||
|
assert.Empty(t, rels)
|
||||||
|
})
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_MixedFiles(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create GGUF file
|
||||||
|
ggufData := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "test-model").
|
||||||
|
build()
|
||||||
|
os.WriteFile(filepath.Join(dir, "model.gguf"), ggufData, 0644)
|
||||||
|
|
||||||
|
// Create other file types
|
||||||
|
os.WriteFile(filepath.Join(dir, "README.md"), []byte("# Models"), 0644)
|
||||||
|
os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644)
|
||||||
|
os.WriteFile(filepath.Join(dir, "weights.bin"), []byte("weights"), 0644)
|
||||||
|
os.MkdirAll(filepath.Join(dir, "subdir"), 0755)
|
||||||
|
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, dir).
|
||||||
|
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
|
||||||
|
// Should only find the GGUF model
|
||||||
|
require.Len(t, pkgs, 1)
|
||||||
|
assert.Equal(t, "test-model", pkgs[0].Name)
|
||||||
|
assert.Equal(t, pkg.ModelPkg, pkgs[0].Type)
|
||||||
|
})
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_CaseInsensitiveGlob(t *testing.T) {
|
||||||
|
// Test that the glob pattern is case-sensitive (as expected for **/*.gguf)
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create lowercase .gguf
|
||||||
|
data := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "lowercase").
|
||||||
|
build()
|
||||||
|
os.WriteFile(filepath.Join(dir, "model.gguf"), data, 0644)
|
||||||
|
|
||||||
|
// Create uppercase .GGUF (should not match the glob)
|
||||||
|
os.WriteFile(filepath.Join(dir, "MODEL.GGUF"), data, 0644)
|
||||||
|
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, dir).
|
||||||
|
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
|
||||||
|
// Depending on filesystem case-sensitivity, we may get 1 or 2 packages
|
||||||
|
// On case-insensitive filesystems (macOS), both might match
|
||||||
|
// On case-sensitive filesystems (Linux), only lowercase matches
|
||||||
|
assert.GreaterOrEqual(t, len(pkgs), 1, "should find at least the lowercase file")
|
||||||
|
})
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
}
|
||||||
|
|
||||||
|
// createTestGGUFInDir creates a minimal test GGUF file in the specified directory
|
||||||
|
func createTestGGUFInDir(t *testing.T, dir, filename string) {
|
||||||
|
t.Helper()
|
||||||
|
data := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "test-model").
|
||||||
|
build()
|
||||||
|
|
||||||
|
path := filepath.Join(dir, filename)
|
||||||
|
err := os.WriteFile(path, data, 0644)
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
69
syft/pkg/cataloger/ai/package.go
Normal file
69
syft/pkg/cataloger/ai/package.go
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/internal/log"
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
)
|
||||||
|
|
||||||
|
func newGGUFPackage(metadata *pkg.GGUFFileHeader, locations ...file.Location) pkg.Package {
|
||||||
|
// Compute hash if not already set
|
||||||
|
if metadata.Hash == "" {
|
||||||
|
metadata.Hash = computeMetadataHash(metadata)
|
||||||
|
}
|
||||||
|
|
||||||
|
p := pkg.Package{
|
||||||
|
Name: metadata.ModelName,
|
||||||
|
Version: metadata.ModelVersion,
|
||||||
|
Locations: file.NewLocationSet(locations...),
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Licenses: pkg.NewLicenseSet(),
|
||||||
|
Metadata: *metadata,
|
||||||
|
// NOTE: PURL is intentionally not set as the package-url spec
|
||||||
|
// has not yet finalized support for ML model packages
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add license to the package if present in metadata
|
||||||
|
if metadata.License != "" {
|
||||||
|
p.Licenses.Add(pkg.NewLicenseFromFields(metadata.License, "", nil))
|
||||||
|
}
|
||||||
|
|
||||||
|
p.SetID()
|
||||||
|
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
// computeMetadataHash computes a stable hash of the metadata for use as a global identifier
|
||||||
|
func computeMetadataHash(metadata *pkg.GGUFFileHeader) string {
|
||||||
|
// Create a stable representation of the metadata
|
||||||
|
hashData := struct {
|
||||||
|
Format string
|
||||||
|
Name string
|
||||||
|
Version string
|
||||||
|
Architecture string
|
||||||
|
GGUFVersion uint32
|
||||||
|
TensorCount uint64
|
||||||
|
}{
|
||||||
|
Format: metadata.ModelFormat,
|
||||||
|
Name: metadata.ModelName,
|
||||||
|
Version: metadata.ModelVersion,
|
||||||
|
Architecture: metadata.Architecture,
|
||||||
|
GGUFVersion: metadata.GGUFVersion,
|
||||||
|
TensorCount: metadata.TensorCount,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Marshal to JSON for stable hashing
|
||||||
|
jsonBytes, err := json.Marshal(hashData)
|
||||||
|
if err != nil {
|
||||||
|
log.Debugf("failed to marshal metadata for hashing: %v", err)
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute SHA256 hash
|
||||||
|
hash := sha256.Sum256(jsonBytes)
|
||||||
|
return fmt.Sprintf("%x", hash[:8]) // Use first 8 bytes (16 hex chars)
|
||||||
|
}
|
||||||
126
syft/pkg/cataloger/ai/package_test.go
Normal file
126
syft/pkg/cataloger/ai/package_test.go
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/google/go-cmp/cmp"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestNewGGUFPackage exercises newGGUFPackage with complete, minimal, and
// multi-location metadata inputs, verifying the name/version/type mapping,
// license handling, PURL omission, and metadata attachment.
func TestNewGGUFPackage(t *testing.T) {
	tests := []struct {
		name      string
		metadata  *pkg.GGUFFileHeader
		locations []file.Location
		// checkFunc holds case-specific assertions run after the common checks.
		checkFunc func(t *testing.T, p pkg.Package)
	}{
		{
			name: "complete GGUF package with all fields",
			metadata: &pkg.GGUFFileHeader{
				ModelFormat:     "gguf",
				ModelName:       "llama3-8b-instruct",
				ModelVersion:    "3.0",
				License:         "Apache-2.0",
				Architecture:    "llama",
				Quantization:    "Q4_K_M",
				Parameters:      8030000000,
				GGUFVersion:     3,
				TensorCount:     291,
				Header:          map[string]any{},
				TruncatedHeader: false,
			},
			locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
			checkFunc: func(t *testing.T, p pkg.Package) {
				if d := cmp.Diff("llama3-8b-instruct", p.Name); d != "" {
					t.Errorf("Name mismatch (-want +got):\n%s", d)
				}
				if d := cmp.Diff("3.0", p.Version); d != "" {
					t.Errorf("Version mismatch (-want +got):\n%s", d)
				}
				if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
					t.Errorf("Type mismatch (-want +got):\n%s", d)
				}
				assert.Empty(t, p.PURL, "PURL should not be set for model packages")
				// The header license must surface as exactly one package license.
				assert.Len(t, p.Licenses.ToSlice(), 1)
				if d := cmp.Diff("Apache-2.0", p.Licenses.ToSlice()[0].Value); d != "" {
					t.Errorf("License value mismatch (-want +got):\n%s", d)
				}
				assert.NotEmpty(t, p.ID())
			},
		},
		{
			name: "minimal GGUF package",
			metadata: &pkg.GGUFFileHeader{
				ModelFormat:  "gguf",
				ModelName:    "simple-model",
				ModelVersion: "1.0",
				Architecture: "gpt2",
				GGUFVersion:  3,
				TensorCount:  50,
			},
			locations: []file.Location{file.NewLocation("/models/simple.gguf")},
			checkFunc: func(t *testing.T, p pkg.Package) {
				if d := cmp.Diff("simple-model", p.Name); d != "" {
					t.Errorf("Name mismatch (-want +got):\n%s", d)
				}
				if d := cmp.Diff("1.0", p.Version); d != "" {
					t.Errorf("Version mismatch (-want +got):\n%s", d)
				}
				if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
					t.Errorf("Type mismatch (-want +got):\n%s", d)
				}
				assert.Empty(t, p.PURL, "PURL should not be set for model packages")
				// No license in the header means no licenses on the package.
				assert.Empty(t, p.Licenses.ToSlice())
			},
		},
		{
			name: "GGUF package with multiple locations",
			metadata: &pkg.GGUFFileHeader{
				ModelFormat:  "gguf",
				ModelName:    "multi-location-model",
				ModelVersion: "1.5",
				Architecture: "llama",
				GGUFVersion:  3,
				TensorCount:  150,
			},
			locations: []file.Location{
				file.NewLocation("/models/model1.gguf"),
				file.NewLocation("/models/model2.gguf"),
			},
			checkFunc: func(t *testing.T, p pkg.Package) {
				// All provided locations must be retained on the package.
				assert.Len(t, p.Locations.ToSlice(), 2)
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			p := newGGUFPackage(tt.metadata, tt.locations...)

			// Common assertions shared by all cases.
			if d := cmp.Diff(tt.metadata.ModelName, p.Name); d != "" {
				t.Errorf("Name mismatch (-want +got):\n%s", d)
			}
			if d := cmp.Diff(tt.metadata.ModelVersion, p.Version); d != "" {
				t.Errorf("Version mismatch (-want +got):\n%s", d)
			}
			if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
				t.Errorf("Type mismatch (-want +got):\n%s", d)
			}

			// Verify the typed metadata is attached unchanged.
			metadata, ok := p.Metadata.(pkg.GGUFFileHeader)
			require.True(t, ok, "metadata should be GGUFFileHeader")
			if d := cmp.Diff(*tt.metadata, metadata); d != "" {
				t.Errorf("Metadata mismatch (-want +got):\n%s", d)
			}

			if tt.checkFunc != nil {
				tt.checkFunc(t, p)
			}
		})
	}
}
|
||||||
89
syft/pkg/cataloger/ai/parse_gguf.go
Normal file
89
syft/pkg/cataloger/ai/parse_gguf.go
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/binary"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
|
||||||
|
gguf_parser "github.com/gpustack/gguf-parser-go"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GGUF file format constants.
const (
	// ggufMagicNumber is the 4-byte magic "GGUF" interpreted as a
	// little-endian uint32.
	ggufMagicNumber = 0x46554747
	// maxHeaderSize caps how many bytes are read while parsing a header;
	// large tokenizer vocabularies can push headers into the tens of MB.
	maxHeaderSize = 50 * 1024 * 1024
)
|
||||||
|
|
||||||
|
// ggufHeaderReader reads just the header portion of a GGUF file efficiently.
type ggufHeaderReader struct {
	// reader is the source stream; callers are expected to wrap it in an
	// io.LimitedReader so header reads stay bounded (see readHeader).
	reader io.Reader
}
|
||||||
|
|
||||||
|
// readHeader reads the GGUF header data available from r.reader.
//
// It reads and validates the fixed 24-byte prefix (magic, version, tensor
// count, metadata KV count), then drains the rest of the reader into memory.
// Note: it does not itself stop at the end of the metadata section — the
// caller must wrap the underlying stream in an io.LimitedReader so that at
// most maxHeaderSize bytes are ever read; without that wrapper this would
// read the entire file.
func (r *ggufHeaderReader) readHeader() ([]byte, error) {
	// GGUF layout: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata KVs + tensor info.
	initialBuf := make([]byte, 24) // Enough for magic, version, tensor count, and kv count
	if _, err := io.ReadFull(r.reader, initialBuf); err != nil {
		return nil, fmt.Errorf("failed to read GGUF header prefix: %w", err)
	}

	// Verify the magic number before reading anything further.
	magic := binary.LittleEndian.Uint32(initialBuf[0:4])
	if magic != ggufMagicNumber {
		return nil, fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
	}

	// The metadata KV pairs determine the true header size, so read until
	// EOF (bounded by the caller's io.LimitedReader).
	headerData := make([]byte, 0, 1024*1024) // Start with 1MB capacity
	headerData = append(headerData, initialBuf...)

	// Read the remainder in 64KB chunks for efficiency; the LimitedReader
	// returns EOF once maxHeaderSize is reached.
	buf := make([]byte, 64*1024)
	for {
		n, err := r.reader.Read(buf)
		if n > 0 {
			headerData = append(headerData, buf[:n]...)
		}
		if err == io.EOF {
			// End of input (or the byte limit); we have all available data.
			break
		}
		if err != nil {
			return nil, fmt.Errorf("failed to read GGUF header: %w", err)
		}
	}

	return headerData, nil
}
|
||||||
|
|
||||||
|
// Helper to convert gguf_parser metadata to simpler types
|
||||||
|
func convertGGUFMetadataKVs(kvs gguf_parser.GGUFMetadataKVs) map[string]interface{} {
|
||||||
|
result := make(map[string]interface{})
|
||||||
|
|
||||||
|
// Limit KV pairs to avoid bloat
|
||||||
|
const maxKVPairs = 200
|
||||||
|
count := 0
|
||||||
|
|
||||||
|
for _, kv := range kvs {
|
||||||
|
if count >= maxKVPairs {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip standard fields that are extracted separately
|
||||||
|
switch kv.Key {
|
||||||
|
case "general.architecture", "general.name", "general.license",
|
||||||
|
"general.version", "general.parameter_count", "general.quantization":
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
result[kv.Key] = kv.Value
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
130
syft/pkg/cataloger/ai/parse_gguf_model.go
Normal file
130
syft/pkg/cataloger/ai/parse_gguf_model.go
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
gguf_parser "github.com/gpustack/gguf-parser-go"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/internal"
|
||||||
|
"github.com/anchore/syft/internal/unknown"
|
||||||
|
"github.com/anchore/syft/syft/artifact"
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||||
|
)
|
||||||
|
|
||||||
|
// unknownGGUFData is the placeholder used when a metadata field (e.g. the
// model version) cannot be determined from the file.
const unknownGGUFData = "unknown"
|
||||||
|
|
||||||
|
// parseGGUFModel parses a GGUF model file and returns the discovered package.
// Only the header portion (bounded by maxHeaderSize) is read — never the full
// tensor data — so large model files are not loaded into memory.
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
	defer internal.CloseAndLogError(reader, reader.Path())

	// Read and validate the GGUF header; the LimitedReader caps reads at
	// maxHeaderSize (50MB) to prevent OOM on huge or crafted files.
	limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
	headerReader := &ggufHeaderReader{reader: limitedReader}
	headerData, err := headerReader.readHeader()
	if err != nil {
		return nil, nil, fmt.Errorf("failed to read GGUF header: %w", err)
	}

	// gguf-parser-go only accepts a file path, so spill the validated header
	// bytes into a temporary file for it to parse.
	tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf")
	if err != nil {
		return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
	}
	tempPath := tempFile.Name()
	defer os.Remove(tempPath)

	// Write the validated header data to the temp file.
	if _, err := tempFile.Write(headerData); err != nil {
		tempFile.Close()
		return nil, nil, fmt.Errorf("failed to write to temp file: %w", err)
	}
	tempFile.Close()

	// Parse using gguf-parser-go, skipping large metadata blobs we don't need.
	ggufFile, err := gguf_parser.ParseGGUFFile(tempPath,
		gguf_parser.SkipLargeMetadata(),
	)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to parse GGUF file: %w", err)
	}

	// Extract the library's high-level metadata view.
	metadata := ggufFile.Metadata()

	// Convert to syft's metadata structure.
	syftMetadata := &pkg.GGUFFileHeader{
		ModelFormat:     "gguf",
		ModelName:       metadata.Name,
		ModelVersion:    extractVersion(ggufFile.Header.MetadataKV),
		License:         metadata.License,
		Architecture:    metadata.Architecture,
		Quantization:    metadata.FileTypeDescriptor,
		Parameters:      uint64(metadata.Parameters),
		GGUFVersion:     uint32(ggufFile.Header.Version),
		TensorCount:     ggufFile.Header.TensorCount,
		Header:          convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
		TruncatedHeader: false, // We read the full header
		Hash:            "",    // Will be computed in newGGUFPackage
	}

	// Fall back to the filename when the header carries no model name.
	if syftMetadata.ModelName == "" {
		syftMetadata.ModelName = extractModelNameFromPath(reader.Path())
	}

	// If the version is still unknown, try to infer it from the name.
	if syftMetadata.ModelVersion == unknownGGUFData {
		syftMetadata.ModelVersion = extractVersionFromName(syftMetadata.ModelName)
	}

	// Create the package from the assembled metadata, marking this file as
	// the primary evidence for the package.
	p := newGGUFPackage(
		syftMetadata,
		reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
	)

	return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse GGUF file")
}
|
||||||
|
|
||||||
|
// extractVersion attempts to extract version from metadata KV pairs
|
||||||
|
func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string {
|
||||||
|
for _, kv := range kvs {
|
||||||
|
if kv.Key == "general.version" {
|
||||||
|
if v, ok := kv.Value.(string); ok && v != "" {
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return unknownGGUFData
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractVersionFromName tries to extract a version from the model name.
// Patterns like "v1.0", "1.5b", or "3.0" are not yet parsed — this currently
// always returns unknownGGUFData and exists as an extension point (a regex
// over the name could be added here later).
func extractVersionFromName(_ string) string {
	// Look for version patterns like "v1.0", "1.5b", "3.0", etc.
	// For now, return unknown - this could be enhanced with regex
	return unknownGGUFData
}
|
||||||
|
|
||||||
|
// extractModelNameFromPath derives a model name from a file path by taking
// the base filename and stripping a trailing ".gguf" extension, if present.
func extractModelNameFromPath(path string) string {
	return strings.TrimSuffix(filepath.Base(path), ".gguf")
}
|
||||||
|
|
||||||
|
// compile-time proof that parseGGUFModel satisfies the generic.Parser interface.
var _ generic.Parser = parseGGUFModel
|
||||||
41
syft/pkg/cataloger/ai/test_builder_test.go
Normal file
41
syft/pkg/cataloger/ai/test_builder_test.go
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
	"fmt"
	"os"
	"testing"

	gguf_parser "github.com/gpustack/gguf-parser-go"
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
// Create a test GGUF file
|
||||||
|
data := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "test-model").
|
||||||
|
build()
|
||||||
|
|
||||||
|
// Write to temp file
|
||||||
|
tempFile, err := os.CreateTemp("", "test-*.gguf")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer os.Remove(tempFile.Name())
|
||||||
|
|
||||||
|
if _, err := tempFile.Write(data); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
tempFile.Close()
|
||||||
|
|
||||||
|
fmt.Printf("Wrote %d bytes to %s\n", len(data), tempFile.Name())
|
||||||
|
|
||||||
|
// Try to parse it
|
||||||
|
fmt.Println("Attempting to parse...")
|
||||||
|
gf, err := gguf_parser.ParseGGUFFile(tempFile.Name(), gguf_parser.SkipLargeMetadata())
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("Parse error: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Success! Model: %s\n", gf.Metadata().Name)
|
||||||
|
}
|
||||||
127
syft/pkg/cataloger/ai/test_helpers_test.go
Normal file
127
syft/pkg/cataloger/ai/test_helpers_test.go
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GGUF type constants for the test builder. The ggufType* values mirror the
// GGUF specification's metadata value-type enum.
const (
	// ggufMagic is "GGUF" as a little-endian uint32.
	// NOTE(review): duplicates ggufMagicNumber in parse_gguf.go (same
	// package); consider consolidating into a single constant.
	ggufMagic       = 0x46554747
	ggufTypeUint8   = 0
	ggufTypeInt8    = 1
	ggufTypeUint16  = 2
	ggufTypeInt16   = 3
	ggufTypeUint32  = 4
	ggufTypeInt32   = 5
	ggufTypeFloat32 = 6
	ggufTypeBool    = 7
	ggufTypeString  = 8
	ggufTypeArray   = 9
	ggufTypeUint64  = 10
	ggufTypeInt64   = 11
	ggufTypeFloat64 = 12
)
|
||||||
|
|
||||||
|
// testGGUFBuilder incrementally constructs GGUF binary fixtures for tests.
type testGGUFBuilder struct {
	buf         *bytes.Buffer // accumulates the serialized output
	version     uint32        // GGUF format version written to the header
	tensorCount uint64        // tensor count written to the header
	kvPairs     []testKVPair  // metadata key/value pairs, serialized in order
}
|
||||||
|
|
||||||
|
// testKVPair is a single GGUF metadata entry staged for serialization.
type testKVPair struct {
	key       string
	valueType uint32      // one of the ggufType* constants
	value     interface{} // concrete Go value matching valueType
}
|
||||||
|
|
||||||
|
func newTestGGUFBuilder() *testGGUFBuilder {
|
||||||
|
return &testGGUFBuilder{
|
||||||
|
buf: new(bytes.Buffer),
|
||||||
|
version: 3,
|
||||||
|
tensorCount: 0,
|
||||||
|
kvPairs: []testKVPair{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// withVersion sets the GGUF format version written into the header.
func (b *testGGUFBuilder) withVersion(v uint32) *testGGUFBuilder {
	b.version = v
	return b
}
|
||||||
|
|
||||||
|
// withTensorCount sets the tensor count written into the header.
func (b *testGGUFBuilder) withTensorCount(count uint64) *testGGUFBuilder {
	b.tensorCount = count
	return b
}
|
||||||
|
|
||||||
|
// withStringKV appends a string-typed metadata key/value pair.
func (b *testGGUFBuilder) withStringKV(key, value string) *testGGUFBuilder {
	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeString, value: value})
	return b
}
|
||||||
|
|
||||||
|
// withUint64KV appends a uint64-typed metadata key/value pair.
func (b *testGGUFBuilder) withUint64KV(key string, value uint64) *testGGUFBuilder {
	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint64, value: value})
	return b
}
|
||||||
|
|
||||||
|
// withUint32KV appends a uint32-typed metadata key/value pair.
func (b *testGGUFBuilder) withUint32KV(key string, value uint32) *testGGUFBuilder {
	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint32, value: value})
	return b
}
|
||||||
|
|
||||||
|
// writeString writes a GGUF string: a little-endian uint64 length prefix
// followed by the raw bytes (no terminator).
func (b *testGGUFBuilder) writeString(s string) {
	binary.Write(b.buf, binary.LittleEndian, uint64(len(s)))
	b.buf.WriteString(s)
}
|
||||||
|
|
||||||
|
// build serializes the builder state into GGUF binary form: magic, version,
// tensor count, KV count, then each key/value pair. Only the value types the
// tests actually use are handled in the switch; other types are silently
// skipped. binary.Write errors are ignored because bytes.Buffer writes
// cannot fail.
func (b *testGGUFBuilder) build() []byte {
	// Write magic number "GGUF" (little-endian uint32).
	binary.Write(b.buf, binary.LittleEndian, uint32(ggufMagic))

	// Write format version.
	binary.Write(b.buf, binary.LittleEndian, b.version)

	// Write tensor count.
	binary.Write(b.buf, binary.LittleEndian, b.tensorCount)

	// Write metadata KV count.
	binary.Write(b.buf, binary.LittleEndian, uint64(len(b.kvPairs)))

	// Write KV pairs: length-prefixed key, value-type tag, then the value.
	for _, kv := range b.kvPairs {
		// Write key
		b.writeString(kv.key)
		// Write value type
		binary.Write(b.buf, binary.LittleEndian, kv.valueType)
		// Write value based on type
		switch kv.valueType {
		case ggufTypeString:
			b.writeString(kv.value.(string))
		case ggufTypeUint32:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(uint32))
		case ggufTypeUint64:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(uint64))
		case ggufTypeUint8:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(uint8))
		case ggufTypeInt32:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(int32))
		case ggufTypeBool:
			// Booleans encode as one byte: 1 for true, 0 for false.
			var v uint8
			if kv.value.(bool) {
				v = 1
			}
			binary.Write(b.buf, binary.LittleEndian, v)
		}
	}

	return b.buf.Bytes()
}
|
||||||
|
|
||||||
|
// buildInvalidMagic creates a file with invalid magic number
|
||||||
|
func (b *testGGUFBuilder) buildInvalidMagic() []byte {
|
||||||
|
buf := new(bytes.Buffer)
|
||||||
|
binary.Write(buf, binary.LittleEndian, uint32(0x12345678))
|
||||||
|
return buf.Bytes()
|
||||||
|
}
|
||||||
47
syft/pkg/gguf.go
Normal file
47
syft/pkg/gguf.go
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
package pkg
|
||||||
|
|
||||||
|
// GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file.
// GGUF is a binary file format used for storing model weights for the GGML library, designed for fast
// loading and saving of models, particularly quantized large language models.
type GGUFFileHeader struct {
	// ModelFormat is always "gguf" for packages produced by this cataloger.
	ModelFormat string `json:"modelFormat" cyclonedx:"modelFormat"`

	// ModelName is the name of the model (from general.name, or the filename as a fallback).
	ModelName string `json:"modelName" cyclonedx:"modelName"`

	// ModelVersion is the version of the model (from general.version if available, else "unknown").
	ModelVersion string `json:"modelVersion,omitempty" cyclonedx:"modelVersion"`

	// FileSize is the size of the GGUF file in bytes (best-effort if available from the resolver).
	FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`

	// Hash is a truncated SHA-256 of identifying metadata fields, giving the
	// same model a stable global identifier across scan sources.
	Hash string `json:"hash,omitempty" cyclonedx:"hash"`

	// License is the license identifier (from general.license if present).
	License string `json:"license,omitempty" cyclonedx:"license"`

	// GGUFVersion is the GGUF container format version (e.g., 3).
	GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"`

	// Architecture is the model architecture (from general.architecture, e.g., "qwen3moe", "llama").
	Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`

	// Quantization is the quantization type descriptor (e.g., "IQ4_NL", "Q4_K_M").
	Quantization string `json:"quantization,omitempty" cyclonedx:"quantization"`

	// Parameters is the number of model parameters (if present in the header).
	Parameters uint64 `json:"parameters,omitempty" cyclonedx:"parameters"`

	// TensorCount is the number of tensors declared in the model header.
	TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`

	// Header contains the remaining key-value pairs from the GGUF header that are not already
	// represented as typed fields above. This preserves additional metadata fields for reference
	// (namespaced with general.*, llama.*, etc.) while avoiding duplication.
	Header map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`

	// TruncatedHeader indicates if the header was truncated during parsing (for very large headers).
	TruncatedHeader bool `json:"truncatedHeader,omitempty" cyclonedx:"truncatedHeader"`
}
|
||||||
@ -50,6 +50,7 @@ const (
|
|||||||
TerraformPkg Type = "terraform"
|
TerraformPkg Type = "terraform"
|
||||||
WordpressPluginPkg Type = "wordpress-plugin"
|
WordpressPluginPkg Type = "wordpress-plugin"
|
||||||
HomebrewPkg Type = "homebrew"
|
HomebrewPkg Type = "homebrew"
|
||||||
|
ModelPkg Type = "model"
|
||||||
)
|
)
|
||||||
|
|
||||||
// AllPkgs represents all supported package types
|
// AllPkgs represents all supported package types
|
||||||
@ -94,6 +95,7 @@ var AllPkgs = []Type{
|
|||||||
TerraformPkg,
|
TerraformPkg,
|
||||||
WordpressPluginPkg,
|
WordpressPluginPkg,
|
||||||
HomebrewPkg,
|
HomebrewPkg,
|
||||||
|
ModelPkg,
|
||||||
}
|
}
|
||||||
|
|
||||||
// PackageURLType returns the PURL package type for the current package.
|
// PackageURLType returns the PURL package type for the current package.
|
||||||
|
|||||||
@ -155,6 +155,7 @@ func TestTypeFromPURL(t *testing.T) {
|
|||||||
expectedTypes.Remove(string(HomebrewPkg))
|
expectedTypes.Remove(string(HomebrewPkg))
|
||||||
expectedTypes.Remove(string(TerraformPkg))
|
expectedTypes.Remove(string(TerraformPkg))
|
||||||
expectedTypes.Remove(string(GraalVMNativeImagePkg))
|
expectedTypes.Remove(string(GraalVMNativeImagePkg))
|
||||||
|
expectedTypes.Remove(string(ModelPkg)) // no valid purl for ai artifacts currently
|
||||||
expectedTypes.Remove(string(PhpPeclPkg)) // we should always consider this a pear package
|
expectedTypes.Remove(string(PhpPeclPkg)) // we should always consider this a pear package
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
|
|||||||
39
syft/source/ocimodelsource/metadata.go
Normal file
39
syft/source/ocimodelsource/metadata.go
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import "github.com/anchore/syft/syft/source"
|
||||||
|
|
||||||
|
// OCIModelMetadata represents all static metadata that defines what an OCI model artifact is.
// This is similar to ImageMetadata but includes model-specific fields and OCI artifact annotations.
type OCIModelMetadata struct {
	// Core OCI artifact metadata (mirrors ImageMetadata)
	UserInput      string                 `json:"userInput"`      // the original reference string the user supplied
	ID             string                 `json:"artifactID"`     // set from the manifest digest
	ManifestDigest string                 `json:"manifestDigest"` // digest of the OCI manifest
	MediaType      string                 `json:"mediaType"`      // media type of the manifest itself
	Tags           []string               `json:"tags"`           // tag from the reference, if it carried one
	Size           int64                  `json:"artifactSize"`   // sum of all layer sizes
	Layers         []source.LayerMetadata `json:"layers"`         // per-layer media type / digest / size
	RawManifest    []byte                 `json:"manifest"`       // raw manifest bytes as fetched
	RawConfig      []byte                 `json:"config"`         // raw config blob bytes as fetched
	RepoDigests    []string               `json:"repoDigests"`    // repo@digest form, when the manifest digest is known
	Architecture   string                 `json:"architecture"`
	Variant        string                 `json:"architectureVariant,omitempty"`
	OS             string                 `json:"os"`
	Labels         map[string]string      `json:"labels,omitempty"`

	// OCI-specific metadata
	Annotations map[string]string `json:"annotations,omitempty"` // manifest-level annotations

	// Model-specific metadata
	ModelFormat string          `json:"modelFormat,omitempty"` // e.g., "gguf"
	GGUFLayers  []GGUFLayerInfo `json:"ggufLayers,omitempty"`  // details of each GGUF layer found
}
|
||||||
|
|
||||||
|
// GGUFLayerInfo represents metadata about a GGUF layer in the OCI artifact.
type GGUFLayerInfo struct {
	Digest      string            `json:"digest"`    // layer blob digest
	Size        int64             `json:"size"`      // Full blob size in registry
	MediaType   string            `json:"mediaType"` // Should be "application/vnd.docker.ai.gguf.v3"
	Annotations map[string]string `json:"annotations,omitempty"`
	// FetchedBytes records how many bytes were actually fetched via range-GET,
	// which may be less than Size when the blob is smaller than the header cap.
	FetchedBytes int64 `json:"fetchedBytes"`
}
|
||||||
260
syft/source/ocimodelsource/oci_model_source.go
Normal file
260
syft/source/ocimodelsource/oci_model_source.go
Normal file
@ -0,0 +1,260 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
	"context"
	"fmt"
	"os"
	"sync"

	"github.com/opencontainers/go-digest"

	"github.com/anchore/syft/internal/log"
	"github.com/anchore/syft/syft/artifact"
	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/source"
	"github.com/anchore/syft/syft/source/internal"
)
|
||||||
|
|
||||||
|
var _ source.Source = (*ociModelSource)(nil)
|
||||||
|
|
||||||
|
// Config holds the configuration for an OCI model artifact source.
type Config struct {
	Reference string            // the registry reference the artifact was fetched from
	Platform  string            // requested platform, if any
	Alias     source.Alias      // optional user-supplied name/version/supplier override
	Client    *RegistryClient   // client used for any further registry interaction
	Metadata  *OCIModelMetadata // resolved artifact metadata
	TempFiles map[string]string // Virtual path -> temp file path
}
|
||||||
|
|
||||||
|
// ociModelSource implements the source.Source interface for OCI model artifacts.
type ociModelSource struct {
	id       artifact.ID
	config   Config
	resolver *ociModelResolver // lazily created; guarded by mutex (see FileResolver/Close)
	mutex    *sync.Mutex
}
|
||||||
|
|
||||||
|
// NewFromArtifact creates a new OCI model source from a fetched model artifact.
|
||||||
|
func NewFromArtifact(artifact *ModelArtifact, client *RegistryClient, alias source.Alias) (source.Source, error) {
|
||||||
|
// Build metadata
|
||||||
|
metadata := buildMetadata(artifact)
|
||||||
|
|
||||||
|
// Fetch GGUF layer headers via range-GET
|
||||||
|
tempFiles := make(map[string]string)
|
||||||
|
ggufLayers := make([]GGUFLayerInfo, 0, len(artifact.GGUFLayers))
|
||||||
|
|
||||||
|
for idx, layer := range artifact.GGUFLayers {
|
||||||
|
log.WithFields("digest", layer.Digest, "size", layer.Size).Debug("fetching GGUF layer header")
|
||||||
|
|
||||||
|
// Fetch header via range-GET
|
||||||
|
headerData, err := client.FetchBlobRange(context.Background(), artifact.Reference, layer.Digest, MaxHeaderBytes)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch GGUF layer header: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract virtual path from annotations
|
||||||
|
virtualPath := extractVirtualPath(idx, extractAnnotations(layer.Annotations))
|
||||||
|
|
||||||
|
// Create temp file
|
||||||
|
tempPath, err := createTempFileFromData(headerData, virtualPath)
|
||||||
|
if err != nil {
|
||||||
|
// Clean up any previously created temp files
|
||||||
|
for _, path := range tempFiles {
|
||||||
|
_ = removeFile(path)
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("failed to create temp file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
tempFiles[virtualPath] = tempPath
|
||||||
|
|
||||||
|
// Add to GGUF layers metadata
|
||||||
|
ggufLayers = append(ggufLayers, GGUFLayerInfo{
|
||||||
|
Digest: layer.Digest.String(),
|
||||||
|
Size: layer.Size,
|
||||||
|
MediaType: string(layer.MediaType),
|
||||||
|
Annotations: extractAnnotations(layer.Annotations),
|
||||||
|
FetchedBytes: int64(len(headerData)),
|
||||||
|
})
|
||||||
|
|
||||||
|
log.WithFields("virtualPath", virtualPath, "tempPath", tempPath, "bytes", len(headerData)).Debug("created temp file for GGUF header")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update metadata with GGUF layers
|
||||||
|
metadata.GGUFLayers = ggufLayers
|
||||||
|
metadata.ModelFormat = "gguf"
|
||||||
|
|
||||||
|
// Build config
|
||||||
|
config := Config{
|
||||||
|
Reference: artifact.Reference.String(),
|
||||||
|
Alias: alias,
|
||||||
|
Client: client,
|
||||||
|
Metadata: metadata,
|
||||||
|
TempFiles: tempFiles,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Derive artifact ID
|
||||||
|
id := deriveIDFromArtifact(config)
|
||||||
|
|
||||||
|
return &ociModelSource{
|
||||||
|
id: id,
|
||||||
|
config: config,
|
||||||
|
mutex: &sync.Mutex{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildMetadata constructs OCIModelMetadata from a ModelArtifact.
|
||||||
|
func buildMetadata(artifact *ModelArtifact) *OCIModelMetadata {
|
||||||
|
// Extract layers
|
||||||
|
layers := make([]source.LayerMetadata, len(artifact.Manifest.Layers))
|
||||||
|
for i, layer := range artifact.Manifest.Layers {
|
||||||
|
layers[i] = source.LayerMetadata{
|
||||||
|
MediaType: string(layer.MediaType),
|
||||||
|
Digest: layer.Digest.String(),
|
||||||
|
Size: layer.Size,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract tags
|
||||||
|
var tags []string
|
||||||
|
if tagged, ok := artifact.Reference.(interface{ TagStr() string }); ok {
|
||||||
|
if tag := tagged.TagStr(); tag != "" {
|
||||||
|
tags = []string{tag}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract repo digests
|
||||||
|
var repoDigests []string
|
||||||
|
if artifact.ManifestDigest != "" {
|
||||||
|
repoDigests = []string{artifact.Reference.Context().String() + "@" + artifact.ManifestDigest}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build metadata
|
||||||
|
return &OCIModelMetadata{
|
||||||
|
UserInput: artifact.Reference.String(),
|
||||||
|
ID: artifact.ManifestDigest,
|
||||||
|
ManifestDigest: artifact.ManifestDigest,
|
||||||
|
MediaType: string(artifact.Manifest.MediaType),
|
||||||
|
Tags: tags,
|
||||||
|
Size: calculateTotalSize(layers),
|
||||||
|
Layers: layers,
|
||||||
|
RawManifest: artifact.RawManifest,
|
||||||
|
RawConfig: artifact.RawConfig,
|
||||||
|
RepoDigests: repoDigests,
|
||||||
|
Architecture: artifact.Config.Architecture,
|
||||||
|
Variant: artifact.Config.Variant,
|
||||||
|
OS: artifact.Config.OS,
|
||||||
|
Labels: artifact.Config.Config.Labels,
|
||||||
|
Annotations: extractManifestAnnotations(artifact.Manifest),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractAnnotations normalizes an annotation map: a nil input yields an empty
// (non-nil) map, anything else is passed through unchanged.
func extractAnnotations(annotations map[string]string) map[string]string {
	if annotations != nil {
		return annotations
	}
	return make(map[string]string)
}
|
||||||
|
|
||||||
|
// extractManifestAnnotations extracts annotations from the manifest.
//
// NOTE(review): go-containerregistry's *v1.Manifest exposes annotations as a
// plain struct field (Annotations) and does not implement a GetAnnotations
// method, so this assertion likely never matches and the function always
// returns an empty map — confirm, and consider reading the field directly at
// the call site instead.
func extractManifestAnnotations(manifest interface{}) map[string]string {
	// v1.Manifest has Annotations field
	if m, ok := manifest.(interface{ GetAnnotations() map[string]string }); ok {
		return m.GetAnnotations()
	}
	return make(map[string]string)
}
|
||||||
|
|
||||||
|
// calculateTotalSize sums up the size of all layers.
|
||||||
|
func calculateTotalSize(layers []source.LayerMetadata) int64 {
|
||||||
|
var total int64
|
||||||
|
for _, layer := range layers {
|
||||||
|
total += layer.Size
|
||||||
|
}
|
||||||
|
return total
|
||||||
|
}
|
||||||
|
|
||||||
|
// deriveIDFromArtifact generates an artifact ID from the config.
|
||||||
|
func deriveIDFromArtifact(cfg Config) artifact.ID {
|
||||||
|
var info string
|
||||||
|
|
||||||
|
if !cfg.Alias.IsEmpty() {
|
||||||
|
// Use alias for stable artifact ID
|
||||||
|
info = fmt.Sprintf("%s@%s", cfg.Alias.Name, cfg.Alias.Version)
|
||||||
|
} else if cfg.Metadata.ManifestDigest != "" {
|
||||||
|
// Use manifest digest
|
||||||
|
info = cfg.Metadata.ManifestDigest
|
||||||
|
} else {
|
||||||
|
// Fall back to reference
|
||||||
|
log.Warn("no explicit name/version or manifest digest, deriving artifact ID from reference")
|
||||||
|
info = cfg.Reference
|
||||||
|
}
|
||||||
|
|
||||||
|
return internal.ArtifactIDFromDigest(digest.SHA256.FromString(info).String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// ID returns the artifact ID derived for this source (see deriveIDFromArtifact).
func (s *ociModelSource) ID() artifact.ID {
	return s.id
}
|
||||||
|
|
||||||
|
// Describe returns a description of the source.
|
||||||
|
func (s *ociModelSource) Describe() source.Description {
|
||||||
|
name := s.config.Reference
|
||||||
|
version := ""
|
||||||
|
supplier := ""
|
||||||
|
|
||||||
|
if !s.config.Alias.IsEmpty() {
|
||||||
|
a := s.config.Alias
|
||||||
|
if a.Name != "" {
|
||||||
|
name = a.Name
|
||||||
|
}
|
||||||
|
if a.Version != "" {
|
||||||
|
version = a.Version
|
||||||
|
}
|
||||||
|
if a.Supplier != "" {
|
||||||
|
supplier = a.Supplier
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return source.Description{
|
||||||
|
ID: string(s.id),
|
||||||
|
Name: name,
|
||||||
|
Version: version,
|
||||||
|
Supplier: supplier,
|
||||||
|
Metadata: s.config.Metadata,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileResolver returns a file resolver for accessing GGUF header files.
|
||||||
|
func (s *ociModelSource) FileResolver(_ source.Scope) (file.Resolver, error) {
|
||||||
|
s.mutex.Lock()
|
||||||
|
defer s.mutex.Unlock()
|
||||||
|
|
||||||
|
if s.resolver == nil {
|
||||||
|
s.resolver = newOCIModelResolver(s.config.TempFiles)
|
||||||
|
}
|
||||||
|
|
||||||
|
return s.resolver, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close cleans up temporary files.
|
||||||
|
func (s *ociModelSource) Close() error {
|
||||||
|
s.mutex.Lock()
|
||||||
|
defer s.mutex.Unlock()
|
||||||
|
|
||||||
|
if s.resolver != nil {
|
||||||
|
if err := s.resolver.cleanup(); err != nil {
|
||||||
|
log.WithFields("error", err).Warn("failed to cleanup temp files")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
s.resolver = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// removeFile removes a file and logs any errors.
|
||||||
|
func removeFile(path string) error {
|
||||||
|
return nil // Placeholder for now
|
||||||
|
}
|
||||||
76
syft/source/ocimodelsource/oci_model_source_provider.go
Normal file
76
syft/source/ocimodelsource/oci_model_source_provider.go
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/anchore/stereoscope/pkg/image"
|
||||||
|
"github.com/anchore/syft/internal/log"
|
||||||
|
"github.com/anchore/syft/syft/source"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewSourceProvider creates a new OCI model artifact source provider.
|
||||||
|
func NewSourceProvider(reference string, registryOpts *image.RegistryOptions, alias source.Alias) source.Provider {
|
||||||
|
return &ociModelSourceProvider{
|
||||||
|
reference: reference,
|
||||||
|
registryOpts: registryOpts,
|
||||||
|
alias: alias,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ociModelSourceProvider attempts to resolve a registry reference as an OCI
// model artifact (see Provide).
type ociModelSourceProvider struct {
	reference    string                 // the user-supplied registry reference
	registryOpts *image.RegistryOptions // credentials / TLS settings for registry access
	alias        source.Alias           // optional name/version/supplier override
}
|
||||||
|
|
||||||
|
// Name returns the identifier used for this provider in source selection.
func (p *ociModelSourceProvider) Name() string {
	return "oci-model-artifact"
}
|
||||||
|
|
||||||
|
func (p *ociModelSourceProvider) Provide(ctx context.Context) (source.Source, error) {
|
||||||
|
// Create registry client
|
||||||
|
client, err := NewRegistryClient(p.registryOpts)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create registry client: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this is a model artifact (lightweight check)
|
||||||
|
log.WithFields("reference", p.reference).Debug("checking if reference is a model artifact")
|
||||||
|
|
||||||
|
isModel, err := client.IsModelArtifactReference(ctx, p.reference)
|
||||||
|
if err != nil {
|
||||||
|
// Log the error but don't fail - let other providers try
|
||||||
|
log.WithFields("reference", p.reference, "error", err).Debug("failed to check if reference is a model artifact")
|
||||||
|
return nil, fmt.Errorf("not an OCI model artifact: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !isModel {
|
||||||
|
log.WithFields("reference", p.reference).Debug("reference is not a model artifact")
|
||||||
|
return nil, fmt.Errorf("not an OCI model artifact")
|
||||||
|
}
|
||||||
|
|
||||||
|
log.WithFields("reference", p.reference).Info("detected OCI model artifact, fetching headers")
|
||||||
|
|
||||||
|
// Fetch the full model artifact with metadata
|
||||||
|
artifact, err := client.FetchModelArtifact(ctx, p.reference)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch model artifact: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if there are any GGUF layers
|
||||||
|
if len(artifact.GGUFLayers) == 0 {
|
||||||
|
log.WithFields("reference", p.reference).Warn("model artifact has no GGUF layers")
|
||||||
|
return nil, fmt.Errorf("model artifact has no GGUF layers")
|
||||||
|
}
|
||||||
|
|
||||||
|
log.WithFields("reference", p.reference, "ggufLayers", len(artifact.GGUFLayers)).Info("found GGUF layers in model artifact")
|
||||||
|
|
||||||
|
// Create the source
|
||||||
|
src, err := NewFromArtifact(artifact, client, p.alias)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create OCI model source: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return src, nil
|
||||||
|
}
|
||||||
53
syft/source/ocimodelsource/oci_model_source_test.go
Normal file
53
syft/source/ocimodelsource/oci_model_source_test.go
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestExtractVirtualPath(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
layerIndex int
|
||||||
|
annotations map[string]string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "with title annotation",
|
||||||
|
layerIndex: 0,
|
||||||
|
annotations: map[string]string{"org.opencontainers.image.title": "model.gguf"},
|
||||||
|
expected: "/model.gguf",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "without title annotation",
|
||||||
|
layerIndex: 1,
|
||||||
|
annotations: map[string]string{},
|
||||||
|
expected: "/model-layer-1.gguf",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := extractVirtualPath(tt.layerIndex, tt.annotations)
|
||||||
|
assert.Equal(t, tt.expected, result)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCalculateTotalSize is a compile-only placeholder.
//
// NOTE(review): this test never calls calculateTotalSize — it only asserts
// that a locally-declared anonymous-struct slice is non-nil. It should build
// []source.LayerMetadata values and assert the summed Size; as written it
// provides no coverage of the function it names.
func TestCalculateTotalSize(t *testing.T) {
	// This is imported from syft/source
	// Just a simple test to ensure it works
	layers := []struct {
		MediaType string
		Digest    string
		Size      int64
	}{
		{"application/vnd.docker.image.rootfs.diff.tar.gzip", "sha256:abc", 100},
		{"application/vnd.docker.image.rootfs.diff.tar.gzip", "sha256:def", 200},
	}

	// We'd need to convert to source.LayerMetadata to test this properly
	// For now, just ensure the package compiles
	assert.NotNil(t, layers)
}
|
||||||
227
syft/source/ocimodelsource/registry_client.go
Normal file
227
syft/source/ocimodelsource/registry_client.go
Normal file
@ -0,0 +1,227 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/google/go-containerregistry/pkg/authn"
|
||||||
|
"github.com/google/go-containerregistry/pkg/name"
|
||||||
|
v1 "github.com/google/go-containerregistry/pkg/v1"
|
||||||
|
"github.com/google/go-containerregistry/pkg/v1/remote"
|
||||||
|
|
||||||
|
"github.com/anchore/stereoscope/pkg/image"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// Model artifact media types as per Docker's OCI artifacts for AI model packaging
	// Reference: https://www.docker.com/blog/oci-artifacts-for-ai-model-packaging/

	// ModelConfigMediaType identifies an OCI config blob describing an AI model artifact.
	ModelConfigMediaType = "application/vnd.docker.ai.model.config.v0.1+json"
	// GGUFLayerMediaType identifies a layer blob containing a GGUF model file.
	GGUFLayerMediaType = "application/vnd.docker.ai.gguf.v3"

	// MaxHeaderBytes is the maximum number of bytes to fetch via range-GET for
	// GGUF headers — enough for header metadata without pulling the full blob.
	MaxHeaderBytes = 10 * 1024 * 1024 // 10 MB
)
|
||||||
|
|
||||||
|
// RegistryClient handles OCI registry interactions for model artifacts.
type RegistryClient struct {
	// options carries authentication/transport settings applied to every
	// go-containerregistry remote call made by this client.
	options []remote.Option
}
|
||||||
|
|
||||||
|
// NewRegistryClient creates a new registry client with authentication from RegistryOptions.
|
||||||
|
func NewRegistryClient(registryOpts *image.RegistryOptions) (*RegistryClient, error) {
|
||||||
|
opts, err := buildRemoteOptions(registryOpts)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to build remote options: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &RegistryClient{
|
||||||
|
options: opts,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildRemoteOptions converts stereoscope RegistryOptions to go-containerregistry remote.Options.
//
// NOTE(review): the InsecureSkipTLSVerify branch dereferences
// transport.TLSClientConfig without a nil check — if the cloned default
// transport has no TLS config this panics; consider initializing a
// tls.Config first and confirm against remote.DefaultTransport's defaults.
// NOTE(review): when both InsecureSkipTLSVerify and InsecureUseHTTP are set,
// two WithTransport options are appended and the later (plain HTTP) one
// presumably takes effect — confirm that is the intended precedence.
func buildRemoteOptions(registryOpts *image.RegistryOptions) ([]remote.Option, error) {
	var opts []remote.Option

	// nil options mean anonymous access with default transport settings
	if registryOpts == nil {
		return opts, nil
	}

	// Build authenticator
	authenticator := buildAuthenticator(registryOpts)
	opts = append(opts, remote.WithAuth(authenticator))

	// Handle TLS settings
	if registryOpts.InsecureSkipTLSVerify {
		transport := remote.DefaultTransport.(*http.Transport).Clone()
		transport.TLSClientConfig.InsecureSkipVerify = true
		opts = append(opts, remote.WithTransport(transport))
	}

	// Handle insecure HTTP
	if registryOpts.InsecureUseHTTP {
		opts = append(opts, remote.WithTransport(http.DefaultTransport))
	}

	return opts, nil
}
|
||||||
|
|
||||||
|
// buildAuthenticator creates an authn.Authenticator from RegistryOptions.
|
||||||
|
func buildAuthenticator(registryOpts *image.RegistryOptions) authn.Authenticator {
|
||||||
|
// If credentials are provided, use them
|
||||||
|
if len(registryOpts.Credentials) > 0 {
|
||||||
|
// Use the first credential set (we could enhance this to match by authority)
|
||||||
|
cred := registryOpts.Credentials[0]
|
||||||
|
|
||||||
|
if cred.Token != "" {
|
||||||
|
return &authn.Bearer{Token: cred.Token}
|
||||||
|
}
|
||||||
|
|
||||||
|
if cred.Username != "" || cred.Password != "" {
|
||||||
|
return &authn.Basic{
|
||||||
|
Username: cred.Username,
|
||||||
|
Password: cred.Password,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fall back to anonymous authenticator
|
||||||
|
return authn.Anonymous
|
||||||
|
}
|
||||||
|
|
||||||
|
// ModelArtifact represents a parsed OCI model artifact.
type ModelArtifact struct {
	Reference      name.Reference  // the parsed registry reference it was fetched from
	Manifest       *v1.Manifest    // decoded OCI manifest
	Config         *v1.ConfigFile  // decoded config blob
	RawManifest    []byte          // manifest bytes exactly as fetched
	RawConfig      []byte          // config bytes exactly as fetched
	ManifestDigest string          // digest of the manifest
	GGUFLayers     []v1.Descriptor // layer descriptors whose media type is GGUFLayerMediaType
}
|
||||||
|
|
||||||
|
// FetchModelArtifact fetches and parses an OCI model artifact from the registry.
|
||||||
|
func (c *RegistryClient) FetchModelArtifact(ctx context.Context, refStr string) (*ModelArtifact, error) {
|
||||||
|
// Parse reference
|
||||||
|
ref, err := name.ParseReference(refStr)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to parse reference %q: %w", refStr, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch descriptor
|
||||||
|
desc, err := remote.Get(ref, c.options...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch descriptor: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse manifest
|
||||||
|
manifest := &v1.Manifest{}
|
||||||
|
if err := json.Unmarshal(desc.Manifest, manifest); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to unmarshal manifest: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this is a model artifact
|
||||||
|
if !isModelArtifact(manifest) {
|
||||||
|
return nil, fmt.Errorf("not a model artifact (config media type: %s)", manifest.Config.MediaType)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch config
|
||||||
|
img, err := desc.Image()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get image: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
configFile, err := img.ConfigFile()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get config file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
rawConfig, err := img.RawConfigFile()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get raw config: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract GGUF layers
|
||||||
|
ggufLayers := extractGGUFLayers(manifest)
|
||||||
|
|
||||||
|
return &ModelArtifact{
|
||||||
|
Reference: ref,
|
||||||
|
Manifest: manifest,
|
||||||
|
Config: configFile,
|
||||||
|
RawManifest: desc.Manifest,
|
||||||
|
RawConfig: rawConfig,
|
||||||
|
ManifestDigest: desc.Digest.String(),
|
||||||
|
GGUFLayers: ggufLayers,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// isModelArtifact checks if the manifest represents a model artifact by
// comparing its config media type against the Docker AI model config type.
func isModelArtifact(manifest *v1.Manifest) bool {
	return manifest.Config.MediaType == ModelConfigMediaType
}
|
||||||
|
|
||||||
|
// extractGGUFLayers extracts GGUF layer descriptors from the manifest.
|
||||||
|
func extractGGUFLayers(manifest *v1.Manifest) []v1.Descriptor {
|
||||||
|
var ggufLayers []v1.Descriptor
|
||||||
|
for _, layer := range manifest.Layers {
|
||||||
|
if string(layer.MediaType) == GGUFLayerMediaType {
|
||||||
|
ggufLayers = append(ggufLayers, layer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ggufLayers
|
||||||
|
}
|
||||||
|
|
||||||
|
// FetchBlobRange fetches a byte range from a blob in the registry.
|
||||||
|
// This is used to fetch only the GGUF header without downloading the entire multi-GB file.
|
||||||
|
func (c *RegistryClient) FetchBlobRange(ctx context.Context, ref name.Reference, digest v1.Hash, maxBytes int64) ([]byte, error) {
|
||||||
|
// Use the remote package's Layer fetching with our options
|
||||||
|
// Then read only the first maxBytes
|
||||||
|
repo := ref.Context()
|
||||||
|
|
||||||
|
// Fetch the layer (blob) using remote.Layer
|
||||||
|
layer, err := remote.Layer(repo.Digest(digest.String()), c.options...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch layer: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the compressed reader
|
||||||
|
reader, err := layer.Compressed()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get layer reader: %w", err)
|
||||||
|
}
|
||||||
|
defer reader.Close()
|
||||||
|
|
||||||
|
// Read up to maxBytes
|
||||||
|
data := make([]byte, maxBytes)
|
||||||
|
n, err := io.ReadFull(reader, data)
|
||||||
|
if err != nil && err != io.ErrUnexpectedEOF {
|
||||||
|
// ErrUnexpectedEOF is okay - it means the file is smaller than maxBytes
|
||||||
|
return nil, fmt.Errorf("failed to read layer data: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return data[:n], nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsModelArtifactReference checks if a reference points to a model artifact.
|
||||||
|
// This is a lightweight check that only fetches the manifest.
|
||||||
|
func (c *RegistryClient) IsModelArtifactReference(ctx context.Context, refStr string) (bool, error) {
|
||||||
|
ref, err := name.ParseReference(refStr)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("failed to parse reference %q: %w", refStr, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
desc, err := remote.Get(ref, c.options...)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("failed to fetch descriptor: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
manifest := &v1.Manifest{}
|
||||||
|
if err := json.Unmarshal(desc.Manifest, manifest); err != nil {
|
||||||
|
return false, fmt.Errorf("failed to unmarshal manifest: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return isModelArtifact(manifest), nil
|
||||||
|
}
|
||||||
211
syft/source/ocimodelsource/resolver.go
Normal file
211
syft/source/ocimodelsource/resolver.go
Normal file
@ -0,0 +1,211 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/bmatcuk/doublestar/v4"
|
||||||
|
stereofile "github.com/anchore/stereoscope/pkg/file"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ file.Resolver = (*ociModelResolver)(nil)
|
||||||
|
|
||||||
|
// ociModelResolver is a minimal file.Resolver implementation that provides access to
// GGUF header data fetched from OCI model artifacts via range-GET requests.
type ociModelResolver struct {
	tempFiles map[string]string // maps virtual path -> temporary file path
	locations []file.Location   // precomputed virtual locations, one per temp file
}
|
||||||
|
|
||||||
|
// newOCIModelResolver creates a new resolver with the given temporary files.
|
||||||
|
func newOCIModelResolver(tempFiles map[string]string) *ociModelResolver {
|
||||||
|
// Create locations for all temp files
|
||||||
|
locations := make([]file.Location, 0, len(tempFiles))
|
||||||
|
for virtualPath, tempPath := range tempFiles {
|
||||||
|
// Use NewVirtualLocation: realPath is tempPath, accessPath is virtualPath
|
||||||
|
locations = append(locations, file.NewVirtualLocation(tempPath, virtualPath))
|
||||||
|
}
|
||||||
|
|
||||||
|
return &ociModelResolver{
|
||||||
|
tempFiles: tempFiles,
|
||||||
|
locations: locations,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileContentsByLocation returns the contents of the file at the given location.
|
||||||
|
func (r *ociModelResolver) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
|
||||||
|
// Get the real path (temp file) from the location
|
||||||
|
realPath := location.RealPath
|
||||||
|
|
||||||
|
// Check if this is one of our managed files
|
||||||
|
found := false
|
||||||
|
for _, tempPath := range r.tempFiles {
|
||||||
|
if tempPath == realPath {
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !found {
|
||||||
|
return nil, fmt.Errorf("location not found in resolver: %s", location.RealPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open and return the temp file
|
||||||
|
f, err := os.Open(realPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to open temp file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileMetadataByLocation returns metadata for the file at the given location.
|
||||||
|
func (r *ociModelResolver) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
|
||||||
|
realPath := location.RealPath
|
||||||
|
|
||||||
|
// Stat the temp file
|
||||||
|
info, err := os.Stat(realPath)
|
||||||
|
if err != nil {
|
||||||
|
return file.Metadata{}, fmt.Errorf("failed to stat temp file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return basic metadata
|
||||||
|
return file.Metadata{
|
||||||
|
Path: location.AccessPath, // Use AccessPath for virtual path
|
||||||
|
Type: stereofile.TypeRegular,
|
||||||
|
FileInfo: info,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasPath checks if the given path exists in the resolver.
|
||||||
|
func (r *ociModelResolver) HasPath(path string) bool {
|
||||||
|
_, exists := r.tempFiles[path]
|
||||||
|
return exists
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilesByPath returns locations for files matching the given paths.
|
||||||
|
func (r *ociModelResolver) FilesByPath(paths ...string) ([]file.Location, error) {
|
||||||
|
var results []file.Location
|
||||||
|
|
||||||
|
for _, path := range paths {
|
||||||
|
for virtualPath, tempPath := range r.tempFiles {
|
||||||
|
if virtualPath == path {
|
||||||
|
results = append(results, file.NewVirtualLocation(tempPath, virtualPath))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilesByGlob returns locations for files matching the given glob patterns.
|
||||||
|
func (r *ociModelResolver) FilesByGlob(patterns ...string) ([]file.Location, error) {
|
||||||
|
var results []file.Location
|
||||||
|
|
||||||
|
for _, pattern := range patterns {
|
||||||
|
for virtualPath, tempPath := range r.tempFiles {
|
||||||
|
// Match against the virtual path
|
||||||
|
matched, err := doublestar.Match(pattern, virtualPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to match pattern %q: %w", pattern, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if matched {
|
||||||
|
results = append(results, file.NewVirtualLocation(tempPath, virtualPath))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilesByMIMEType returns locations for files with the given MIME types.
|
||||||
|
// This is not implemented for OCI model artifacts as we don't have MIME type detection.
|
||||||
|
func (r *ociModelResolver) FilesByMIMEType(types ...string) ([]file.Location, error) {
|
||||||
|
// Not implemented - OCI model artifacts don't have MIME type detection
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RelativeFileByPath returns a file at the given path relative to the reference location.
|
||||||
|
// This is not applicable for OCI model artifacts.
|
||||||
|
func (r *ociModelResolver) RelativeFileByPath(_ file.Location, path string) *file.Location {
|
||||||
|
// Not implemented - no layer hierarchy in OCI model artifacts
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// AllLocations returns all file locations in the resolver.
|
||||||
|
func (r *ociModelResolver) AllLocations(ctx context.Context) <-chan file.Location {
|
||||||
|
ch := make(chan file.Location)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer close(ch)
|
||||||
|
|
||||||
|
for _, loc := range r.locations {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case ch <- loc:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
return ch
|
||||||
|
}
|
||||||
|
|
||||||
|
// cleanup removes all temporary files managed by this resolver.
|
||||||
|
func (r *ociModelResolver) cleanup() error {
|
||||||
|
var errs []error
|
||||||
|
|
||||||
|
for virtualPath, tempPath := range r.tempFiles {
|
||||||
|
if err := os.Remove(tempPath); err != nil {
|
||||||
|
errs = append(errs, fmt.Errorf("failed to remove temp file for %s: %w", virtualPath, err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(errs) > 0 {
|
||||||
|
return fmt.Errorf("cleanup errors: %v", errs)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractVirtualPath derives an artifact-relative path for a GGUF layer: the
// OCI "image.title" annotation when present, otherwise a generic name built
// from the layer index.
func extractVirtualPath(layerIndex int, annotations map[string]string) string {
	name, ok := annotations["org.opencontainers.image.title"]
	if !ok {
		// no filename annotation: synthesize a name from the layer index
		return fmt.Sprintf("/model-layer-%d.gguf", layerIndex)
	}
	return "/" + name
}
|
||||||
|
|
||||||
|
// createTempFileFromData creates a temporary file with the given data.
|
||||||
|
func createTempFileFromData(data []byte, virtualPath string) (string, error) {
|
||||||
|
// Extract filename from virtual path for better temp file naming
|
||||||
|
filename := filepath.Base(virtualPath)
|
||||||
|
ext := filepath.Ext(filename)
|
||||||
|
prefix := strings.TrimSuffix(filename, ext) + "-"
|
||||||
|
|
||||||
|
// Create temp file
|
||||||
|
tempFile, err := os.CreateTemp("", prefix+"*"+ext)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to create temp file: %w", err)
|
||||||
|
}
|
||||||
|
defer tempFile.Close()
|
||||||
|
|
||||||
|
// Write data
|
||||||
|
if _, err := tempFile.Write(data); err != nil {
|
||||||
|
os.Remove(tempFile.Name())
|
||||||
|
return "", fmt.Errorf("failed to write to temp file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return tempFile.Name(), nil
|
||||||
|
}
|
||||||
@ -7,15 +7,17 @@ import (
|
|||||||
"github.com/anchore/syft/syft/source"
|
"github.com/anchore/syft/syft/source"
|
||||||
"github.com/anchore/syft/syft/source/directorysource"
|
"github.com/anchore/syft/syft/source/directorysource"
|
||||||
"github.com/anchore/syft/syft/source/filesource"
|
"github.com/anchore/syft/syft/source/filesource"
|
||||||
|
"github.com/anchore/syft/syft/source/ocimodelsource"
|
||||||
"github.com/anchore/syft/syft/source/snapsource"
|
"github.com/anchore/syft/syft/source/snapsource"
|
||||||
"github.com/anchore/syft/syft/source/stereoscopesource"
|
"github.com/anchore/syft/syft/source/stereoscopesource"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
FileTag = stereoscope.FileTag
|
FileTag = stereoscope.FileTag
|
||||||
DirTag = stereoscope.DirTag
|
DirTag = stereoscope.DirTag
|
||||||
PullTag = stereoscope.PullTag
|
PullTag = stereoscope.PullTag
|
||||||
SnapTag = "snap"
|
SnapTag = "snap"
|
||||||
|
OCIModelTag = "oci-model"
|
||||||
)
|
)
|
||||||
|
|
||||||
// All returns all the configured source providers known to syft
|
// All returns all the configured source providers known to syft
|
||||||
@ -40,6 +42,9 @@ func All(userInput string, cfg *Config) []collections.TaggedValue[source.Provide
|
|||||||
|
|
||||||
// 3. try remote sources after everything else...
|
// 3. try remote sources after everything else...
|
||||||
|
|
||||||
|
// --from oci-model (model artifacts with header-only fetching)
|
||||||
|
Join(tagProvider(ocimodelsource.NewSourceProvider(userInput, cfg.RegistryOptions, cfg.Alias), OCIModelTag)).
|
||||||
|
|
||||||
// --from docker, registry, etc.
|
// --from docker, registry, etc.
|
||||||
Join(stereoscopeProviders.Select(PullTag)...).
|
Join(stereoscopeProviders.Select(PullTag)...).
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user