mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 08:23:15 +01:00
Compare commits
13 Commits
cdb41b0c76
...
9c5279cb99
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9c5279cb99 | ||
|
|
f7a19db98b | ||
|
|
13756ec768 | ||
|
|
3326ae44fa | ||
|
|
a08d5b78d9 | ||
|
|
ce74ed0309 | ||
|
|
0ff6a1af58 | ||
|
|
cd4d0ce062 | ||
|
|
a721a854a9 | ||
|
|
c715e01cc2 | ||
|
|
165611d2e4 | ||
|
|
746f00ad68 | ||
|
|
3f117a3eb5 |
2
.gitignore
vendored
2
.gitignore
vendored
@ -73,3 +73,5 @@ cosign.pub
|
|||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
*$py.class
|
*$py.class
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -88,6 +88,7 @@ func TestPkgCoverageImage(t *testing.T) {
|
|||||||
definedPkgs.Remove(string(pkg.TerraformPkg))
|
definedPkgs.Remove(string(pkg.TerraformPkg))
|
||||||
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // we have coverage for pear instead
|
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // we have coverage for pear instead
|
||||||
definedPkgs.Remove(string(pkg.CondaPkg))
|
definedPkgs.Remove(string(pkg.CondaPkg))
|
||||||
|
definedPkgs.Remove(string(pkg.ModelPkg))
|
||||||
|
|
||||||
var cases []testCase
|
var cases []testCase
|
||||||
cases = append(cases, commonTestCases...)
|
cases = append(cases, commonTestCases...)
|
||||||
@ -162,6 +163,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
|
|||||||
definedPkgs.Remove(string(pkg.UnknownPkg))
|
definedPkgs.Remove(string(pkg.UnknownPkg))
|
||||||
definedPkgs.Remove(string(pkg.CondaPkg))
|
definedPkgs.Remove(string(pkg.CondaPkg))
|
||||||
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // this is covered as pear packages
|
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // this is covered as pear packages
|
||||||
|
definedPkgs.Remove(string(pkg.ModelPkg))
|
||||||
|
|
||||||
// for directory scans we should not expect to see any of the following package types
|
// for directory scans we should not expect to see any of the following package types
|
||||||
definedPkgs.Remove(string(pkg.KbPkg))
|
definedPkgs.Remove(string(pkg.KbPkg))
|
||||||
|
|||||||
8
go.mod
8
go.mod
@ -286,6 +286,8 @@ require (
|
|||||||
modernc.org/memory v1.11.0 // indirect
|
modernc.org/memory v1.11.0 // indirect
|
||||||
)
|
)
|
||||||
|
|
||||||
|
require github.com/gpustack/gguf-parser-go v0.22.1
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect
|
github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect
|
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect
|
||||||
@ -306,6 +308,12 @@ require (
|
|||||||
github.com/aws/aws-sdk-go-v2/service/sts v1.34.0 // indirect
|
github.com/aws/aws-sdk-go-v2/service/sts v1.34.0 // indirect
|
||||||
github.com/aws/smithy-go v1.22.4 // indirect
|
github.com/aws/smithy-go v1.22.4 // indirect
|
||||||
github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect
|
github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect
|
||||||
|
github.com/henvic/httpretty v0.1.4 // indirect
|
||||||
|
github.com/json-iterator/go v1.1.12 // indirect
|
||||||
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||||
|
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||||
|
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
|
||||||
|
gonum.org/v1/gonum v0.15.1 // indirect
|
||||||
)
|
)
|
||||||
|
|
||||||
retract (
|
retract (
|
||||||
|
|||||||
11
go.sum
11
go.sum
@ -541,6 +541,8 @@ github.com/gookit/assert v0.1.1/go.mod h1:jS5bmIVQZTIwk42uXl4lyj4iaaxx32tqH16CFj
|
|||||||
github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg=
|
github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg=
|
||||||
github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA=
|
github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA=
|
||||||
github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs=
|
github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs=
|
||||||
|
github.com/gpustack/gguf-parser-go v0.22.1 h1:FRnEDWqT0Rcplr/R9ctCRSN2+3DhVsf6dnR5/i9JA4E=
|
||||||
|
github.com/gpustack/gguf-parser-go v0.22.1/go.mod h1:y4TwTtDqFWTK+xvprOjRUh+dowgU2TKCX37vRKvGiZ0=
|
||||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
|
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
|
||||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
|
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
|
||||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
|
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
|
||||||
@ -590,6 +592,8 @@ github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn
|
|||||||
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
|
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
|
||||||
github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
|
github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
|
||||||
github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
|
github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
|
||||||
|
github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
|
||||||
|
github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
|
||||||
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
||||||
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||||
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
|
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
|
||||||
@ -617,6 +621,7 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF
|
|||||||
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
||||||
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||||
github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||||
|
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||||
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
||||||
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
|
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
|
||||||
@ -722,9 +727,11 @@ github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcY
|
|||||||
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA=
|
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA=
|
||||||
github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
|
github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
|
||||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||||
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||||
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||||
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||||
|
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
||||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||||
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
|
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
|
||||||
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
|
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
|
||||||
@ -851,6 +858,8 @@ github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yf
|
|||||||
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||||
github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
|
github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
|
||||||
github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
|
github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
|
||||||
|
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
|
||||||
|
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
|
||||||
github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik=
|
github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik=
|
||||||
github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU=
|
github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU=
|
||||||
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
|
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
|
||||||
@ -1304,6 +1313,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
|
|||||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
|
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
|
||||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
|
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
|
||||||
|
gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
|
||||||
|
gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
|
||||||
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
|
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
|
||||||
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
|
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
|
||||||
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
|
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
|
||||||
|
|||||||
@ -3,5 +3,5 @@ package internal
|
|||||||
const (
|
const (
|
||||||
// JSONSchemaVersion is the current schema version output by the JSON encoder
|
// JSONSchemaVersion is the current schema version output by the JSON encoder
|
||||||
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
|
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
|
||||||
JSONSchemaVersion = "16.0.41"
|
JSONSchemaVersion = "16.0.42"
|
||||||
)
|
)
|
||||||
|
|||||||
@ -27,6 +27,7 @@ func AllTypes() []any {
|
|||||||
pkg.ELFBinaryPackageNoteJSONPayload{},
|
pkg.ELFBinaryPackageNoteJSONPayload{},
|
||||||
pkg.ElixirMixLockEntry{},
|
pkg.ElixirMixLockEntry{},
|
||||||
pkg.ErlangRebarLockEntry{},
|
pkg.ErlangRebarLockEntry{},
|
||||||
|
pkg.GGUFFileHeader{},
|
||||||
pkg.GitHubActionsUseStatement{},
|
pkg.GitHubActionsUseStatement{},
|
||||||
pkg.GolangBinaryBuildinfoEntry{},
|
pkg.GolangBinaryBuildinfoEntry{},
|
||||||
pkg.GolangModuleEntry{},
|
pkg.GolangModuleEntry{},
|
||||||
|
|||||||
@ -123,6 +123,7 @@ var jsonTypes = makeJSONTypes(
|
|||||||
jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"),
|
jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"),
|
||||||
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
|
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
|
||||||
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
|
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
|
||||||
|
jsonNames(pkg.GGUFFileHeader{}, "gguf-file-metadata"),
|
||||||
)
|
)
|
||||||
|
|
||||||
func expandLegacyNameVariants(names ...string) []string {
|
func expandLegacyNameVariants(names ...string) []string {
|
||||||
|
|||||||
@ -3,6 +3,7 @@ package task
|
|||||||
import (
|
import (
|
||||||
"github.com/anchore/syft/syft/cataloging/pkgcataloging"
|
"github.com/anchore/syft/syft/cataloging/pkgcataloging"
|
||||||
"github.com/anchore/syft/syft/pkg"
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
"github.com/anchore/syft/syft/pkg/cataloger/ai"
|
||||||
"github.com/anchore/syft/syft/pkg/cataloger/alpine"
|
"github.com/anchore/syft/syft/pkg/cataloger/alpine"
|
||||||
"github.com/anchore/syft/syft/pkg/cataloger/arch"
|
"github.com/anchore/syft/syft/pkg/cataloger/arch"
|
||||||
"github.com/anchore/syft/syft/pkg/cataloger/binary"
|
"github.com/anchore/syft/syft/pkg/cataloger/binary"
|
||||||
@ -175,6 +176,7 @@ func DefaultPackageTaskFactories() Factories {
|
|||||||
newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"),
|
newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"),
|
||||||
newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"),
|
newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"),
|
||||||
newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"),
|
newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"),
|
||||||
|
newSimplePackageTaskFactory(ai.NewGGUFCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "gguf", "ml"),
|
||||||
|
|
||||||
// deprecated catalogers ////////////////////////////////////////
|
// deprecated catalogers ////////////////////////////////////////
|
||||||
// these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible)
|
// these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible)
|
||||||
|
|||||||
4078
schema/json/schema-16.0.42.json
Normal file
4078
schema/json/schema-16.0.42.json
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||||
"$id": "anchore.io/schema/syft/json/16.0.41/document",
|
"$id": "anchore.io/schema/syft/json/16.0.42/document",
|
||||||
"$ref": "#/$defs/Document",
|
"$ref": "#/$defs/Document",
|
||||||
"$defs": {
|
"$defs": {
|
||||||
"AlpmDbEntry": {
|
"AlpmDbEntry": {
|
||||||
@ -1399,6 +1399,70 @@
|
|||||||
"size"
|
"size"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"GgufFileMetadata": {
|
||||||
|
"properties": {
|
||||||
|
"modelFormat": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "ModelFormat is always \"gguf\""
|
||||||
|
},
|
||||||
|
"modelName": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "ModelName is the name of the model (from general.name or filename)"
|
||||||
|
},
|
||||||
|
"modelVersion": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "ModelVersion is the version of the model (if available in header, else \"unknown\")"
|
||||||
|
},
|
||||||
|
"fileSize": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
|
||||||
|
},
|
||||||
|
"hash": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Hash is a content hash of the metadata (for stable global identifiers across remotes)"
|
||||||
|
},
|
||||||
|
"license": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "License is the license identifier (from general.license if present)"
|
||||||
|
},
|
||||||
|
"ggufVersion": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "GGUFVersion is the GGUF format version (e.g., 3)"
|
||||||
|
},
|
||||||
|
"architecture": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")"
|
||||||
|
},
|
||||||
|
"quantization": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Quantization is the quantization type (e.g., \"IQ4_NL\", \"Q4_K_M\")"
|
||||||
|
},
|
||||||
|
"parameters": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Parameters is the number of model parameters (if present in header)"
|
||||||
|
},
|
||||||
|
"tensorCount": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "TensorCount is the number of tensors in the model"
|
||||||
|
},
|
||||||
|
"header": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "Header contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
|
||||||
|
},
|
||||||
|
"truncatedHeader": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "TruncatedHeader indicates if the header was truncated during parsing (for very large headers)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": "object",
|
||||||
|
"required": [
|
||||||
|
"modelFormat",
|
||||||
|
"modelName",
|
||||||
|
"ggufVersion",
|
||||||
|
"tensorCount"
|
||||||
|
],
|
||||||
|
"description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file."
|
||||||
|
},
|
||||||
"GithubActionsUseStatement": {
|
"GithubActionsUseStatement": {
|
||||||
"properties": {
|
"properties": {
|
||||||
"value": {
|
"value": {
|
||||||
@ -2474,6 +2538,9 @@
|
|||||||
{
|
{
|
||||||
"$ref": "#/$defs/ErlangRebarLockEntry"
|
"$ref": "#/$defs/ErlangRebarLockEntry"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/$defs/GgufFileMetadata"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/$defs/GithubActionsUseStatement"
|
"$ref": "#/$defs/GithubActionsUseStatement"
|
||||||
},
|
},
|
||||||
|
|||||||
@ -40,8 +40,11 @@ func EncodeComponent(p pkg.Package, supplier string, locationSorter func(a, b fi
|
|||||||
}
|
}
|
||||||
|
|
||||||
componentType := cyclonedx.ComponentTypeLibrary
|
componentType := cyclonedx.ComponentTypeLibrary
|
||||||
if p.Type == pkg.BinaryPkg {
|
switch p.Type {
|
||||||
|
case pkg.BinaryPkg:
|
||||||
componentType = cyclonedx.ComponentTypeApplication
|
componentType = cyclonedx.ComponentTypeApplication
|
||||||
|
case pkg.ModelPkg:
|
||||||
|
componentType = cyclonedx.ComponentTypeMachineLearningModel
|
||||||
}
|
}
|
||||||
|
|
||||||
return cyclonedx.Component{
|
return cyclonedx.Component{
|
||||||
|
|||||||
@ -62,7 +62,7 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str
|
|||||||
switch component.Type {
|
switch component.Type {
|
||||||
case cyclonedx.ComponentTypeOS:
|
case cyclonedx.ComponentTypeOS:
|
||||||
case cyclonedx.ComponentTypeContainer:
|
case cyclonedx.ComponentTypeContainer:
|
||||||
case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary:
|
case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary, cyclonedx.ComponentTypeMachineLearningModel:
|
||||||
p := decodeComponent(component)
|
p := decodeComponent(component)
|
||||||
idMap[component.BOMRef] = p
|
idMap[component.BOMRef] = p
|
||||||
if component.BOMRef != "" {
|
if component.BOMRef != "" {
|
||||||
|
|||||||
@ -54,6 +54,7 @@ func Test_OriginatorSupplier(t *testing.T) {
|
|||||||
pkg.OpamPackage{},
|
pkg.OpamPackage{},
|
||||||
pkg.YarnLockEntry{},
|
pkg.YarnLockEntry{},
|
||||||
pkg.TerraformLockProviderEntry{},
|
pkg.TerraformLockProviderEntry{},
|
||||||
|
pkg.GGUFFileHeader{},
|
||||||
)
|
)
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
|
|||||||
@ -82,6 +82,8 @@ func SourceInfo(p pkg.Package) string {
|
|||||||
answer = "acquired package info from Homebrew formula"
|
answer = "acquired package info from Homebrew formula"
|
||||||
case pkg.TerraformPkg:
|
case pkg.TerraformPkg:
|
||||||
answer = "acquired package info from Terraform dependency lock file"
|
answer = "acquired package info from Terraform dependency lock file"
|
||||||
|
case pkg.ModelPkg:
|
||||||
|
answer = "acquired package info from AI artifact (e.g. GGUF File"
|
||||||
default:
|
default:
|
||||||
answer = "acquired package info from the following paths"
|
answer = "acquired package info from the following paths"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -351,6 +351,14 @@ func Test_SourceInfo(t *testing.T) {
|
|||||||
"acquired package info from Terraform dependency lock file",
|
"acquired package info from Terraform dependency lock file",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
input: pkg.Package{
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
},
|
||||||
|
expected: []string{
|
||||||
|
"",
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
var pkgTypes []pkg.Type
|
var pkgTypes []pkg.Type
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
|
|||||||
16
syft/pkg/cataloger/ai/cataloger.go
Normal file
16
syft/pkg/cataloger/ai/cataloger.go
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
/*
|
||||||
|
Package ai provides concrete Cataloger implementations for AI artifacts and machine learning models,
|
||||||
|
including support for GGUF (GPT-Generated Unified Format) model files.
|
||||||
|
*/
|
||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewGGUFCataloger returns a new cataloger instance for GGUF model files.
|
||||||
|
func NewGGUFCataloger() pkg.Cataloger {
|
||||||
|
return generic.NewCataloger("gguf-cataloger").
|
||||||
|
WithParserByGlobs(parseGGUFModel, "**/*.gguf")
|
||||||
|
}
|
||||||
381
syft/pkg/cataloger/ai/cataloger_test.go
Normal file
381
syft/pkg/cataloger/ai/cataloger_test.go
Normal file
@ -0,0 +1,381 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/google/go-cmp/cmp/cmpopts"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/artifact"
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGGUFCataloger_Globs(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
setup func(t *testing.T) string // returns fixture directory
|
||||||
|
expected []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "finds GGUF files in root",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
createTestGGUFInDir(t, dir, "model1.gguf")
|
||||||
|
createTestGGUFInDir(t, dir, "model2.gguf")
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expected: []string{
|
||||||
|
"model1.gguf",
|
||||||
|
"model2.gguf",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "finds GGUF files in subdirectories",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
modelsDir := filepath.Join(dir, "models")
|
||||||
|
os.MkdirAll(modelsDir, 0755)
|
||||||
|
createTestGGUFInDir(t, modelsDir, "llama.gguf")
|
||||||
|
|
||||||
|
deepDir := filepath.Join(dir, "deep", "nested", "path")
|
||||||
|
os.MkdirAll(deepDir, 0755)
|
||||||
|
createTestGGUFInDir(t, deepDir, "mistral.gguf")
|
||||||
|
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expected: []string{
|
||||||
|
"models/llama.gguf",
|
||||||
|
"deep/nested/path/mistral.gguf",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ignores non-GGUF files",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
createTestGGUFInDir(t, dir, "model.gguf")
|
||||||
|
|
||||||
|
// Create non-GGUF files
|
||||||
|
os.WriteFile(filepath.Join(dir, "readme.txt"), []byte("readme"), 0644)
|
||||||
|
os.WriteFile(filepath.Join(dir, "model.bin"), []byte("binary"), 0644)
|
||||||
|
os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644)
|
||||||
|
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expected: []string{
|
||||||
|
"model.gguf",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
fixtureDir := tt.setup(t)
|
||||||
|
|
||||||
|
pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, fixtureDir).
|
||||||
|
ExpectsResolverContentQueries(tt.expected).
|
||||||
|
TestCataloger(t, NewGGUFCataloger())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_Integration(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
setup func(t *testing.T) string
|
||||||
|
expectedPackages []pkg.Package
|
||||||
|
expectedRelationships []artifact.Relationship
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "catalog single GGUF file",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
data := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(291).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "llama3-8b").
|
||||||
|
withStringKV("general.version", "3.0").
|
||||||
|
withStringKV("general.license", "Apache-2.0").
|
||||||
|
withStringKV("general.quantization", "Q4_K_M").
|
||||||
|
withUint64KV("general.parameter_count", 8030000000).
|
||||||
|
build()
|
||||||
|
|
||||||
|
path := filepath.Join(dir, "llama3-8b.gguf")
|
||||||
|
os.WriteFile(path, data, 0644)
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: []pkg.Package{
|
||||||
|
{
|
||||||
|
Name: "llama3-8b",
|
||||||
|
Version: "3.0",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Licenses: pkg.NewLicenseSet(
|
||||||
|
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
|
||||||
|
),
|
||||||
|
Metadata: pkg.GGUFFileHeader{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "llama3-8b",
|
||||||
|
ModelVersion: "3.0",
|
||||||
|
License: "Apache-2.0",
|
||||||
|
Architecture: "llama",
|
||||||
|
Quantization: "Q4_K_M",
|
||||||
|
Parameters: 8030000000,
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 291,
|
||||||
|
Header: map[string]interface{}{},
|
||||||
|
TruncatedHeader: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedRelationships: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "catalog multiple GGUF files",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create first model
|
||||||
|
data1 := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(100).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "model1").
|
||||||
|
withStringKV("general.version", "1.0").
|
||||||
|
build()
|
||||||
|
os.WriteFile(filepath.Join(dir, "model1.gguf"), data1, 0644)
|
||||||
|
|
||||||
|
// Create second model
|
||||||
|
data2 := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(200).
|
||||||
|
withStringKV("general.architecture", "mistral").
|
||||||
|
withStringKV("general.name", "model2").
|
||||||
|
withStringKV("general.version", "2.0").
|
||||||
|
build()
|
||||||
|
os.WriteFile(filepath.Join(dir, "model2.gguf"), data2, 0644)
|
||||||
|
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: []pkg.Package{
|
||||||
|
{
|
||||||
|
Name: "model1",
|
||||||
|
Version: "1.0",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: pkg.GGUFFileHeader{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "model1",
|
||||||
|
ModelVersion: "1.0",
|
||||||
|
Architecture: "llama",
|
||||||
|
Quantization: unknownGGUFData,
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 100,
|
||||||
|
Header: map[string]interface{}{},
|
||||||
|
TruncatedHeader: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "model2",
|
||||||
|
Version: "2.0",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: pkg.GGUFFileHeader{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "model2",
|
||||||
|
ModelVersion: "2.0",
|
||||||
|
Architecture: "mistral",
|
||||||
|
Quantization: unknownGGUFData,
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 200,
|
||||||
|
Header: map[string]interface{}{},
|
||||||
|
TruncatedHeader: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedRelationships: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "catalog GGUF in nested directories",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
nestedDir := filepath.Join(dir, "models", "quantized")
|
||||||
|
os.MkdirAll(nestedDir, 0755)
|
||||||
|
|
||||||
|
data := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(150).
|
||||||
|
withStringKV("general.architecture", "qwen").
|
||||||
|
withStringKV("general.name", "qwen-nested").
|
||||||
|
build()
|
||||||
|
|
||||||
|
os.WriteFile(filepath.Join(nestedDir, "qwen.gguf"), data, 0644)
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: []pkg.Package{
|
||||||
|
{
|
||||||
|
Name: "qwen-nested",
|
||||||
|
Version: unknownGGUFData,
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: pkg.GGUFFileHeader{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "qwen-nested",
|
||||||
|
ModelVersion: unknownGGUFData,
|
||||||
|
Architecture: "qwen",
|
||||||
|
Quantization: unknownGGUFData,
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 150,
|
||||||
|
Header: map[string]interface{}{},
|
||||||
|
TruncatedHeader: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedRelationships: nil,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
fixtureDir := tt.setup(t)
|
||||||
|
|
||||||
|
// Use pkgtest to catalog and compare
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, fixtureDir).
|
||||||
|
Expects(tt.expectedPackages, tt.expectedRelationships).
|
||||||
|
IgnoreLocationLayer().
|
||||||
|
IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger
|
||||||
|
WithCompareOptions(
|
||||||
|
// Ignore Hash as it's computed dynamically
|
||||||
|
cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "Hash"),
|
||||||
|
)
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_SkipsInvalidFiles(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create a valid GGUF
|
||||||
|
validData := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(100).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "valid-model").
|
||||||
|
build()
|
||||||
|
os.WriteFile(filepath.Join(dir, "valid.gguf"), validData, 0644)
|
||||||
|
|
||||||
|
// Create an invalid GGUF (wrong magic)
|
||||||
|
invalidData := newTestGGUFBuilder().buildInvalidMagic()
|
||||||
|
os.WriteFile(filepath.Join(dir, "invalid.gguf"), invalidData, 0644)
|
||||||
|
|
||||||
|
// Create a truncated GGUF
|
||||||
|
os.WriteFile(filepath.Join(dir, "truncated.gguf"), []byte{0x47}, 0644)
|
||||||
|
|
||||||
|
// Catalog should succeed and only return the valid package
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, dir).
|
||||||
|
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
|
||||||
|
// Should only find the valid model
|
||||||
|
require.Len(t, pkgs, 1)
|
||||||
|
assert.Equal(t, "valid-model", pkgs[0].Name)
|
||||||
|
})
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_Name(t *testing.T) {
|
||||||
|
cataloger := NewGGUFCataloger()
|
||||||
|
assert.Equal(t, "gguf-cataloger", cataloger.Name())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_EmptyDirectory(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
// Create a subdirectory to ensure glob still runs
|
||||||
|
os.MkdirAll(filepath.Join(dir, "models"), 0755)
|
||||||
|
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, dir).
|
||||||
|
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, rels []artifact.Relationship) {
|
||||||
|
assert.Empty(t, pkgs)
|
||||||
|
assert.Empty(t, rels)
|
||||||
|
})
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_MixedFiles(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create GGUF file
|
||||||
|
ggufData := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(100).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "test-model").
|
||||||
|
build()
|
||||||
|
os.WriteFile(filepath.Join(dir, "model.gguf"), ggufData, 0644)
|
||||||
|
|
||||||
|
// Create other file types
|
||||||
|
os.WriteFile(filepath.Join(dir, "README.md"), []byte("# Models"), 0644)
|
||||||
|
os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644)
|
||||||
|
os.WriteFile(filepath.Join(dir, "weights.bin"), []byte("weights"), 0644)
|
||||||
|
os.MkdirAll(filepath.Join(dir, "subdir"), 0755)
|
||||||
|
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, dir).
|
||||||
|
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
|
||||||
|
// Should only find the GGUF model
|
||||||
|
require.Len(t, pkgs, 1)
|
||||||
|
assert.Equal(t, "test-model", pkgs[0].Name)
|
||||||
|
assert.Equal(t, pkg.ModelPkg, pkgs[0].Type)
|
||||||
|
})
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_CaseInsensitiveGlob(t *testing.T) {
|
||||||
|
// Test that the glob pattern is case-sensitive (as expected for **/*.gguf)
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create lowercase .gguf
|
||||||
|
data := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(100).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "lowercase").
|
||||||
|
build()
|
||||||
|
os.WriteFile(filepath.Join(dir, "model.gguf"), data, 0644)
|
||||||
|
|
||||||
|
// Create uppercase .GGUF (should not match the glob)
|
||||||
|
os.WriteFile(filepath.Join(dir, "MODEL.GGUF"), data, 0644)
|
||||||
|
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, dir).
|
||||||
|
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
|
||||||
|
// Depending on filesystem case-sensitivity, we may get 1 or 2 packages
|
||||||
|
// On case-insensitive filesystems (macOS), both might match
|
||||||
|
// On case-sensitive filesystems (Linux), only lowercase matches
|
||||||
|
assert.GreaterOrEqual(t, len(pkgs), 1, "should find at least the lowercase file")
|
||||||
|
})
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
}
|
||||||
|
|
||||||
|
// createTestGGUFInDir creates a minimal test GGUF file in the specified directory
|
||||||
|
func createTestGGUFInDir(t *testing.T, dir, filename string) {
|
||||||
|
t.Helper()
|
||||||
|
data := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(100).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "test-model").
|
||||||
|
build()
|
||||||
|
|
||||||
|
path := filepath.Join(dir, filename)
|
||||||
|
err := os.WriteFile(path, data, 0644)
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
69
syft/pkg/cataloger/ai/package.go
Normal file
69
syft/pkg/cataloger/ai/package.go
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/internal/log"
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
)
|
||||||
|
|
||||||
|
func newGGUFPackage(metadata *pkg.GGUFFileHeader, locations ...file.Location) pkg.Package {
|
||||||
|
// Compute hash if not already set
|
||||||
|
if metadata.Hash == "" {
|
||||||
|
metadata.Hash = computeMetadataHash(metadata)
|
||||||
|
}
|
||||||
|
|
||||||
|
p := pkg.Package{
|
||||||
|
Name: metadata.ModelName,
|
||||||
|
Version: metadata.ModelVersion,
|
||||||
|
Locations: file.NewLocationSet(locations...),
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Licenses: pkg.NewLicenseSet(),
|
||||||
|
Metadata: *metadata,
|
||||||
|
// NOTE: PURL is intentionally not set as the package-url spec
|
||||||
|
// has not yet finalized support for ML model packages
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add license to the package if present in metadata
|
||||||
|
if metadata.License != "" {
|
||||||
|
p.Licenses.Add(pkg.NewLicenseFromFields(metadata.License, "", nil))
|
||||||
|
}
|
||||||
|
|
||||||
|
p.SetID()
|
||||||
|
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
// computeMetadataHash computes a stable hash of the metadata for use as a global identifier
|
||||||
|
func computeMetadataHash(metadata *pkg.GGUFFileHeader) string {
|
||||||
|
// Create a stable representation of the metadata
|
||||||
|
hashData := struct {
|
||||||
|
Format string
|
||||||
|
Name string
|
||||||
|
Version string
|
||||||
|
Architecture string
|
||||||
|
GGUFVersion uint32
|
||||||
|
TensorCount uint64
|
||||||
|
}{
|
||||||
|
Format: metadata.ModelFormat,
|
||||||
|
Name: metadata.ModelName,
|
||||||
|
Version: metadata.ModelVersion,
|
||||||
|
Architecture: metadata.Architecture,
|
||||||
|
GGUFVersion: metadata.GGUFVersion,
|
||||||
|
TensorCount: metadata.TensorCount,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Marshal to JSON for stable hashing
|
||||||
|
jsonBytes, err := json.Marshal(hashData)
|
||||||
|
if err != nil {
|
||||||
|
log.Debugf("failed to marshal metadata for hashing: %v", err)
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute SHA256 hash
|
||||||
|
hash := sha256.Sum256(jsonBytes)
|
||||||
|
return fmt.Sprintf("%x", hash[:8]) // Use first 8 bytes (16 hex chars)
|
||||||
|
}
|
||||||
126
syft/pkg/cataloger/ai/package_test.go
Normal file
126
syft/pkg/cataloger/ai/package_test.go
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/google/go-cmp/cmp"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNewGGUFPackage(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
metadata *pkg.GGUFFileHeader
|
||||||
|
locations []file.Location
|
||||||
|
checkFunc func(t *testing.T, p pkg.Package)
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "complete GGUF package with all fields",
|
||||||
|
metadata: &pkg.GGUFFileHeader{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "llama3-8b-instruct",
|
||||||
|
ModelVersion: "3.0",
|
||||||
|
License: "Apache-2.0",
|
||||||
|
Architecture: "llama",
|
||||||
|
Quantization: "Q4_K_M",
|
||||||
|
Parameters: 8030000000,
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 291,
|
||||||
|
Header: map[string]any{},
|
||||||
|
TruncatedHeader: false,
|
||||||
|
},
|
||||||
|
locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
|
||||||
|
checkFunc: func(t *testing.T, p pkg.Package) {
|
||||||
|
if d := cmp.Diff("llama3-8b-instruct", p.Name); d != "" {
|
||||||
|
t.Errorf("Name mismatch (-want +got):\n%s", d)
|
||||||
|
}
|
||||||
|
if d := cmp.Diff("3.0", p.Version); d != "" {
|
||||||
|
t.Errorf("Version mismatch (-want +got):\n%s", d)
|
||||||
|
}
|
||||||
|
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
|
||||||
|
t.Errorf("Type mismatch (-want +got):\n%s", d)
|
||||||
|
}
|
||||||
|
assert.Empty(t, p.PURL, "PURL should not be set for model packages")
|
||||||
|
assert.Len(t, p.Licenses.ToSlice(), 1)
|
||||||
|
if d := cmp.Diff("Apache-2.0", p.Licenses.ToSlice()[0].Value); d != "" {
|
||||||
|
t.Errorf("License value mismatch (-want +got):\n%s", d)
|
||||||
|
}
|
||||||
|
assert.NotEmpty(t, p.ID())
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "minimal GGUF package",
|
||||||
|
metadata: &pkg.GGUFFileHeader{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "simple-model",
|
||||||
|
ModelVersion: "1.0",
|
||||||
|
Architecture: "gpt2",
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 50,
|
||||||
|
},
|
||||||
|
locations: []file.Location{file.NewLocation("/models/simple.gguf")},
|
||||||
|
checkFunc: func(t *testing.T, p pkg.Package) {
|
||||||
|
if d := cmp.Diff("simple-model", p.Name); d != "" {
|
||||||
|
t.Errorf("Name mismatch (-want +got):\n%s", d)
|
||||||
|
}
|
||||||
|
if d := cmp.Diff("1.0", p.Version); d != "" {
|
||||||
|
t.Errorf("Version mismatch (-want +got):\n%s", d)
|
||||||
|
}
|
||||||
|
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
|
||||||
|
t.Errorf("Type mismatch (-want +got):\n%s", d)
|
||||||
|
}
|
||||||
|
assert.Empty(t, p.PURL, "PURL should not be set for model packages")
|
||||||
|
assert.Empty(t, p.Licenses.ToSlice())
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "GGUF package with multiple locations",
|
||||||
|
metadata: &pkg.GGUFFileHeader{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "multi-location-model",
|
||||||
|
ModelVersion: "1.5",
|
||||||
|
Architecture: "llama",
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 150,
|
||||||
|
},
|
||||||
|
locations: []file.Location{
|
||||||
|
file.NewLocation("/models/model1.gguf"),
|
||||||
|
file.NewLocation("/models/model2.gguf"),
|
||||||
|
},
|
||||||
|
checkFunc: func(t *testing.T, p pkg.Package) {
|
||||||
|
assert.Len(t, p.Locations.ToSlice(), 2)
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
p := newGGUFPackage(tt.metadata, tt.locations...)
|
||||||
|
|
||||||
|
if d := cmp.Diff(tt.metadata.ModelName, p.Name); d != "" {
|
||||||
|
t.Errorf("Name mismatch (-want +got):\n%s", d)
|
||||||
|
}
|
||||||
|
if d := cmp.Diff(tt.metadata.ModelVersion, p.Version); d != "" {
|
||||||
|
t.Errorf("Version mismatch (-want +got):\n%s", d)
|
||||||
|
}
|
||||||
|
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
|
||||||
|
t.Errorf("Type mismatch (-want +got):\n%s", d)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify metadata is attached
|
||||||
|
metadata, ok := p.Metadata.(pkg.GGUFFileHeader)
|
||||||
|
require.True(t, ok, "metadata should be GGUFFileHeader")
|
||||||
|
if d := cmp.Diff(*tt.metadata, metadata); d != "" {
|
||||||
|
t.Errorf("Metadata mismatch (-want +got):\n%s", d)
|
||||||
|
}
|
||||||
|
|
||||||
|
if tt.checkFunc != nil {
|
||||||
|
tt.checkFunc(t, p)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
92
syft/pkg/cataloger/ai/parse_gguf.go
Normal file
92
syft/pkg/cataloger/ai/parse_gguf.go
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/binary"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
|
||||||
|
gguf_parser "github.com/gpustack/gguf-parser-go"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GGUF file format constants
const (
	ggufMagicNumber = 0x46554747       // "GGUF" in little-endian
	maxHeaderSize   = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies
)

// ggufHeaderReader reads the leading portion of a GGUF file from an io.Reader.
type ggufHeaderReader struct {
	reader io.Reader
}

// readHeader validates the GGUF magic number and returns the leading bytes of
// the stream, capped at maxHeaderSize.
//
// The reader does not know where the metadata section ends, so it does not stop
// at the header boundary: it returns everything up to the cap (or up to EOF for
// smaller inputs). The cap is what keeps multi-gigabyte model files from being
// loaded into memory.
//
// An error is returned when the fixed-size prefix cannot be read in full, when
// the magic number does not match, or when the underlying reader fails.
func (r *ggufHeaderReader) readHeader() ([]byte, error) {
	// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
	const prefixSize = 24

	prefix := make([]byte, prefixSize)
	if _, err := io.ReadFull(r.reader, prefix); err != nil {
		return nil, fmt.Errorf("failed to read GGUF header prefix: %w", err)
	}

	// Verify magic number
	if magic := binary.LittleEndian.Uint32(prefix[0:4]); magic != ggufMagicNumber {
		return nil, fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
	}

	// Bound the remaining read so that prefix + rest never exceeds maxHeaderSize;
	// nothing beyond the cap is pulled from the reader.
	rest, err := io.ReadAll(io.LimitReader(r.reader, int64(maxHeaderSize-prefixSize)))
	if err != nil {
		return nil, fmt.Errorf("failed to read GGUF header: %w", err)
	}

	return append(prefix, rest...), nil
}
|
||||||
|
|
||||||
|
// Helper to convert gguf_parser metadata to simpler types
|
||||||
|
func convertGGUFMetadataKVs(kvs gguf_parser.GGUFMetadataKVs) map[string]interface{} {
|
||||||
|
result := make(map[string]interface{})
|
||||||
|
|
||||||
|
// Limit KV pairs to avoid bloat
|
||||||
|
const maxKVPairs = 200
|
||||||
|
count := 0
|
||||||
|
|
||||||
|
for _, kv := range kvs {
|
||||||
|
if count >= maxKVPairs {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip standard fields that are extracted separately
|
||||||
|
switch kv.Key {
|
||||||
|
case "general.architecture", "general.name", "general.license",
|
||||||
|
"general.version", "general.parameter_count", "general.quantization":
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
result[kv.Key] = kv.Value
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
127
syft/pkg/cataloger/ai/parse_gguf_model.go
Normal file
127
syft/pkg/cataloger/ai/parse_gguf_model.go
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
gguf_parser "github.com/gpustack/gguf-parser-go"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/internal"
|
||||||
|
"github.com/anchore/syft/internal/unknown"
|
||||||
|
"github.com/anchore/syft/syft/artifact"
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||||
|
)
|
||||||
|
|
||||||
|
const unknownGGUFData = "unknown"
|
||||||
|
|
||||||
|
// parseGGUFModel parses a GGUF model file and returns the discovered package.
|
||||||
|
// This implementation only reads the header portion of the file, not the entire model.
|
||||||
|
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||||
|
defer internal.CloseAndLogError(reader, reader.Path())
|
||||||
|
|
||||||
|
// Read only the header portion (not the entire file)
|
||||||
|
headerReader := &ggufHeaderReader{reader: reader}
|
||||||
|
headerData, err := headerReader.readHeader()
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("failed to read GGUF header: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a temporary file with just the header for the library to parse
|
||||||
|
// The library requires a file path, so we create a minimal temp file
|
||||||
|
tempFile, err := os.CreateTemp("", "syft-gguf-header-*.gguf")
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
|
||||||
|
}
|
||||||
|
tempPath := tempFile.Name()
|
||||||
|
defer os.Remove(tempPath)
|
||||||
|
|
||||||
|
// Write header data to temp file
|
||||||
|
if _, err := tempFile.Write(headerData); err != nil {
|
||||||
|
tempFile.Close()
|
||||||
|
return nil, nil, fmt.Errorf("failed to write header to temp file: %w", err)
|
||||||
|
}
|
||||||
|
tempFile.Close()
|
||||||
|
|
||||||
|
// Parse using gguf-parser-go with options to skip unnecessary data
|
||||||
|
ggufFile, err := gguf_parser.ParseGGUFFile(tempPath,
|
||||||
|
gguf_parser.SkipLargeMetadata(),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("failed to parse GGUF file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract metadata
|
||||||
|
metadata := ggufFile.Metadata()
|
||||||
|
|
||||||
|
// Convert to syft metadata structure
|
||||||
|
syftMetadata := &pkg.GGUFFileHeader{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: metadata.Name,
|
||||||
|
ModelVersion: extractVersion(ggufFile.Header.MetadataKV),
|
||||||
|
License: metadata.License,
|
||||||
|
Architecture: metadata.Architecture,
|
||||||
|
Quantization: metadata.FileTypeDescriptor,
|
||||||
|
Parameters: uint64(metadata.Parameters),
|
||||||
|
GGUFVersion: uint32(ggufFile.Header.Version),
|
||||||
|
TensorCount: ggufFile.Header.TensorCount,
|
||||||
|
Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
|
||||||
|
TruncatedHeader: false, // We read the full header
|
||||||
|
Hash: "", // Will be computed in newGGUFPackage
|
||||||
|
}
|
||||||
|
|
||||||
|
// If model name is not in metadata, use filename
|
||||||
|
if syftMetadata.ModelName == "" {
|
||||||
|
syftMetadata.ModelName = extractModelNameFromPath(reader.Path())
|
||||||
|
}
|
||||||
|
|
||||||
|
// If version is still unknown, try to infer from name
|
||||||
|
if syftMetadata.ModelVersion == unknownGGUFData {
|
||||||
|
syftMetadata.ModelVersion = extractVersionFromName(syftMetadata.ModelName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create package from metadata
|
||||||
|
p := newGGUFPackage(
|
||||||
|
syftMetadata,
|
||||||
|
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
|
||||||
|
)
|
||||||
|
|
||||||
|
return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse GGUF file")
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractVersion attempts to extract version from metadata KV pairs
|
||||||
|
func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string {
|
||||||
|
for _, kv := range kvs {
|
||||||
|
if kv.Key == "general.version" {
|
||||||
|
if v, ok := kv.Value.(string); ok && v != "" {
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return unknownGGUFData
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractVersionFromName is a placeholder for inferring a model version from the
// model name. The name argument is currently ignored and unknownGGUFData is
// always returned.
func extractVersionFromName(_ string) string {
	// Intended future behavior: recognize version patterns like "v1.0", "1.5b", "3.0", etc.
	// For now, return unknown - this could be enhanced with regex
	return unknownGGUFData
}
|
||||||
|
|
||||||
|
// extractModelNameFromPath derives a model name from a file path by taking the
// final path element and dropping a trailing ".gguf" extension (exact,
// lowercase match only). Names without that extension are returned unchanged.
func extractModelNameFromPath(path string) string {
	return strings.TrimSuffix(filepath.Base(path), ".gguf")
}
|
||||||
|
|
||||||
|
// integrity check
|
||||||
|
var _ generic.Parser = parseGGUFModel
|
||||||
127
syft/pkg/cataloger/ai/test_helpers_test.go
Normal file
127
syft/pkg/cataloger/ai/test_helpers_test.go
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GGUF type constants for test builder
const (
	ggufMagic       = 0x46554747 // "GGUF" in little-endian
	ggufTypeUint8   = 0
	ggufTypeInt8    = 1
	ggufTypeUint16  = 2
	ggufTypeInt16   = 3
	ggufTypeUint32  = 4
	ggufTypeInt32   = 5
	ggufTypeFloat32 = 6
	ggufTypeUint64  = 7
	ggufTypeInt64   = 8
	ggufTypeFloat64 = 9
	ggufTypeBool    = 10
	ggufTypeString  = 11
	ggufTypeArray   = 12
)

// testGGUFBuilder helps build GGUF files for testing. Configure it with the
// with* methods and call build to obtain the serialized bytes.
type testGGUFBuilder struct {
	buf         *bytes.Buffer // scratch buffer the file is serialized into
	version     uint32
	tensorCount uint64
	kvPairs     []testKVPair
}

// testKVPair is a single metadata entry; valueType is one of the ggufType*
// constants and must agree with the dynamic type stored in value.
type testKVPair struct {
	key       string
	valueType uint32
	value     interface{}
}

// newTestGGUFBuilder returns a builder preset to GGUF version 3 with 100 tensors
// and no metadata entries.
func newTestGGUFBuilder() *testGGUFBuilder {
	return &testGGUFBuilder{
		buf:         new(bytes.Buffer),
		version:     3,
		tensorCount: 100,
		kvPairs:     []testKVPair{},
	}
}

// withVersion sets the GGUF format version written to the header.
func (b *testGGUFBuilder) withVersion(v uint32) *testGGUFBuilder {
	b.version = v
	return b
}

// withTensorCount sets the tensor count written to the header.
func (b *testGGUFBuilder) withTensorCount(count uint64) *testGGUFBuilder {
	b.tensorCount = count
	return b
}

// withStringKV appends a string-typed metadata entry.
func (b *testGGUFBuilder) withStringKV(key, value string) *testGGUFBuilder {
	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeString, value: value})
	return b
}

// withUint64KV appends a uint64-typed metadata entry.
func (b *testGGUFBuilder) withUint64KV(key string, value uint64) *testGGUFBuilder {
	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint64, value: value})
	return b
}

// withUint32KV appends a uint32-typed metadata entry.
func (b *testGGUFBuilder) withUint32KV(key string, value uint32) *testGGUFBuilder {
	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint32, value: value})
	return b
}

// writeString writes s in GGUF string encoding: a little-endian uint64 byte
// length followed by the raw bytes.
func (b *testGGUFBuilder) writeString(s string) {
	binary.Write(b.buf, binary.LittleEndian, uint64(len(s)))
	b.buf.WriteString(s)
}

// writeGGUFPrefix writes the fixed 24-byte start of a GGUF file: magic(4),
// version(4), tensor count(8) and metadata KV count(8), all little-endian.
// Writes of fixed-size values into a bytes.Buffer do not fail, so the returned
// errors are not inspected.
func writeGGUFPrefix(w *bytes.Buffer, magic, version uint32, tensorCount, kvCount uint64) {
	binary.Write(w, binary.LittleEndian, magic)
	binary.Write(w, binary.LittleEndian, version)
	binary.Write(w, binary.LittleEndian, tensorCount)
	binary.Write(w, binary.LittleEndian, kvCount)
}

// build serializes the configured header and metadata entries and returns the
// bytes. It may be called repeatedly: every call starts from an empty buffer and
// returns an independent copy, so earlier results are neither duplicated into
// nor overwritten by later calls. It panics on a metadata entry whose value type
// the builder cannot encode, rather than silently emitting a corrupt file.
func (b *testGGUFBuilder) build() []byte {
	// start clean so that a second build does not append to the first
	b.buf.Reset()

	writeGGUFPrefix(b.buf, ggufMagic, b.version, b.tensorCount, uint64(len(b.kvPairs)))

	// Write KV pairs: key, value type tag, then the value itself
	for _, kv := range b.kvPairs {
		b.writeString(kv.key)
		binary.Write(b.buf, binary.LittleEndian, kv.valueType)

		switch kv.valueType {
		case ggufTypeString:
			b.writeString(kv.value.(string))
		case ggufTypeUint32:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(uint32))
		case ggufTypeUint64:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(uint64))
		case ggufTypeUint8:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(uint8))
		case ggufTypeInt32:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(int32))
		case ggufTypeBool:
			var v uint8
			if kv.value.(bool) {
				v = 1
			}
			binary.Write(b.buf, binary.LittleEndian, v)
		default:
			// a type tag without a value would yield an unparseable file
			panic("testGGUFBuilder: unsupported GGUF value type for key " + kv.key)
		}
	}

	// copy so the result does not alias the reusable scratch buffer
	return append([]byte(nil), b.buf.Bytes()...)
}

// buildInvalidMagic creates a file with invalid magic number. The result is a
// complete 24-byte prefix (wrong magic, configured version and tensor count, zero
// metadata entries) so that a reader rejects it because of the magic value and
// not merely because the input is too short.
func (b *testGGUFBuilder) buildInvalidMagic() []byte {
	buf := new(bytes.Buffer)
	writeGGUFPrefix(buf, 0x12345678, b.version, b.tensorCount, 0)
	return buf.Bytes()
}
|
||||||
47
syft/pkg/gguf.go
Normal file
47
syft/pkg/gguf.go
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
package pkg
|
||||||
|
|
||||||
|
// GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file.
// GGUF is a binary file format used for storing model weights for the GGML library, designed for fast
// loading and saving of models, particularly quantized large language models.
//
// A few well-known header values are promoted to typed fields; the remaining header
// key-value pairs are carried in Header.
type GGUFFileHeader struct {
	// ModelFormat is always "gguf"
	ModelFormat string `json:"modelFormat" cyclonedx:"modelFormat"`

	// ModelName is the name of the model (from general.name or filename)
	ModelName string `json:"modelName" cyclonedx:"modelName"`

	// ModelVersion is the version of the model (if available in header, else "unknown")
	ModelVersion string `json:"modelVersion,omitempty" cyclonedx:"modelVersion"`

	// FileSize is the size of the GGUF file in bytes (best-effort if available from resolver).
	// NOTE(review): no code visible alongside this type assigns this field - confirm where it is set.
	FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`

	// Hash is a short hash derived from the metadata (for stable global identifiers across remotes).
	// It identifies the metadata values, not the contents of the model file.
	Hash string `json:"hash,omitempty" cyclonedx:"hash"`

	// License is the license identifier (from general.license if present)
	License string `json:"license,omitempty" cyclonedx:"license"`

	// GGUFVersion is the GGUF format version (e.g., 3)
	GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"`

	// Architecture is the model architecture (from general.architecture, e.g., "qwen3moe", "llama")
	Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`

	// Quantization is the quantization type (e.g., "IQ4_NL", "Q4_K_M")
	Quantization string `json:"quantization,omitempty" cyclonedx:"quantization"`

	// Parameters is the number of model parameters (if present in header)
	Parameters uint64 `json:"parameters,omitempty" cyclonedx:"parameters"`

	// TensorCount is the number of tensors in the model
	TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`

	// Header contains the remaining key-value pairs from the GGUF header that are not already
	// represented as typed fields above. This preserves additional metadata fields for reference
	// (namespaced with general.*, llama.*, etc.) while avoiding duplication.
	Header map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`

	// TruncatedHeader indicates if the header was truncated during parsing (for very large headers)
	TruncatedHeader bool `json:"truncatedHeader,omitempty" cyclonedx:"truncatedHeader"`
}
|
||||||
@ -50,6 +50,7 @@ const (
|
|||||||
TerraformPkg Type = "terraform"
|
TerraformPkg Type = "terraform"
|
||||||
WordpressPluginPkg Type = "wordpress-plugin"
|
WordpressPluginPkg Type = "wordpress-plugin"
|
||||||
HomebrewPkg Type = "homebrew"
|
HomebrewPkg Type = "homebrew"
|
||||||
|
ModelPkg Type = "model"
|
||||||
)
|
)
|
||||||
|
|
||||||
// AllPkgs represents all supported package types
|
// AllPkgs represents all supported package types
|
||||||
@ -94,6 +95,7 @@ var AllPkgs = []Type{
|
|||||||
TerraformPkg,
|
TerraformPkg,
|
||||||
WordpressPluginPkg,
|
WordpressPluginPkg,
|
||||||
HomebrewPkg,
|
HomebrewPkg,
|
||||||
|
ModelPkg,
|
||||||
}
|
}
|
||||||
|
|
||||||
// PackageURLType returns the PURL package type for the current package.
|
// PackageURLType returns the PURL package type for the current package.
|
||||||
|
|||||||
@ -155,6 +155,7 @@ func TestTypeFromPURL(t *testing.T) {
|
|||||||
expectedTypes.Remove(string(HomebrewPkg))
|
expectedTypes.Remove(string(HomebrewPkg))
|
||||||
expectedTypes.Remove(string(TerraformPkg))
|
expectedTypes.Remove(string(TerraformPkg))
|
||||||
expectedTypes.Remove(string(GraalVMNativeImagePkg))
|
expectedTypes.Remove(string(GraalVMNativeImagePkg))
|
||||||
|
expectedTypes.Remove(string(ModelPkg)) // no valid purl for ai artifacts currently
|
||||||
expectedTypes.Remove(string(PhpPeclPkg)) // we should always consider this a pear package
|
expectedTypes.Remove(string(PhpPeclPkg)) // we should always consider this a pear package
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user