mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 08:23:15 +01:00
chore: refactor to use gguf-parser-go; 50mb limit
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
f664f9eaf2
commit
c689dcfeef
8
go.mod
8
go.mod
@ -286,6 +286,8 @@ require (
|
|||||||
modernc.org/memory v1.11.0 // indirect
|
modernc.org/memory v1.11.0 // indirect
|
||||||
)
|
)
|
||||||
|
|
||||||
|
require github.com/gpustack/gguf-parser-go v0.22.1
|
||||||
|
|
||||||
require (
|
require (
|
||||||
cyphar.com/go-pathrs v0.2.1 // indirect
|
cyphar.com/go-pathrs v0.2.1 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect
|
github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect
|
||||||
@ -310,7 +312,13 @@ require (
|
|||||||
github.com/clipperhouse/stringish v0.1.1 // indirect
|
github.com/clipperhouse/stringish v0.1.1 // indirect
|
||||||
github.com/clipperhouse/uax29/v2 v2.2.0 // indirect
|
github.com/clipperhouse/uax29/v2 v2.2.0 // indirect
|
||||||
github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect
|
github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect
|
||||||
|
github.com/henvic/httpretty v0.1.4 // indirect
|
||||||
|
github.com/json-iterator/go v1.1.12 // indirect
|
||||||
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||||
|
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||||
github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect
|
github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect
|
||||||
|
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
|
||||||
|
gonum.org/v1/gonum v0.15.1 // indirect
|
||||||
)
|
)
|
||||||
|
|
||||||
retract (
|
retract (
|
||||||
|
|||||||
11
go.sum
11
go.sum
@ -549,6 +549,8 @@ github.com/gookit/assert v0.1.1/go.mod h1:jS5bmIVQZTIwk42uXl4lyj4iaaxx32tqH16CFj
|
|||||||
github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg=
|
github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg=
|
||||||
github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA=
|
github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA=
|
||||||
github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs=
|
github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs=
|
||||||
|
github.com/gpustack/gguf-parser-go v0.22.1 h1:FRnEDWqT0Rcplr/R9ctCRSN2+3DhVsf6dnR5/i9JA4E=
|
||||||
|
github.com/gpustack/gguf-parser-go v0.22.1/go.mod h1:y4TwTtDqFWTK+xvprOjRUh+dowgU2TKCX37vRKvGiZ0=
|
||||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
|
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
|
||||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
|
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
|
||||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
|
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
|
||||||
@ -598,6 +600,8 @@ github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn
|
|||||||
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
|
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
|
||||||
github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
|
github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
|
||||||
github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
|
github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
|
||||||
|
github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
|
||||||
|
github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
|
||||||
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
||||||
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||||
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
|
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
|
||||||
@ -625,6 +629,7 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF
|
|||||||
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
||||||
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||||
github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||||
|
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||||
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
||||||
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
|
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
|
||||||
@ -730,9 +735,11 @@ github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcY
|
|||||||
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA=
|
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA=
|
||||||
github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
|
github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
|
||||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||||
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||||
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||||
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||||
|
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
||||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||||
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
|
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
|
||||||
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
|
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
|
||||||
@ -860,6 +867,8 @@ github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yf
|
|||||||
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||||
github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
|
github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
|
||||||
github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
|
github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
|
||||||
|
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
|
||||||
|
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
|
||||||
github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik=
|
github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik=
|
||||||
github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU=
|
github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU=
|
||||||
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
|
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
|
||||||
@ -1313,6 +1322,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
|
|||||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
|
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
|
||||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
|
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
|
||||||
|
gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
|
||||||
|
gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
|
||||||
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
|
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
|
||||||
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
|
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
|
||||||
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
|
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
|
||||||
|
|||||||
@ -3,7 +3,7 @@ package task
|
|||||||
import (
|
import (
|
||||||
"github.com/anchore/syft/syft/cataloging/pkgcataloging"
|
"github.com/anchore/syft/syft/cataloging/pkgcataloging"
|
||||||
"github.com/anchore/syft/syft/pkg"
|
"github.com/anchore/syft/syft/pkg"
|
||||||
"github.com/anchore/syft/syft/pkg/cataloger/aiartifact"
|
"github.com/anchore/syft/syft/pkg/cataloger/ai"
|
||||||
"github.com/anchore/syft/syft/pkg/cataloger/alpine"
|
"github.com/anchore/syft/syft/pkg/cataloger/alpine"
|
||||||
"github.com/anchore/syft/syft/pkg/cataloger/arch"
|
"github.com/anchore/syft/syft/pkg/cataloger/arch"
|
||||||
"github.com/anchore/syft/syft/pkg/cataloger/binary"
|
"github.com/anchore/syft/syft/pkg/cataloger/binary"
|
||||||
@ -179,7 +179,7 @@ func DefaultPackageTaskFactories() Factories {
|
|||||||
newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"),
|
newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"),
|
||||||
newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"),
|
newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"),
|
||||||
newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"),
|
newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"),
|
||||||
newSimplePackageTaskFactory(aiartifact.NewGGUFCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "gguf", "ml"),
|
newSimplePackageTaskFactory(ai.NewGGUFCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "gguf", "ml"),
|
||||||
|
|
||||||
// deprecated catalogers ////////////////////////////////////////
|
// deprecated catalogers ////////////////////////////////////////
|
||||||
// these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible)
|
// these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible)
|
||||||
|
|||||||
@ -1,8 +1,8 @@
|
|||||||
/*
|
/*
|
||||||
Package aiartifact provides concrete Cataloger implementations for AI artifacts and machine learning models,
|
Package ai provides concrete Cataloger implementations for AI artifacts and machine learning models,
|
||||||
including support for GGUF (GPT-Generated Unified Format) model files.
|
including support for GGUF (GPT-Generated Unified Format) model files.
|
||||||
*/
|
*/
|
||||||
package aiartifact
|
package ai
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/anchore/syft/syft/pkg"
|
"github.com/anchore/syft/syft/pkg"
|
||||||
@ -11,6 +11,6 @@ import (
|
|||||||
|
|
||||||
// NewGGUFCataloger returns a new cataloger instance for GGUF model files.
|
// NewGGUFCataloger returns a new cataloger instance for GGUF model files.
|
||||||
func NewGGUFCataloger() pkg.Cataloger {
|
func NewGGUFCataloger() pkg.Cataloger {
|
||||||
return generic.NewCataloger("model-gguf-cataloger").
|
return generic.NewCataloger("gguf-cataloger").
|
||||||
WithParserByGlobs(parseGGUFModel, "**/*.gguf")
|
WithParserByGlobs(parseGGUFModel, "**/*.gguf")
|
||||||
}
|
}
|
||||||
@ -1,4 +1,4 @@
|
|||||||
package aiartifact
|
package ai
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
@ -171,7 +171,7 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
ModelName: "model1",
|
ModelName: "model1",
|
||||||
ModelVersion: "1.0",
|
ModelVersion: "1.0",
|
||||||
Architecture: "llama",
|
Architecture: "llama",
|
||||||
Quantization: unkownGGUFData,
|
Quantization: unknownGGUFData,
|
||||||
GGUFVersion: 3,
|
GGUFVersion: 3,
|
||||||
TensorCount: 100,
|
TensorCount: 100,
|
||||||
Header: map[string]interface{}{},
|
Header: map[string]interface{}{},
|
||||||
@ -187,7 +187,7 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
ModelName: "model2",
|
ModelName: "model2",
|
||||||
ModelVersion: "2.0",
|
ModelVersion: "2.0",
|
||||||
Architecture: "mistral",
|
Architecture: "mistral",
|
||||||
Quantization: unkownGGUFData,
|
Quantization: unknownGGUFData,
|
||||||
GGUFVersion: 3,
|
GGUFVersion: 3,
|
||||||
TensorCount: 200,
|
TensorCount: 200,
|
||||||
Header: map[string]interface{}{},
|
Header: map[string]interface{}{},
|
||||||
@ -217,14 +217,14 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
expectedPackages: []pkg.Package{
|
expectedPackages: []pkg.Package{
|
||||||
{
|
{
|
||||||
Name: "qwen-nested",
|
Name: "qwen-nested",
|
||||||
Version: unkownGGUFData,
|
Version: unknownGGUFData,
|
||||||
Type: pkg.ModelPkg,
|
Type: pkg.ModelPkg,
|
||||||
Metadata: pkg.GGUFFileMetadata{
|
Metadata: pkg.GGUFFileMetadata{
|
||||||
ModelFormat: "gguf",
|
ModelFormat: "gguf",
|
||||||
ModelName: "qwen-nested",
|
ModelName: "qwen-nested",
|
||||||
ModelVersion: unkownGGUFData,
|
ModelVersion: unknownGGUFData,
|
||||||
Architecture: "qwen",
|
Architecture: "qwen",
|
||||||
Quantization: unkownGGUFData,
|
Quantization: unknownGGUFData,
|
||||||
GGUFVersion: 3,
|
GGUFVersion: 3,
|
||||||
TensorCount: 150,
|
TensorCount: 150,
|
||||||
Header: map[string]interface{}{},
|
Header: map[string]interface{}{},
|
||||||
69
syft/pkg/cataloger/ai/package.go
Normal file
69
syft/pkg/cataloger/ai/package.go
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/internal/log"
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
)
|
||||||
|
|
||||||
|
func newGGUFPackage(metadata *pkg.GGUFFileMetadata, locations ...file.Location) pkg.Package {
|
||||||
|
// Compute hash if not already set
|
||||||
|
if metadata.Hash == "" {
|
||||||
|
metadata.Hash = computeMetadataHash(metadata)
|
||||||
|
}
|
||||||
|
|
||||||
|
p := pkg.Package{
|
||||||
|
Name: metadata.ModelName,
|
||||||
|
Version: metadata.ModelVersion,
|
||||||
|
Locations: file.NewLocationSet(locations...),
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Licenses: pkg.NewLicenseSet(),
|
||||||
|
Metadata: *metadata,
|
||||||
|
// NOTE: PURL is intentionally not set as the package-url spec
|
||||||
|
// has not yet finalized support for ML model packages
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add license to the package if present in metadata
|
||||||
|
if metadata.License != "" {
|
||||||
|
p.Licenses.Add(pkg.NewLicenseFromFields(metadata.License, "", nil))
|
||||||
|
}
|
||||||
|
|
||||||
|
p.SetID()
|
||||||
|
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
// computeMetadataHash computes a stable hash of the metadata for use as a global identifier
|
||||||
|
func computeMetadataHash(metadata *pkg.GGUFFileMetadata) string {
|
||||||
|
// Create a stable representation of the metadata
|
||||||
|
hashData := struct {
|
||||||
|
Format string
|
||||||
|
Name string
|
||||||
|
Version string
|
||||||
|
Architecture string
|
||||||
|
GGUFVersion uint32
|
||||||
|
TensorCount uint64
|
||||||
|
}{
|
||||||
|
Format: metadata.ModelFormat,
|
||||||
|
Name: metadata.ModelName,
|
||||||
|
Version: metadata.ModelVersion,
|
||||||
|
Architecture: metadata.Architecture,
|
||||||
|
GGUFVersion: metadata.GGUFVersion,
|
||||||
|
TensorCount: metadata.TensorCount,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Marshal to JSON for stable hashing
|
||||||
|
jsonBytes, err := json.Marshal(hashData)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("failed to marshal metadata for hashing: %v", err)
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute SHA256 hash
|
||||||
|
hash := sha256.Sum256(jsonBytes)
|
||||||
|
return fmt.Sprintf("%x", hash[:8]) // Use first 8 bytes (16 hex chars)
|
||||||
|
}
|
||||||
@ -1,4 +1,4 @@
|
|||||||
package aiartifact
|
package ai
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
"testing"
|
||||||
92
syft/pkg/cataloger/ai/parse_gguf.go
Normal file
92
syft/pkg/cataloger/ai/parse_gguf.go
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/binary"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
|
||||||
|
gguf_parser "github.com/gpustack/gguf-parser-go"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GGUF file format constants
|
||||||
|
const (
|
||||||
|
ggufMagicNumber = 0x46554747 // "GGUF" in little-endian
|
||||||
|
maxHeaderSize = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies
|
||||||
|
)
|
||||||
|
|
||||||
|
// ggufHeaderReader reads just the header portion of a GGUF file efficiently
|
||||||
|
type ggufHeaderReader struct {
|
||||||
|
reader io.Reader
|
||||||
|
}
|
||||||
|
|
||||||
|
// readHeader reads only the GGUF header (metadata) without reading tensor data
|
||||||
|
// This is much more efficient than reading the entire file
|
||||||
|
func (r *ggufHeaderReader) readHeader() ([]byte, error) {
|
||||||
|
// Read initial chunk to determine header size
|
||||||
|
// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
|
||||||
|
initialBuf := make([]byte, 24) // Enough for magic, version, tensor count, and kv count
|
||||||
|
if _, err := io.ReadFull(r.reader, initialBuf); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to read GGUF header prefix: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify magic number
|
||||||
|
magic := binary.LittleEndian.Uint32(initialBuf[0:4])
|
||||||
|
if magic != ggufMagicNumber {
|
||||||
|
return nil, fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
|
||||||
|
}
|
||||||
|
|
||||||
|
// We need to read the metadata KV pairs to know the full header size
|
||||||
|
// For efficiency, we'll read incrementally up to maxHeaderSize
|
||||||
|
headerData := make([]byte, 0, 1024*1024) // Start with 1MB capacity
|
||||||
|
headerData = append(headerData, initialBuf...)
|
||||||
|
|
||||||
|
// Read the rest of the header in larger chunks for efficiency
|
||||||
|
buf := make([]byte, 64*1024) // 64KB chunks
|
||||||
|
for len(headerData) < maxHeaderSize {
|
||||||
|
n, err := r.reader.Read(buf)
|
||||||
|
if n > 0 {
|
||||||
|
headerData = append(headerData, buf[:n]...)
|
||||||
|
}
|
||||||
|
if err == io.EOF {
|
||||||
|
// Reached end of file, we have all the data
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to read GGUF header: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(headerData) > maxHeaderSize {
|
||||||
|
// Truncate if we somehow read too much
|
||||||
|
headerData = headerData[:maxHeaderSize]
|
||||||
|
}
|
||||||
|
|
||||||
|
return headerData, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper to convert gguf_parser metadata to simpler types
|
||||||
|
func convertGGUFMetadataKVs(kvs gguf_parser.GGUFMetadataKVs) map[string]interface{} {
|
||||||
|
result := make(map[string]interface{})
|
||||||
|
|
||||||
|
// Limit KV pairs to avoid bloat
|
||||||
|
const maxKVPairs = 200
|
||||||
|
count := 0
|
||||||
|
|
||||||
|
for _, kv := range kvs {
|
||||||
|
if count >= maxKVPairs {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip standard fields that are extracted separately
|
||||||
|
switch kv.Key {
|
||||||
|
case "general.architecture", "general.name", "general.license",
|
||||||
|
"general.version", "general.parameter_count", "general.quantization":
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
result[kv.Key] = kv.Value
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
127
syft/pkg/cataloger/ai/parse_gguf_model.go
Normal file
127
syft/pkg/cataloger/ai/parse_gguf_model.go
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
gguf_parser "github.com/gpustack/gguf-parser-go"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/internal"
|
||||||
|
"github.com/anchore/syft/internal/unknown"
|
||||||
|
"github.com/anchore/syft/syft/artifact"
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||||
|
)
|
||||||
|
|
||||||
|
const unknownGGUFData = "unknown"
|
||||||
|
|
||||||
|
// parseGGUFModel parses a GGUF model file and returns the discovered package.
|
||||||
|
// This implementation only reads the header portion of the file, not the entire model.
|
||||||
|
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||||
|
defer internal.CloseAndLogError(reader, reader.Path())
|
||||||
|
|
||||||
|
// Read only the header portion (not the entire file)
|
||||||
|
headerReader := &ggufHeaderReader{reader: reader}
|
||||||
|
headerData, err := headerReader.readHeader()
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("failed to read GGUF header: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a temporary file with just the header for the library to parse
|
||||||
|
// The library requires a file path, so we create a minimal temp file
|
||||||
|
tempFile, err := os.CreateTemp("", "syft-gguf-header-*.gguf")
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
|
||||||
|
}
|
||||||
|
tempPath := tempFile.Name()
|
||||||
|
defer os.Remove(tempPath)
|
||||||
|
|
||||||
|
// Write header data to temp file
|
||||||
|
if _, err := tempFile.Write(headerData); err != nil {
|
||||||
|
tempFile.Close()
|
||||||
|
return nil, nil, fmt.Errorf("failed to write header to temp file: %w", err)
|
||||||
|
}
|
||||||
|
tempFile.Close()
|
||||||
|
|
||||||
|
// Parse using gguf-parser-go with options to skip unnecessary data
|
||||||
|
ggufFile, err := gguf_parser.ParseGGUFFile(tempPath,
|
||||||
|
gguf_parser.SkipLargeMetadata(),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("failed to parse GGUF file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract metadata
|
||||||
|
metadata := ggufFile.Metadata()
|
||||||
|
|
||||||
|
// Convert to syft metadata structure
|
||||||
|
syftMetadata := &pkg.GGUFFileMetadata{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: metadata.Name,
|
||||||
|
ModelVersion: extractVersion(ggufFile.Header.MetadataKV),
|
||||||
|
License: metadata.License,
|
||||||
|
Architecture: metadata.Architecture,
|
||||||
|
Quantization: metadata.FileTypeDescriptor,
|
||||||
|
Parameters: uint64(metadata.Parameters),
|
||||||
|
GGUFVersion: uint32(ggufFile.Header.Version),
|
||||||
|
TensorCount: ggufFile.Header.TensorCount,
|
||||||
|
Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
|
||||||
|
TruncatedHeader: false, // We read the full header
|
||||||
|
Hash: "", // Will be computed in newGGUFPackage
|
||||||
|
}
|
||||||
|
|
||||||
|
// If model name is not in metadata, use filename
|
||||||
|
if syftMetadata.ModelName == "" {
|
||||||
|
syftMetadata.ModelName = extractModelNameFromPath(reader.Path())
|
||||||
|
}
|
||||||
|
|
||||||
|
// If version is still unknown, try to infer from name
|
||||||
|
if syftMetadata.ModelVersion == unknownGGUFData {
|
||||||
|
syftMetadata.ModelVersion = extractVersionFromName(syftMetadata.ModelName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create package from metadata
|
||||||
|
p := newGGUFPackage(
|
||||||
|
syftMetadata,
|
||||||
|
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
|
||||||
|
)
|
||||||
|
|
||||||
|
return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse GGUF file")
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractVersion attempts to extract version from metadata KV pairs
|
||||||
|
func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string {
|
||||||
|
for _, kv := range kvs {
|
||||||
|
if kv.Key == "general.version" {
|
||||||
|
if v, ok := kv.Value.(string); ok && v != "" {
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return unknownGGUFData
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractVersionFromName tries to extract version from model name
|
||||||
|
func extractVersionFromName(_ string) string {
|
||||||
|
// Look for version patterns like "v1.0", "1.5b", "3.0", etc.
|
||||||
|
// For now, return unknown - this could be enhanced with regex
|
||||||
|
return unknownGGUFData
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractModelNameFromPath extracts the model name from the file path
|
||||||
|
func extractModelNameFromPath(path string) string {
|
||||||
|
// Get the base filename
|
||||||
|
base := filepath.Base(path)
|
||||||
|
|
||||||
|
// Remove .gguf extension
|
||||||
|
name := strings.TrimSuffix(base, ".gguf")
|
||||||
|
|
||||||
|
return name
|
||||||
|
}
|
||||||
|
|
||||||
|
// integrity check
|
||||||
|
var _ generic.Parser = parseGGUFModel
|
||||||
@ -1,10 +1,28 @@
|
|||||||
package aiartifact
|
package ai
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// GGUF type constants for test builder
|
||||||
|
const (
|
||||||
|
ggufMagic = 0x46554747 // "GGUF" in little-endian
|
||||||
|
ggufTypeUint8 = 0
|
||||||
|
ggufTypeInt8 = 1
|
||||||
|
ggufTypeUint16 = 2
|
||||||
|
ggufTypeInt16 = 3
|
||||||
|
ggufTypeUint32 = 4
|
||||||
|
ggufTypeInt32 = 5
|
||||||
|
ggufTypeFloat32 = 6
|
||||||
|
ggufTypeUint64 = 7
|
||||||
|
ggufTypeInt64 = 8
|
||||||
|
ggufTypeFloat64 = 9
|
||||||
|
ggufTypeBool = 10
|
||||||
|
ggufTypeString = 11
|
||||||
|
ggufTypeArray = 12
|
||||||
|
)
|
||||||
|
|
||||||
// testGGUFBuilder helps build GGUF files for testing
|
// testGGUFBuilder helps build GGUF files for testing
|
||||||
type testGGUFBuilder struct {
|
type testGGUFBuilder struct {
|
||||||
buf *bytes.Buffer
|
buf *bytes.Buffer
|
||||||
@ -1,28 +0,0 @@
|
|||||||
package aiartifact
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/anchore/syft/syft/file"
|
|
||||||
"github.com/anchore/syft/syft/pkg"
|
|
||||||
)
|
|
||||||
|
|
||||||
func newGGUFPackage(metadata *pkg.GGUFFileMetadata, locations ...file.Location) pkg.Package {
|
|
||||||
p := pkg.Package{
|
|
||||||
Name: metadata.ModelName,
|
|
||||||
Version: metadata.ModelVersion,
|
|
||||||
Locations: file.NewLocationSet(locations...),
|
|
||||||
Type: pkg.ModelPkg,
|
|
||||||
Licenses: pkg.NewLicenseSet(),
|
|
||||||
Metadata: *metadata,
|
|
||||||
// NOTE: PURL is intentionally not set as the package-url spec
|
|
||||||
// has not yet finalized support for ML model packages
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add license to the package if present in metadata
|
|
||||||
if metadata.License != "" {
|
|
||||||
p.Licenses.Add(pkg.NewLicenseFromFields(metadata.License, "", nil))
|
|
||||||
}
|
|
||||||
|
|
||||||
p.SetID()
|
|
||||||
|
|
||||||
return p
|
|
||||||
}
|
|
||||||
@ -1,345 +0,0 @@
|
|||||||
package aiartifact
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"crypto/sha256"
|
|
||||||
"encoding/binary"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"path/filepath"
|
|
||||||
"regexp"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/anchore/syft/internal/log"
|
|
||||||
"github.com/anchore/syft/syft/pkg"
|
|
||||||
)
|
|
||||||
|
|
||||||
// GGUF file format constants
const (
	ggufMagic   = 0x46554747 // "GGUF" in little-endian
	maxKVPairs  = 10000      // Safety limit for KV pairs
	maxKeyLen   = 65535      // Maximum key length (also reused as the string-value length limit in readString)
	maxTensors  = 100000     // Safety limit for tensors
	maxHeaderKV = 200        // Maximum KV pairs to include in Header map (to avoid bloat)
)

// GGUF value types (from GGUF spec); these tag each metadata value in the header
const (
	ggufTypeUint8   = 0
	ggufTypeInt8    = 1
	ggufTypeUint16  = 2
	ggufTypeInt16   = 3
	ggufTypeUint32  = 4
	ggufTypeInt32   = 5
	ggufTypeFloat32 = 6
	ggufTypeUint64  = 7
	ggufTypeInt64   = 8
	ggufTypeFloat64 = 9
	ggufTypeBool    = 10
	ggufTypeString  = 11
	ggufTypeArray   = 12
)

// unknownGGUFData is the placeholder used when a metadata field is absent.
const unknownGGUFData = "unknown"
|
|
||||||
|
|
||||||
//nolint:funlen
|
|
||||||
func parseGGUFHeader(data []byte, location string) (*pkg.GGUFFileMetadata, error) {
|
|
||||||
reader := bytes.NewReader(data)
|
|
||||||
// Read magic number
|
|
||||||
var magic uint32
|
|
||||||
if err := binary.Read(reader, binary.LittleEndian, &magic); err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to read magic number: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if magic != ggufMagic {
|
|
||||||
return nil, fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read version
|
|
||||||
var version uint32
|
|
||||||
if err := binary.Read(reader, binary.LittleEndian, &version); err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to read version: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read tensor count
|
|
||||||
var tensorCount uint64
|
|
||||||
if err := binary.Read(reader, binary.LittleEndian, &tensorCount); err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to read tensor count: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if tensorCount > maxTensors {
|
|
||||||
log.Warnf("GGUF file has suspicious tensor count: %d (max: %d)", tensorCount, maxTensors)
|
|
||||||
tensorCount = maxTensors
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read metadata KV count
|
|
||||||
var kvCount uint64
|
|
||||||
if err := binary.Read(reader, binary.LittleEndian, &kvCount); err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to read KV count: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if kvCount > maxKVPairs {
|
|
||||||
log.Warnf("GGUF file has suspicious KV count: %d (max: %d)", kvCount, maxKVPairs)
|
|
||||||
return nil, fmt.Errorf("KV count exceeds safety limit: %d", kvCount)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse metadata key-value pairs
|
|
||||||
kvMap := make(map[string]any)
|
|
||||||
truncated := false
|
|
||||||
|
|
||||||
for i := uint64(0); i < kvCount; i++ {
|
|
||||||
key, value, err := readKVPair(reader)
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("failed to read KV pair %d: %v", i, err)
|
|
||||||
truncated = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if len(kvMap) < maxHeaderKV {
|
|
||||||
kvMap[key] = value
|
|
||||||
} else {
|
|
||||||
truncated = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract common metadata fields
|
|
||||||
metadata := &pkg.GGUFFileMetadata{
|
|
||||||
ModelFormat: "gguf",
|
|
||||||
GGUFVersion: version,
|
|
||||||
TensorCount: tensorCount,
|
|
||||||
Header: kvMap,
|
|
||||||
TruncatedHeader: truncated,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract known fields from KV map and remove them to avoid duplication in Header
|
|
||||||
if arch, ok := kvMap["general.architecture"].(string); ok {
|
|
||||||
metadata.Architecture = arch
|
|
||||||
delete(kvMap, "general.architecture")
|
|
||||||
}
|
|
||||||
|
|
||||||
if name, ok := kvMap["general.name"].(string); ok {
|
|
||||||
metadata.ModelName = name
|
|
||||||
delete(kvMap, "general.name")
|
|
||||||
} else {
|
|
||||||
// Fall back to filename if general.name not present
|
|
||||||
filename := filepath.Base(location)
|
|
||||||
metadata.ModelName = strings.TrimSuffix(filename, filepath.Ext(filename))
|
|
||||||
}
|
|
||||||
|
|
||||||
if license, ok := kvMap["general.license"].(string); ok {
|
|
||||||
metadata.License = license
|
|
||||||
delete(kvMap, "general.license")
|
|
||||||
}
|
|
||||||
|
|
||||||
if version, ok := kvMap["general.version"].(string); ok {
|
|
||||||
metadata.ModelVersion = version
|
|
||||||
delete(kvMap, "general.version")
|
|
||||||
} else {
|
|
||||||
metadata.ModelVersion = unknownGGUFData
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract parameters count if present
|
|
||||||
if params, ok := kvMap["general.parameter_count"].(uint64); ok {
|
|
||||||
metadata.Parameters = params
|
|
||||||
delete(kvMap, "general.parameter_count")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to infer quantization from general.quantization or from filename
|
|
||||||
if quant, ok := kvMap["general.quantization"].(string); ok {
|
|
||||||
metadata.Quantization = quant
|
|
||||||
delete(kvMap, "general.quantization")
|
|
||||||
} else if quantizedBy, ok := kvMap["general.quantized_by"].(string); ok && quantizedBy != "" {
|
|
||||||
// If quantized but no explicit quantization field, try to extract from filename
|
|
||||||
metadata.Quantization = inferQuantizationFromFilename(location)
|
|
||||||
// Note: we keep general.quantized_by in Header since it's not directly mapped to a field
|
|
||||||
} else {
|
|
||||||
metadata.Quantization = unknownGGUFData
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute hash of metadata for stable identifier
|
|
||||||
metadata.Hash = computeMetadataHash(metadata)
|
|
||||||
|
|
||||||
return metadata, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// readKVPair reads a single key-value pair from the GGUF header
|
|
||||||
func readKVPair(reader io.Reader) (string, interface{}, error) {
|
|
||||||
// Read key length
|
|
||||||
var keyLen uint64
|
|
||||||
if err := binary.Read(reader, binary.LittleEndian, &keyLen); err != nil {
|
|
||||||
return "", nil, fmt.Errorf("failed to read key length: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if keyLen > maxKeyLen {
|
|
||||||
return "", nil, fmt.Errorf("key length exceeds maximum: %d", keyLen)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read key
|
|
||||||
keyBytes := make([]byte, keyLen)
|
|
||||||
if _, err := io.ReadFull(reader, keyBytes); err != nil {
|
|
||||||
return "", nil, fmt.Errorf("failed to read key: %w", err)
|
|
||||||
}
|
|
||||||
key := string(keyBytes)
|
|
||||||
|
|
||||||
// Read value type
|
|
||||||
var valueType uint32
|
|
||||||
if err := binary.Read(reader, binary.LittleEndian, &valueType); err != nil {
|
|
||||||
return "", nil, fmt.Errorf("failed to read value type: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read value based on type
|
|
||||||
value, err := readValue(reader, valueType)
|
|
||||||
if err != nil {
|
|
||||||
return "", nil, fmt.Errorf("failed to read value for key %s: %w", key, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return key, value, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
//nolint:funlen
|
|
||||||
func readValue(reader io.Reader, valueType uint32) (any, error) {
|
|
||||||
switch valueType {
|
|
||||||
case ggufTypeUint8:
|
|
||||||
var v uint8
|
|
||||||
err := binary.Read(reader, binary.LittleEndian, &v)
|
|
||||||
return v, err
|
|
||||||
case ggufTypeInt8:
|
|
||||||
var v int8
|
|
||||||
err := binary.Read(reader, binary.LittleEndian, &v)
|
|
||||||
return v, err
|
|
||||||
case ggufTypeUint16:
|
|
||||||
var v uint16
|
|
||||||
err := binary.Read(reader, binary.LittleEndian, &v)
|
|
||||||
return v, err
|
|
||||||
case ggufTypeInt16:
|
|
||||||
var v int16
|
|
||||||
err := binary.Read(reader, binary.LittleEndian, &v)
|
|
||||||
return v, err
|
|
||||||
case ggufTypeUint32:
|
|
||||||
var v uint32
|
|
||||||
err := binary.Read(reader, binary.LittleEndian, &v)
|
|
||||||
return v, err
|
|
||||||
case ggufTypeInt32:
|
|
||||||
var v int32
|
|
||||||
err := binary.Read(reader, binary.LittleEndian, &v)
|
|
||||||
return v, err
|
|
||||||
case ggufTypeFloat32:
|
|
||||||
var v float32
|
|
||||||
err := binary.Read(reader, binary.LittleEndian, &v)
|
|
||||||
return v, err
|
|
||||||
case ggufTypeUint64:
|
|
||||||
var v uint64
|
|
||||||
err := binary.Read(reader, binary.LittleEndian, &v)
|
|
||||||
return v, err
|
|
||||||
case ggufTypeInt64:
|
|
||||||
var v int64
|
|
||||||
err := binary.Read(reader, binary.LittleEndian, &v)
|
|
||||||
return v, err
|
|
||||||
case ggufTypeFloat64:
|
|
||||||
var v float64
|
|
||||||
err := binary.Read(reader, binary.LittleEndian, &v)
|
|
||||||
return v, err
|
|
||||||
case ggufTypeBool:
|
|
||||||
var v uint8
|
|
||||||
err := binary.Read(reader, binary.LittleEndian, &v)
|
|
||||||
return v != 0, err
|
|
||||||
case ggufTypeString:
|
|
||||||
return readString(reader)
|
|
||||||
case ggufTypeArray:
|
|
||||||
return readArray(reader)
|
|
||||||
default:
|
|
||||||
return nil, fmt.Errorf("unknown value type: %d", valueType)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// readString reads a length-prefixed UTF-8 string
|
|
||||||
func readString(reader io.Reader) (string, error) {
|
|
||||||
var length uint64
|
|
||||||
if err := binary.Read(reader, binary.LittleEndian, &length); err != nil {
|
|
||||||
return "", fmt.Errorf("failed to read string length: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if length > maxKeyLen {
|
|
||||||
return "", fmt.Errorf("string length exceeds maximum: %d", length)
|
|
||||||
}
|
|
||||||
|
|
||||||
strBytes := make([]byte, length)
|
|
||||||
if _, err := io.ReadFull(reader, strBytes); err != nil {
|
|
||||||
return "", fmt.Errorf("failed to read string: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return string(strBytes), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// readArray reads an array value
|
|
||||||
func readArray(reader io.Reader) (interface{}, error) {
|
|
||||||
// Read array element type
|
|
||||||
var elemType uint32
|
|
||||||
if err := binary.Read(reader, binary.LittleEndian, &elemType); err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to read array element type: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read array length
|
|
||||||
var length uint64
|
|
||||||
if err := binary.Read(reader, binary.LittleEndian, &length); err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to read array length: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if length > 1000 {
|
|
||||||
// Limit array size to avoid memory issues
|
|
||||||
return nil, fmt.Errorf("array length too large: %d", length)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read array elements
|
|
||||||
var elements []interface{}
|
|
||||||
for i := uint64(0); i < length; i++ {
|
|
||||||
value, err := readValue(reader, elemType)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to read array element %d: %w", i, err)
|
|
||||||
}
|
|
||||||
elements = append(elements, value)
|
|
||||||
}
|
|
||||||
|
|
||||||
return elements, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// inferQuantizationFromFilename attempts to extract quantization info from filename
|
|
||||||
func inferQuantizationFromFilename(filename string) string {
|
|
||||||
// Common quantization patterns: Q4_K_M, IQ4_NL, Q5_K_S, etc.
|
|
||||||
quantPattern := regexp.MustCompile(`[IQ]\d+_[A-Z_]+`)
|
|
||||||
if match := quantPattern.FindString(filename); match != "" {
|
|
||||||
return match
|
|
||||||
}
|
|
||||||
return unknownGGUFData
|
|
||||||
}
|
|
||||||
|
|
||||||
// computeMetadataHash computes a stable hash of the metadata for use as a global identifier
|
|
||||||
func computeMetadataHash(metadata *pkg.GGUFFileMetadata) string {
|
|
||||||
// Create a stable representation of the metadata
|
|
||||||
hashData := struct {
|
|
||||||
Format string
|
|
||||||
Name string
|
|
||||||
Version string
|
|
||||||
Architecture string
|
|
||||||
GGUFVersion uint32
|
|
||||||
TensorCount uint64
|
|
||||||
}{
|
|
||||||
Format: metadata.ModelFormat,
|
|
||||||
Name: metadata.ModelName,
|
|
||||||
Version: metadata.ModelVersion,
|
|
||||||
Architecture: metadata.Architecture,
|
|
||||||
GGUFVersion: metadata.GGUFVersion,
|
|
||||||
TensorCount: metadata.TensorCount,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Marshal to JSON for stable hashing
|
|
||||||
jsonBytes, err := json.Marshal(hashData)
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("failed to marshal metadata for hashing: %v", err)
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute SHA256 hash
|
|
||||||
hash := sha256.Sum256(jsonBytes)
|
|
||||||
return fmt.Sprintf("%x", hash[:8]) // Use first 8 bytes (16 hex chars)
|
|
||||||
}
|
|
||||||
@ -1,68 +0,0 @@
|
|||||||
package aiartifact
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
|
|
||||||
"github.com/anchore/syft/internal"
|
|
||||||
"github.com/anchore/syft/internal/log"
|
|
||||||
"github.com/anchore/syft/internal/unknown"
|
|
||||||
"github.com/anchore/syft/syft/artifact"
|
|
||||||
"github.com/anchore/syft/syft/file"
|
|
||||||
"github.com/anchore/syft/syft/pkg"
|
|
||||||
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
|
||||||
)
|
|
||||||
|
|
||||||
// parseGGUFModel parses a GGUF model file and returns the discovered package.
|
|
||||||
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
|
||||||
defer internal.CloseAndLogError(reader, reader.Path())
|
|
||||||
|
|
||||||
// Read header (we'll read a reasonable amount to parse the header without reading entire file)
|
|
||||||
// GGUF headers are typically < 1MB, but we'll use a 10MB limit to be safe
|
|
||||||
const maxHeaderSize = 10 * 1024 * 1024
|
|
||||||
limitedReader := io.LimitReader(reader, maxHeaderSize)
|
|
||||||
|
|
||||||
// We need to buffer the data because we need to check magic and parse
|
|
||||||
headerData := make([]byte, 0, 8192) // Start with 8KB buffer
|
|
||||||
buf := make([]byte, 8192)
|
|
||||||
for {
|
|
||||||
n, err := limitedReader.Read(buf)
|
|
||||||
if n > 0 {
|
|
||||||
headerData = append(headerData, buf[:n]...)
|
|
||||||
}
|
|
||||||
if err == io.EOF {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
return nil, nil, fmt.Errorf("error reading file: %w", err)
|
|
||||||
}
|
|
||||||
// Stop if we've read enough for a reasonable header
|
|
||||||
if len(headerData) > maxHeaderSize {
|
|
||||||
log.Warnf("GGUF header at %s exceeds max size, truncating", reader.Path())
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if this is actually a GGUF file
|
|
||||||
if len(headerData) < 4 {
|
|
||||||
return nil, nil, fmt.Errorf("file too small to be a valid GGUF file")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse the GGUF header
|
|
||||||
metadata, err := parseGGUFHeader(headerData, reader.Path())
|
|
||||||
if err != nil {
|
|
||||||
return nil, nil, fmt.Errorf("failed to parse GGUF file: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create package from metadata
|
|
||||||
p := newGGUFPackage(
|
|
||||||
metadata,
|
|
||||||
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
|
|
||||||
)
|
|
||||||
|
|
||||||
return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse GGUF file")
|
|
||||||
}
|
|
||||||
|
|
||||||
// integrity check
|
|
||||||
var _ generic.Parser = parseGGUFModel
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -266,8 +266,6 @@ func TypeByName(name string) Type {
|
|||||||
return WordpressPluginPkg
|
return WordpressPluginPkg
|
||||||
case "homebrew":
|
case "homebrew":
|
||||||
return HomebrewPkg
|
return HomebrewPkg
|
||||||
case "model":
|
|
||||||
return ModelPkg
|
|
||||||
default:
|
default:
|
||||||
return UnknownPkg
|
return UnknownPkg
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user