mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 00:13:15 +01:00
Compare commits
2 Commits
2e100f33f3
...
89842bd2f6
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
89842bd2f6 | ||
|
|
4a60c41f38 |
2
.gitignore
vendored
2
.gitignore
vendored
@ -73,3 +73,5 @@ cosign.pub
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
|
||||
|
||||
@ -106,8 +106,8 @@ syft <image> -o <format>
|
||||
Where the `formats` available are:
|
||||
- `syft-json`: Use this to get as much information out of Syft as possible!
|
||||
- `syft-text`: A row-oriented, human-and-machine-friendly output.
|
||||
- `cyclonedx-xml`: A XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
|
||||
- `cyclonedx-xml@1.5`: A XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
|
||||
- `cyclonedx-xml`: An XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
|
||||
- `cyclonedx-xml@1.5`: An XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
|
||||
- `cyclonedx-json`: A JSON report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
|
||||
- `cyclonedx-json@1.5`: A JSON report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
|
||||
- `spdx-tag-value`: A tag-value formatted report conforming to the [SPDX 2.3 specification](https://spdx.github.io/spdx-spec/v2.3/).
|
||||
|
||||
@ -87,6 +87,7 @@ func TestPkgCoverageImage(t *testing.T) {
|
||||
definedPkgs.Remove(string(pkg.TerraformPkg))
|
||||
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // we have coverage for pear instead
|
||||
definedPkgs.Remove(string(pkg.CondaPkg))
|
||||
definedPkgs.Remove(string(pkg.ModelPkg))
|
||||
|
||||
var cases []testCase
|
||||
cases = append(cases, commonTestCases...)
|
||||
@ -161,6 +162,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
|
||||
definedPkgs.Remove(string(pkg.UnknownPkg))
|
||||
definedPkgs.Remove(string(pkg.CondaPkg))
|
||||
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // this is covered as pear packages
|
||||
definedPkgs.Remove(string(pkg.ModelPkg))
|
||||
|
||||
// for directory scans we should not expect to see any of the following package types
|
||||
definedPkgs.Remove(string(pkg.KbPkg))
|
||||
|
||||
20
go.mod
20
go.mod
@ -11,7 +11,6 @@ require (
|
||||
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
|
||||
github.com/acobaugh/osrelease v0.1.0
|
||||
github.com/adrg/xdg v0.5.3
|
||||
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51
|
||||
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9
|
||||
github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716
|
||||
github.com/anchore/fangs v0.0.0-20250319222917-446a1e748ec2
|
||||
@ -168,7 +167,6 @@ require (
|
||||
github.com/goccy/go-yaml v1.18.0
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
|
||||
github.com/golang/snappy v0.0.4 // indirect
|
||||
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect
|
||||
github.com/google/s2a-go v0.1.8 // indirect
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
|
||||
@ -209,10 +207,6 @@ require (
|
||||
github.com/muesli/cancelreader v0.2.2 // indirect
|
||||
github.com/muesli/termenv v0.16.0 // indirect
|
||||
github.com/ncruces/go-strftime v0.1.9 // indirect
|
||||
github.com/nwaples/rardecode v1.1.3 // indirect
|
||||
github.com/nwaples/rardecode/v2 v2.2.0 // indirect
|
||||
github.com/olekukonko/errors v1.1.0 // indirect
|
||||
github.com/olekukonko/ll v0.1.2 // indirect
|
||||
github.com/opencontainers/image-spec v1.1.1 // indirect
|
||||
github.com/opencontainers/runtime-spec v1.1.0 // indirect
|
||||
github.com/opencontainers/selinux v1.13.0 // indirect
|
||||
@ -286,6 +280,11 @@ require (
|
||||
modernc.org/memory v1.11.0 // indirect
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/cespare/xxhash/v2 v2.3.0
|
||||
github.com/gpustack/gguf-parser-go v0.22.1
|
||||
)
|
||||
|
||||
require (
|
||||
cyphar.com/go-pathrs v0.2.1 // indirect
|
||||
github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect
|
||||
@ -310,7 +309,16 @@ require (
|
||||
github.com/clipperhouse/stringish v0.1.1 // indirect
|
||||
github.com/clipperhouse/uax29/v2 v2.2.0 // indirect
|
||||
github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect
|
||||
github.com/henvic/httpretty v0.1.4 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/nwaples/rardecode/v2 v2.2.0 // indirect
|
||||
github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect
|
||||
github.com/olekukonko/errors v1.1.0 // indirect
|
||||
github.com/olekukonko/ll v0.1.2 // indirect
|
||||
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
|
||||
gonum.org/v1/gonum v0.15.1 // indirect
|
||||
)
|
||||
|
||||
retract (
|
||||
|
||||
18
go.sum
18
go.sum
@ -110,8 +110,6 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
|
||||
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51 h1:yhk+P8lF3ZiROjmaVRao9WGTRo4b/wYjoKEiAHWrKwc=
|
||||
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51/go.mod h1:nwuGSd7aZp0rtYt79YggCGafz1RYsclE7pi3fhLwvuw=
|
||||
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 h1:p0ZIe0htYOX284Y4axJaGBvXHU0VCCzLN5Wf5XbKStU=
|
||||
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9/go.mod h1:3ZsFB9tzW3vl4gEiUeuSOMDnwroWxIxJelOOHUp8dSw=
|
||||
github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 h1:2sIdYJlQESEnyk3Y0WD2vXWW5eD2iMz9Ev8fj1Z8LNA=
|
||||
@ -229,7 +227,6 @@ github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqy
|
||||
github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
|
||||
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||
github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||
github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
|
||||
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
|
||||
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
@ -480,8 +477,6 @@ github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu
|
||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
|
||||
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
||||
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
||||
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
|
||||
@ -549,6 +544,8 @@ github.com/gookit/assert v0.1.1/go.mod h1:jS5bmIVQZTIwk42uXl4lyj4iaaxx32tqH16CFj
|
||||
github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg=
|
||||
github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA=
|
||||
github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs=
|
||||
github.com/gpustack/gguf-parser-go v0.22.1 h1:FRnEDWqT0Rcplr/R9ctCRSN2+3DhVsf6dnR5/i9JA4E=
|
||||
github.com/gpustack/gguf-parser-go v0.22.1/go.mod h1:y4TwTtDqFWTK+xvprOjRUh+dowgU2TKCX37vRKvGiZ0=
|
||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
|
||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
|
||||
@ -598,6 +595,8 @@ github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn
|
||||
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
|
||||
github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
|
||||
github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
|
||||
github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
|
||||
github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
|
||||
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
||||
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
|
||||
@ -625,6 +624,7 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF
|
||||
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
||||
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||
github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
||||
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
|
||||
@ -730,9 +730,11 @@ github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcY
|
||||
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA=
|
||||
github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
|
||||
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
|
||||
@ -749,8 +751,6 @@ github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1 h1:kpt9ZfKcm+
|
||||
github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1/go.mod h1:qgCw4bBKZX8qMgGeEZzGFVT3notl42dBjNqO2jut0M0=
|
||||
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 h1:NHrXEjTNQY7P0Zfx1aMrNhpgxHmow66XQtm0aQLY0AE=
|
||||
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8=
|
||||
github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc=
|
||||
github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
|
||||
github.com/nwaples/rardecode/v2 v2.2.0 h1:4ufPGHiNe1rYJxYfehALLjup4Ls3ck42CWwjKiOqu0A=
|
||||
github.com/nwaples/rardecode/v2 v2.2.0/go.mod h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw=
|
||||
github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 h1:zrbMGy9YXpIeTnGj4EljqMiZsIcE09mmF8XsD5AYOJc=
|
||||
@ -860,6 +860,8 @@ github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yf
|
||||
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
|
||||
github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
|
||||
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
|
||||
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
|
||||
github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik=
|
||||
github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU=
|
||||
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
|
||||
@ -1313,6 +1315,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
|
||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
|
||||
gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
|
||||
gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
|
||||
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
|
||||
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
|
||||
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
|
||||
|
||||
@ -3,5 +3,5 @@ package internal
|
||||
const (
|
||||
// JSONSchemaVersion is the current schema version output by the JSON encoder
|
||||
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
|
||||
JSONSchemaVersion = "16.0.42"
|
||||
JSONSchemaVersion = "16.0.43"
|
||||
)
|
||||
|
||||
@ -1,17 +1,40 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/bmatcuk/doublestar/v4"
|
||||
"github.com/mholt/archives"
|
||||
|
||||
"github.com/anchore/archiver/v3"
|
||||
"github.com/anchore/syft/internal"
|
||||
)
|
||||
|
||||
// TraverseFilesInTar enumerates all paths stored within a tar archive using the visitor pattern.
|
||||
func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archives.FileHandler) error {
|
||||
tarReader, err := os.Open(archivePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to open tar archive (%s): %w", archivePath, err)
|
||||
}
|
||||
defer internal.CloseAndLogError(tarReader, archivePath)
|
||||
|
||||
format, _, err := archives.Identify(ctx, archivePath, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to identify tar compression format: %w", err)
|
||||
}
|
||||
|
||||
extractor, ok := format.(archives.Extractor)
|
||||
if !ok {
|
||||
return fmt.Errorf("file format does not support extraction: %s", archivePath)
|
||||
}
|
||||
|
||||
return extractor.Extract(ctx, tarReader, visitor)
|
||||
}
|
||||
|
||||
// ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted.
|
||||
func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...string) (map[string]Opener, error) {
|
||||
func ExtractGlobsFromTarToUniqueTempFile(ctx context.Context, archivePath, dir string, globs ...string) (map[string]Opener, error) {
|
||||
results := make(map[string]Opener)
|
||||
|
||||
// don't allow for full traversal, only select traversal from given paths
|
||||
@ -19,9 +42,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
|
||||
return results, nil
|
||||
}
|
||||
|
||||
visitor := func(file archiver.File) error {
|
||||
defer file.Close()
|
||||
|
||||
visitor := func(_ context.Context, file archives.FileInfo) error {
|
||||
// ignore directories
|
||||
if file.IsDir() {
|
||||
return nil
|
||||
@ -43,7 +64,13 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
|
||||
// provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
|
||||
defer tempFile.Close()
|
||||
|
||||
if err := safeCopy(tempFile, file.ReadCloser); err != nil {
|
||||
packedFile, err := file.Open()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to read file=%q from tar=%q: %w", file.NameInArchive, archivePath, err)
|
||||
}
|
||||
defer internal.CloseAndLogError(packedFile, archivePath)
|
||||
|
||||
if err := safeCopy(tempFile, packedFile); err != nil {
|
||||
return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err)
|
||||
}
|
||||
|
||||
@ -52,7 +79,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
|
||||
return nil
|
||||
}
|
||||
|
||||
return results, archiver.Walk(archivePath, visitor)
|
||||
return results, TraverseFilesInTar(ctx, archivePath, visitor)
|
||||
}
|
||||
|
||||
func matchesAnyGlob(name string, globs ...string) bool {
|
||||
|
||||
@ -1,10 +1,12 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/mholt/archives"
|
||||
"github.com/scylladb/go-set/strset"
|
||||
|
||||
"github.com/anchore/syft/internal/log"
|
||||
@ -14,22 +16,25 @@ import (
|
||||
type ZipFileManifest map[string]os.FileInfo
|
||||
|
||||
// NewZipFileManifest creates and returns a new ZipFileManifest populated with path and metadata from the given zip archive path.
|
||||
func NewZipFileManifest(archivePath string) (ZipFileManifest, error) {
|
||||
zipReader, err := OpenZip(archivePath)
|
||||
func NewZipFileManifest(ctx context.Context, archivePath string) (ZipFileManifest, error) {
|
||||
zipReader, err := os.Open(archivePath)
|
||||
manifest := make(ZipFileManifest)
|
||||
if err != nil {
|
||||
log.Debugf("unable to open zip archive (%s): %v", archivePath, err)
|
||||
return manifest, err
|
||||
}
|
||||
defer func() {
|
||||
err = zipReader.Close()
|
||||
if err != nil {
|
||||
if err = zipReader.Close(); err != nil {
|
||||
log.Debugf("unable to close zip archive (%s): %+v", archivePath, err)
|
||||
}
|
||||
}()
|
||||
|
||||
for _, file := range zipReader.File {
|
||||
manifest.Add(file.Name, file.FileInfo())
|
||||
err = archives.Zip{}.Extract(ctx, zipReader, func(_ context.Context, file archives.FileInfo) error {
|
||||
manifest.Add(file.NameInArchive, file.FileInfo)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return manifest, err
|
||||
}
|
||||
return manifest, nil
|
||||
}
|
||||
|
||||
@ -4,6 +4,7 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path"
|
||||
@ -24,7 +25,7 @@ func TestNewZipFileManifest(t *testing.T) {
|
||||
|
||||
archiveFilePath := setupZipFileTest(t, sourceDirPath, false)
|
||||
|
||||
actual, err := NewZipFileManifest(archiveFilePath)
|
||||
actual, err := NewZipFileManifest(context.Background(), archiveFilePath)
|
||||
if err != nil {
|
||||
t.Fatalf("unable to extract from unzip archive: %+v", err)
|
||||
}
|
||||
@ -59,7 +60,7 @@ func TestNewZip64FileManifest(t *testing.T) {
|
||||
sourceDirPath := path.Join(cwd, "test-fixtures", "zip-source")
|
||||
archiveFilePath := setupZipFileTest(t, sourceDirPath, true)
|
||||
|
||||
actual, err := NewZipFileManifest(archiveFilePath)
|
||||
actual, err := NewZipFileManifest(context.Background(), archiveFilePath)
|
||||
if err != nil {
|
||||
t.Fatalf("unable to extract from unzip archive: %+v", err)
|
||||
}
|
||||
@ -99,7 +100,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
|
||||
|
||||
archiveFilePath := setupZipFileTest(t, sourceDirPath, false)
|
||||
|
||||
z, err := NewZipFileManifest(archiveFilePath)
|
||||
z, err := NewZipFileManifest(context.Background(), archiveFilePath)
|
||||
if err != nil {
|
||||
t.Fatalf("unable to extract from unzip archive: %+v", err)
|
||||
}
|
||||
|
||||
@ -1,13 +1,15 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/mholt/archives"
|
||||
|
||||
"github.com/anchore/syft/internal/log"
|
||||
)
|
||||
|
||||
@ -25,7 +27,7 @@ type errZipSlipDetected struct {
|
||||
}
|
||||
|
||||
func (e *errZipSlipDetected) Error() string {
|
||||
return fmt.Sprintf("paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs)
|
||||
return fmt.Sprintf("path traversal detected: paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs)
|
||||
}
|
||||
|
||||
type zipTraversalRequest map[string]struct{}
|
||||
@ -39,38 +41,34 @@ func newZipTraverseRequest(paths ...string) zipTraversalRequest {
|
||||
}
|
||||
|
||||
// TraverseFilesInZip enumerates all paths stored within a zip archive using the visitor pattern.
|
||||
func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths ...string) error {
|
||||
func TraverseFilesInZip(ctx context.Context, archivePath string, visitor archives.FileHandler, paths ...string) error {
|
||||
request := newZipTraverseRequest(paths...)
|
||||
|
||||
zipReader, err := OpenZip(archivePath)
|
||||
zipReader, err := os.Open(archivePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err)
|
||||
}
|
||||
defer func() {
|
||||
err = zipReader.Close()
|
||||
if err != nil {
|
||||
if err := zipReader.Close(); err != nil {
|
||||
log.Errorf("unable to close zip archive (%s): %+v", archivePath, err)
|
||||
}
|
||||
}()
|
||||
|
||||
for _, file := range zipReader.File {
|
||||
return archives.Zip{}.Extract(ctx, zipReader, func(ctx context.Context, file archives.FileInfo) error {
|
||||
// if no paths are given then assume that all files should be traversed
|
||||
if len(paths) > 0 {
|
||||
if _, ok := request[file.Name]; !ok {
|
||||
if _, ok := request[file.NameInArchive]; !ok {
|
||||
// this file path is not of interest
|
||||
continue
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
if err = visitor(file); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return visitor(ctx, file)
|
||||
})
|
||||
}
|
||||
|
||||
// ExtractFromZipToUniqueTempFile extracts select paths for the given archive to a temporary directory, returning file openers for each file extracted.
|
||||
func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]Opener, error) {
|
||||
func ExtractFromZipToUniqueTempFile(ctx context.Context, archivePath, dir string, paths ...string) (map[string]Opener, error) {
|
||||
results := make(map[string]Opener)
|
||||
|
||||
// don't allow for full traversal, only select traversal from given paths
|
||||
@ -78,9 +76,8 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
|
||||
return results, nil
|
||||
}
|
||||
|
||||
visitor := func(file *zip.File) error {
|
||||
tempfilePrefix := filepath.Base(filepath.Clean(file.Name)) + "-"
|
||||
|
||||
visitor := func(_ context.Context, file archives.FileInfo) error {
|
||||
tempfilePrefix := filepath.Base(filepath.Clean(file.NameInArchive)) + "-"
|
||||
tempFile, err := os.CreateTemp(dir, tempfilePrefix)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to create temp file: %w", err)
|
||||
@ -92,33 +89,32 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
|
||||
|
||||
zippedFile, err := file.Open()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
|
||||
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
|
||||
}
|
||||
defer func() {
|
||||
err := zippedFile.Close()
|
||||
if err != nil {
|
||||
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.Name, archivePath, err)
|
||||
if err := zippedFile.Close(); err != nil {
|
||||
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
|
||||
}
|
||||
}()
|
||||
|
||||
if file.FileInfo().IsDir() {
|
||||
return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
|
||||
if file.IsDir() {
|
||||
return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
|
||||
}
|
||||
|
||||
if err := safeCopy(tempFile, zippedFile); err != nil {
|
||||
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err)
|
||||
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
|
||||
}
|
||||
|
||||
results[file.Name] = Opener{path: tempFile.Name()}
|
||||
results[file.NameInArchive] = Opener{path: tempFile.Name()}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
return results, TraverseFilesInZip(archivePath, visitor, paths...)
|
||||
return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
|
||||
}
|
||||
|
||||
// ContentsFromZip extracts select paths for the given archive and returns a set of string contents for each path.
|
||||
func ContentsFromZip(archivePath string, paths ...string) (map[string]string, error) {
|
||||
func ContentsFromZip(ctx context.Context, archivePath string, paths ...string) (map[string]string, error) {
|
||||
results := make(map[string]string)
|
||||
|
||||
// don't allow for full traversal, only select traversal from given paths
|
||||
@ -126,37 +122,38 @@ func ContentsFromZip(archivePath string, paths ...string) (map[string]string, er
|
||||
return results, nil
|
||||
}
|
||||
|
||||
visitor := func(file *zip.File) error {
|
||||
visitor := func(_ context.Context, file archives.FileInfo) error {
|
||||
zippedFile, err := file.Open()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
|
||||
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
|
||||
}
|
||||
defer func() {
|
||||
if err := zippedFile.Close(); err != nil {
|
||||
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
|
||||
}
|
||||
}()
|
||||
|
||||
if file.FileInfo().IsDir() {
|
||||
return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
|
||||
if file.IsDir() {
|
||||
return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
|
||||
}
|
||||
|
||||
var buffer bytes.Buffer
|
||||
if err := safeCopy(&buffer, zippedFile); err != nil {
|
||||
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err)
|
||||
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
|
||||
}
|
||||
|
||||
results[file.Name] = buffer.String()
|
||||
results[file.NameInArchive] = buffer.String()
|
||||
|
||||
err = zippedFile.Close()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return results, TraverseFilesInZip(archivePath, visitor, paths...)
|
||||
return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
|
||||
}
|
||||
|
||||
// UnzipToDir extracts a zip archive to a target directory.
|
||||
func UnzipToDir(archivePath, targetDir string) error {
|
||||
visitor := func(file *zip.File) error {
|
||||
joinedPath, err := safeJoin(targetDir, file.Name)
|
||||
func UnzipToDir(ctx context.Context, archivePath, targetDir string) error {
|
||||
visitor := func(_ context.Context, file archives.FileInfo) error {
|
||||
joinedPath, err := SafeJoin(targetDir, file.NameInArchive)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -164,11 +161,11 @@ func UnzipToDir(archivePath, targetDir string) error {
|
||||
return extractSingleFile(file, joinedPath, archivePath)
|
||||
}
|
||||
|
||||
return TraverseFilesInZip(archivePath, visitor)
|
||||
return TraverseFilesInZip(ctx, archivePath, visitor)
|
||||
}
|
||||
|
||||
// safeJoin ensures that any destinations do not resolve to a path above the prefix path.
|
||||
func safeJoin(prefix string, dest ...string) (string, error) {
|
||||
// SafeJoin ensures that any destinations do not resolve to a path above the prefix path.
|
||||
func SafeJoin(prefix string, dest ...string) (string, error) {
|
||||
joinResult := filepath.Join(append([]string{prefix}, dest...)...)
|
||||
cleanJoinResult := filepath.Clean(joinResult)
|
||||
if !strings.HasPrefix(cleanJoinResult, filepath.Clean(prefix)) {
|
||||
@ -181,13 +178,18 @@ func safeJoin(prefix string, dest ...string) (string, error) {
|
||||
return joinResult, nil
|
||||
}
|
||||
|
||||
func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) error {
|
||||
func extractSingleFile(file archives.FileInfo, expandedFilePath, archivePath string) error {
|
||||
zippedFile, err := file.Open()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
|
||||
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
|
||||
}
|
||||
defer func() {
|
||||
if err := zippedFile.Close(); err != nil {
|
||||
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
|
||||
}
|
||||
}()
|
||||
|
||||
if file.FileInfo().IsDir() {
|
||||
if file.IsDir() {
|
||||
err = os.MkdirAll(expandedFilePath, file.Mode())
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to create dir=%q from zip=%q: %w", expandedFilePath, archivePath, err)
|
||||
@ -202,20 +204,16 @@ func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) err
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err)
|
||||
}
|
||||
defer func() {
|
||||
if err := outputFile.Close(); err != nil {
|
||||
log.Errorf("unable to close dest file=%q from zip=%q: %+v", outputFile.Name(), archivePath, err)
|
||||
}
|
||||
}()
|
||||
|
||||
if err := safeCopy(outputFile, zippedFile); err != nil {
|
||||
return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.Name, outputFile.Name(), archivePath, err)
|
||||
}
|
||||
|
||||
err = outputFile.Close()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to close dest file=%q from zip=%q: %w", outputFile.Name(), archivePath, err)
|
||||
return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.NameInArchive, outputFile.Name(), archivePath, err)
|
||||
}
|
||||
}
|
||||
|
||||
err = zippedFile.Close()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -4,6 +4,8 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
@ -17,6 +19,7 @@ import (
|
||||
|
||||
"github.com/go-test/deep"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func equal(r1, r2 io.Reader) (bool, error) {
|
||||
@ -55,7 +58,7 @@ func TestUnzipToDir(t *testing.T) {
|
||||
expectedPaths := len(expectedZipArchiveEntries)
|
||||
observedPaths := 0
|
||||
|
||||
err = UnzipToDir(archiveFilePath, unzipDestinationDir)
|
||||
err = UnzipToDir(context.Background(), archiveFilePath, unzipDestinationDir)
|
||||
if err != nil {
|
||||
t.Fatalf("unable to unzip archive: %+v", err)
|
||||
}
|
||||
@ -145,7 +148,7 @@ func TestContentsFromZip(t *testing.T) {
|
||||
paths = append(paths, p)
|
||||
}
|
||||
|
||||
actual, err := ContentsFromZip(archivePath, paths...)
|
||||
actual, err := ContentsFromZip(context.Background(), archivePath, paths...)
|
||||
if err != nil {
|
||||
t.Fatalf("unable to extract from unzip archive: %+v", err)
|
||||
}
|
||||
@ -307,9 +310,528 @@ func TestSafeJoin(t *testing.T) {
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(fmt.Sprintf("%+v:%+v", test.prefix, test.args), func(t *testing.T) {
|
||||
actual, err := safeJoin(test.prefix, test.args...)
|
||||
actual, err := SafeJoin(test.prefix, test.args...)
|
||||
test.errAssertion(t, err)
|
||||
assert.Equal(t, test.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestSymlinkProtection demonstrates that SafeJoin protects against symlink-based
|
||||
// directory traversal attacks by validating that archive entry paths cannot escape
|
||||
// the extraction directory.
|
||||
func TestSafeJoin_SymlinkProtection(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
archivePath string // Path as it would appear in the archive
|
||||
expectError bool
|
||||
description string
|
||||
}{
|
||||
{
|
||||
name: "path traversal via ../",
|
||||
archivePath: "../../../outside/file.txt",
|
||||
expectError: true,
|
||||
description: "Archive entry with ../ trying to escape extraction dir",
|
||||
},
|
||||
{
|
||||
name: "absolute path symlink target",
|
||||
archivePath: "../../../sensitive.txt",
|
||||
expectError: true,
|
||||
description: "Simulates symlink pointing outside via relative path",
|
||||
},
|
||||
{
|
||||
name: "safe relative path within extraction dir",
|
||||
archivePath: "subdir/safe.txt",
|
||||
expectError: false,
|
||||
description: "Normal file path that stays within extraction directory",
|
||||
},
|
||||
{
|
||||
name: "safe path with internal ../",
|
||||
archivePath: "dir1/../dir2/file.txt",
|
||||
expectError: false,
|
||||
description: "Path with ../ that still resolves within extraction dir",
|
||||
},
|
||||
{
|
||||
name: "deeply nested traversal",
|
||||
archivePath: "../../../../../../tmp/evil.txt",
|
||||
expectError: true,
|
||||
description: "Multiple levels of ../ trying to escape",
|
||||
},
|
||||
{
|
||||
name: "single parent directory escape",
|
||||
archivePath: "../",
|
||||
expectError: true,
|
||||
description: "Simple one-level escape attempt",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Create temp directories to simulate extraction scenario
|
||||
tmpDir := t.TempDir()
|
||||
extractDir := filepath.Join(tmpDir, "extract")
|
||||
outsideDir := filepath.Join(tmpDir, "outside")
|
||||
|
||||
require.NoError(t, os.MkdirAll(extractDir, 0755))
|
||||
require.NoError(t, os.MkdirAll(outsideDir, 0755))
|
||||
|
||||
// Create a file outside extraction dir that an attacker might target
|
||||
outsideFile := filepath.Join(outsideDir, "sensitive.txt")
|
||||
require.NoError(t, os.WriteFile(outsideFile, []byte("sensitive data"), 0644))
|
||||
|
||||
// Test SafeJoin - this is what happens when processing archive entries
|
||||
result, err := SafeJoin(extractDir, tt.archivePath)
|
||||
|
||||
if tt.expectError {
|
||||
// Should block malicious paths
|
||||
require.Error(t, err, "Expected SafeJoin to reject malicious path")
|
||||
var zipSlipErr *errZipSlipDetected
|
||||
assert.ErrorAs(t, err, &zipSlipErr, "Error should be errZipSlipDetected type")
|
||||
assert.Empty(t, result, "Result should be empty for blocked paths")
|
||||
} else {
|
||||
// Should allow safe paths
|
||||
require.NoError(t, err, "Expected SafeJoin to allow safe path")
|
||||
assert.NotEmpty(t, result, "Result should not be empty for safe paths")
|
||||
assert.True(t, strings.HasPrefix(filepath.Clean(result), filepath.Clean(extractDir)),
|
||||
"Safe path should resolve within extraction directory")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestUnzipToDir_SymlinkAttacks tests UnzipToDir function with malicious ZIP archives
|
||||
// containing symlink entries that attempt path traversal attacks.
|
||||
//
|
||||
// EXPECTED BEHAVIOR: UnzipToDir should either:
|
||||
// 1. Detect and reject symlinks explicitly with a security error, OR
|
||||
// 2. Extract them safely (library converts symlinks to regular files)
|
||||
func TestUnzipToDir_SymlinkAttacks(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
symlinkName string
|
||||
fileName string
|
||||
errContains string
|
||||
}{
|
||||
{
|
||||
name: "direct symlink to outside directory",
|
||||
symlinkName: "evil_link",
|
||||
fileName: "evil_link/payload.txt",
|
||||
errContains: "not a directory", // attempt to write through symlink leaf (which is not a directory)
|
||||
},
|
||||
{
|
||||
name: "directory symlink attack",
|
||||
symlinkName: "safe_dir/link",
|
||||
fileName: "safe_dir/link/payload.txt",
|
||||
errContains: "not a directory", // attempt to write through symlink (which is not a directory)
|
||||
},
|
||||
{
|
||||
name: "symlink without payload file",
|
||||
symlinkName: "standalone_link",
|
||||
fileName: "", // no payload file
|
||||
errContains: "", // no error expected, symlink without payload is safe
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
// create outside target directory
|
||||
outsideDir := filepath.Join(tempDir, "outside_target")
|
||||
require.NoError(t, os.MkdirAll(outsideDir, 0755))
|
||||
|
||||
// create extraction directory
|
||||
extractDir := filepath.Join(tempDir, "extract")
|
||||
require.NoError(t, os.MkdirAll(extractDir, 0755))
|
||||
|
||||
maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, outsideDir, tt.fileName)
|
||||
|
||||
err := UnzipToDir(context.Background(), maliciousZip, extractDir)
|
||||
|
||||
// check error expectations
|
||||
if tt.errContains != "" {
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), tt.errContains)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
analyzeExtractionDirectory(t, extractDir)
|
||||
|
||||
// check if payload file escaped extraction directory
|
||||
if tt.fileName != "" {
|
||||
maliciousFile := filepath.Join(outsideDir, filepath.Base(tt.fileName))
|
||||
checkFileOutsideExtraction(t, maliciousFile)
|
||||
}
|
||||
|
||||
// check if symlink was created pointing outside
|
||||
symlinkPath := filepath.Join(extractDir, tt.symlinkName)
|
||||
checkSymlinkCreation(t, symlinkPath, extractDir, outsideDir)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestContentsFromZip_SymlinkAttacks tests the ContentsFromZip function with malicious
|
||||
// ZIP archives containing symlink entries.
|
||||
//
|
||||
// EXPECTED BEHAVIOR: ContentsFromZip should either:
|
||||
// 1. Reject symlinks explicitly, OR
|
||||
// 2. Return empty content for symlinks (library behavior)
|
||||
//
|
||||
// Though ContentsFromZip doesn't write to disk, but if symlinks are followed, it could read sensitive
|
||||
// files from outside the archive.
|
||||
func TestContentsFromZip_SymlinkAttacks(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
symlinkName string
|
||||
symlinkTarget string
|
||||
requestPath string
|
||||
errContains string
|
||||
}{
|
||||
{
|
||||
name: "request symlink entry directly",
|
||||
symlinkName: "evil_link",
|
||||
symlinkTarget: "/etc/hosts", // attempt to read sensitive file
|
||||
requestPath: "evil_link",
|
||||
errContains: "", // no error expected - library returns symlink metadata
|
||||
},
|
||||
{
|
||||
name: "symlink in nested directory",
|
||||
symlinkName: "nested/link",
|
||||
symlinkTarget: "/etc/hosts",
|
||||
requestPath: "nested/link",
|
||||
errContains: "", // no error expected - library returns symlink metadata
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
// create malicious ZIP with symlink entry (no payload file needed)
|
||||
maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, tt.symlinkTarget, "")
|
||||
|
||||
contents, err := ContentsFromZip(context.Background(), maliciousZip, tt.requestPath)
|
||||
|
||||
// check error expectations
|
||||
if tt.errContains != "" {
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), tt.errContains)
|
||||
return
|
||||
}
|
||||
require.NoError(t, err)
|
||||
|
||||
// verify symlink handling - library should return symlink target as content (metadata)
|
||||
content, found := contents[tt.requestPath]
|
||||
require.True(t, found, "symlink entry should be found in results")
|
||||
|
||||
// verify symlink was NOT followed (content should be target path or empty)
|
||||
if content != "" && content != tt.symlinkTarget {
|
||||
// content is not empty and not the symlink target - check if actual file was read
|
||||
if _, statErr := os.Stat(tt.symlinkTarget); statErr == nil {
|
||||
targetContent, readErr := os.ReadFile(tt.symlinkTarget)
|
||||
if readErr == nil && string(targetContent) == content {
|
||||
t.Errorf("critical issue!... symlink was FOLLOWED and external file content was read!")
|
||||
t.Logf(" symlink: %s → %s", tt.requestPath, tt.symlinkTarget)
|
||||
t.Logf(" content length: %d bytes", len(content))
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestExtractFromZipToUniqueTempFile_SymlinkAttacks tests the ExtractFromZipToUniqueTempFile
|
||||
// function with malicious ZIP archives containing symlink entries.
|
||||
//
|
||||
// EXPECTED BEHAVIOR: ExtractFromZipToUniqueTempFile should either:
|
||||
// 1. Reject symlinks explicitly, OR
|
||||
// 2. Extract them safely (library converts to empty files, filepath.Base sanitizes names)
|
||||
//
|
||||
// This function uses filepath.Base() on the archive entry name for temp file prefix and
|
||||
// os.CreateTemp() which creates files in the specified directory, so it should be protected.
|
||||
func TestExtractFromZipToUniqueTempFile_SymlinkAttacks(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
symlinkName string
|
||||
symlinkTarget string
|
||||
requestPath string
|
||||
errContains string
|
||||
}{
|
||||
{
|
||||
name: "extract symlink entry to temp file",
|
||||
symlinkName: "evil_link",
|
||||
symlinkTarget: "/etc/passwd",
|
||||
requestPath: "evil_link",
|
||||
errContains: "", // no error expected - library extracts symlink metadata
|
||||
},
|
||||
{
|
||||
name: "extract nested symlink",
|
||||
symlinkName: "nested/dir/link",
|
||||
symlinkTarget: "/tmp/outside",
|
||||
requestPath: "nested/dir/link",
|
||||
errContains: "", // no error expected
|
||||
},
|
||||
{
|
||||
name: "extract path traversal symlink name",
|
||||
symlinkName: "../../escape",
|
||||
symlinkTarget: "/tmp/outside",
|
||||
requestPath: "../../escape",
|
||||
errContains: "", // no error expected - filepath.Base sanitizes name
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, tt.symlinkTarget, "")
|
||||
|
||||
// create temp directory for extraction
|
||||
extractTempDir := filepath.Join(tempDir, "temp_extract")
|
||||
require.NoError(t, os.MkdirAll(extractTempDir, 0755))
|
||||
|
||||
openers, err := ExtractFromZipToUniqueTempFile(context.Background(), maliciousZip, extractTempDir, tt.requestPath)
|
||||
|
||||
// check error expectations
|
||||
if tt.errContains != "" {
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), tt.errContains)
|
||||
return
|
||||
}
|
||||
require.NoError(t, err)
|
||||
|
||||
// verify symlink was extracted
|
||||
opener, found := openers[tt.requestPath]
|
||||
require.True(t, found, "symlink entry should be extracted")
|
||||
|
||||
// verify temp file is within temp directory
|
||||
tempFilePath := opener.path
|
||||
cleanTempDir := filepath.Clean(extractTempDir)
|
||||
cleanTempFile := filepath.Clean(tempFilePath)
|
||||
require.True(t, strings.HasPrefix(cleanTempFile, cleanTempDir),
|
||||
"temp file must be within temp directory: %s not in %s", cleanTempFile, cleanTempDir)
|
||||
|
||||
// verify symlink was NOT followed (content should be target path or empty)
|
||||
f, openErr := opener.Open()
|
||||
require.NoError(t, openErr)
|
||||
defer f.Close()
|
||||
|
||||
content, readErr := io.ReadAll(f)
|
||||
require.NoError(t, readErr)
|
||||
|
||||
// check if symlink was followed (content matches actual file)
|
||||
if len(content) > 0 && string(content) != tt.symlinkTarget {
|
||||
if _, statErr := os.Stat(tt.symlinkTarget); statErr == nil {
|
||||
targetContent, readErr := os.ReadFile(tt.symlinkTarget)
|
||||
if readErr == nil && string(targetContent) == string(content) {
|
||||
t.Errorf("critical issue!... symlink was FOLLOWED and external file content was copied!")
|
||||
t.Logf(" symlink: %s → %s", tt.requestPath, tt.symlinkTarget)
|
||||
t.Logf(" content length: %d bytes", len(content))
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// forensicFindings is the inventory produced by analyzeExtractionDirectory for one
// extraction directory.
type forensicFindings struct {
	// symlinksFound lists every symlink encountered, whether or not it is harmful.
	symlinksFound []forensicSymlink
	// regularFiles lists the entries that are neither symlinks nor directories.
	regularFiles []string
	// directories lists the directory entries.
	directories []string
	// symlinkVulnerabilities holds one human-readable line per symlink that points
	// outside the extraction directory.
	symlinkVulnerabilities []string
}

// forensicSymlink describes a single symlink found inside an extraction directory.
type forensicSymlink struct {
	// path is where the symlink was found.
	path string
	// target is the link destination as returned by os.Readlink.
	target string
	// escapesExtraction is true when the destination is outside the extraction directory.
	escapesExtraction bool
	// resolvedPath is the destination after a relative target has been joined onto the
	// directory that holds the symlink (absolute targets are stored unchanged).
	resolvedPath string
}
|
||||
|
||||
// analyzeExtractionDirectory walks the extraction directory and detects symlinks that point
|
||||
// outside the extraction directory. It is silent unless vulnerabilities are found.
|
||||
func analyzeExtractionDirectory(t *testing.T, extractDir string) forensicFindings {
|
||||
t.Helper()
|
||||
|
||||
findings := forensicFindings{}
|
||||
|
||||
filepath.Walk(extractDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
// only log if there's an error walking the directory
|
||||
t.Logf("Error walking %s: %v", path, err)
|
||||
return nil
|
||||
}
|
||||
|
||||
relPath := strings.TrimPrefix(path, extractDir+"/")
|
||||
if relPath == "" {
|
||||
relPath = "."
|
||||
}
|
||||
|
||||
// use Lstat to detect symlinks without following them
|
||||
linfo, lerr := os.Lstat(path)
|
||||
if lerr == nil && linfo.Mode()&os.ModeSymlink != 0 {
|
||||
target, _ := os.Readlink(path)
|
||||
|
||||
// resolve to see where it actually points
|
||||
var resolvedPath string
|
||||
var escapesExtraction bool
|
||||
|
||||
if filepath.IsAbs(target) {
|
||||
// absolute symlink
|
||||
resolvedPath = target
|
||||
cleanExtractDir := filepath.Clean(extractDir)
|
||||
escapesExtraction = !strings.HasPrefix(filepath.Clean(target), cleanExtractDir)
|
||||
|
||||
if escapesExtraction {
|
||||
t.Errorf("critical issue!... absolute symlink created: %s → %s", relPath, target)
|
||||
t.Logf(" this symlink points outside the extraction directory")
|
||||
findings.symlinkVulnerabilities = append(findings.symlinkVulnerabilities,
|
||||
fmt.Sprintf("absolute symlink: %s → %s", relPath, target))
|
||||
}
|
||||
} else {
|
||||
// relative symlink - resolve it
|
||||
resolvedPath = filepath.Join(filepath.Dir(path), target)
|
||||
cleanResolved := filepath.Clean(resolvedPath)
|
||||
cleanExtractDir := filepath.Clean(extractDir)
|
||||
|
||||
escapesExtraction = !strings.HasPrefix(cleanResolved, cleanExtractDir)
|
||||
|
||||
if escapesExtraction {
|
||||
t.Errorf("critical issue!... symlink escapes extraction dir: %s → %s", relPath, target)
|
||||
t.Logf(" symlink resolves to: %s (outside extraction directory)", cleanResolved)
|
||||
findings.symlinkVulnerabilities = append(findings.symlinkVulnerabilities,
|
||||
fmt.Sprintf("relative symlink escape: %s → %s (resolves to %s)", relPath, target, cleanResolved))
|
||||
}
|
||||
}
|
||||
|
||||
findings.symlinksFound = append(findings.symlinksFound, forensicSymlink{
|
||||
path: relPath,
|
||||
target: target,
|
||||
escapesExtraction: escapesExtraction,
|
||||
resolvedPath: resolvedPath,
|
||||
})
|
||||
} else {
|
||||
// regular file or directory - collect silently
|
||||
if info.IsDir() {
|
||||
findings.directories = append(findings.directories, relPath)
|
||||
} else {
|
||||
findings.regularFiles = append(findings.regularFiles, relPath)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
return findings
|
||||
}
|
||||
|
||||
// checkFileOutsideExtraction reports whether filePath exists. The callers pass a location
// outside the extraction directory, so an existing file means a payload escaped: in that
// case the test is failed via t.Errorf, details about the file are logged, and true is
// returned. When the file cannot be stat'ed nothing is logged and false is returned.
func checkFileOutsideExtraction(t *testing.T, filePath string) bool {
	t.Helper()

	info, err := os.Stat(filePath)
	if err != nil {
		// no file found outside extraction directory...
		return false
	}

	// best effort: the content is only used to enrich the failure output
	data, _ := os.ReadFile(filePath)

	t.Errorf("critical issue!... file written OUTSIDE extraction directory!")
	t.Logf(" location: %s", filePath)
	t.Logf(" size: %d bytes", info.Size())
	t.Logf(" content: %s", string(data))
	t.Logf(" ...this means an attacker can write files to arbitrary locations on the filesystem")
	return true
}
|
||||
|
||||
// checkSymlinkCreation inspects symlinkPath without following it. It returns true, and
// fails the test via t.Errorf, when a symlink exists there and either its target equals
// expectedTarget (if non-empty) or its target is absolute and does not start with
// extractDir. It returns false, silently, when nothing exists at symlinkPath, when the
// entry is not a symlink, or when the symlink does not match either condition.
func checkSymlinkCreation(t *testing.T, symlinkPath, extractDir, expectedTarget string) bool {
	t.Helper()

	info, err := os.Lstat(symlinkPath)
	if err != nil || info.Mode()&os.ModeSymlink == 0 {
		// nothing there, or it exists but is not a symlink, that's good (attack was thwarted)...
		return false
	}

	target, _ := os.Readlink(symlinkPath)

	if expectedTarget != "" && target == expectedTarget {
		t.Errorf("critical issue!... symlink pointing outside extraction dir was created!")
		t.Logf(" Symlink: %s → %s", symlinkPath, target)
		return true
	}

	// Check if it escapes even if target doesn't match expected
	if !filepath.IsAbs(target) {
		return false
	}
	if strings.HasPrefix(filepath.Clean(target), filepath.Clean(extractDir)) {
		return false
	}

	t.Errorf("critical issue!... absolute symlink escapes extraction dir!")
	t.Logf(" symlink: %s → %s", symlinkPath, target)
	return true
}
|
||||
|
||||
// createMaliciousZipWithSymlink creates a ZIP archive containing a symlink entry pointing to an arbitrary target,
|
||||
// followed by a file entry that attempts to write through that symlink.
|
||||
// returns the path to the created ZIP archive.
|
||||
func createMaliciousZipWithSymlink(t *testing.T, tempDir, symlinkName, symlinkTarget, fileName string) string {
|
||||
t.Helper()
|
||||
|
||||
maliciousZip := filepath.Join(tempDir, "malicious.zip")
|
||||
zipFile, err := os.Create(maliciousZip)
|
||||
require.NoError(t, err)
|
||||
defer zipFile.Close()
|
||||
|
||||
zw := zip.NewWriter(zipFile)
|
||||
|
||||
// create parent directories if the symlink is nested
|
||||
if dir := filepath.Dir(symlinkName); dir != "." {
|
||||
dirHeader := &zip.FileHeader{
|
||||
Name: dir + "/",
|
||||
Method: zip.Store,
|
||||
}
|
||||
dirHeader.SetMode(os.ModeDir | 0755)
|
||||
_, err = zw.CreateHeader(dirHeader)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
// create symlink entry pointing outside extraction directory
|
||||
// note: ZIP format stores symlinks as regular files with the target path as content
|
||||
symlinkHeader := &zip.FileHeader{
|
||||
Name: symlinkName,
|
||||
Method: zip.Store,
|
||||
}
|
||||
symlinkHeader.SetMode(os.ModeSymlink | 0755)
|
||||
|
||||
symlinkWriter, err := zw.CreateHeader(symlinkHeader)
|
||||
require.NoError(t, err)
|
||||
|
||||
// write the symlink target as the file content (this is how ZIP stores symlinks)
|
||||
_, err = symlinkWriter.Write([]byte(symlinkTarget))
|
||||
require.NoError(t, err)
|
||||
|
||||
// create file entry that will be written through the symlink
|
||||
if fileName != "" {
|
||||
payloadContent := []byte("MALICIOUS PAYLOAD - This should NOT be written outside extraction dir!")
|
||||
payloadHeader := &zip.FileHeader{
|
||||
Name: fileName,
|
||||
Method: zip.Deflate,
|
||||
}
|
||||
payloadHeader.SetMode(0644)
|
||||
|
||||
payloadWriter, err := zw.CreateHeader(payloadHeader)
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = payloadWriter.Write(payloadContent)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
require.NoError(t, zw.Close())
|
||||
require.NoError(t, zipFile.Close())
|
||||
|
||||
return maliciousZip
|
||||
}
|
||||
|
||||
@ -1,229 +0,0 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
|
||||
"github.com/anchore/syft/internal/log"
|
||||
)
|
||||
|
||||
// directoryEndLen, readBuf, directoryEnd, and findSignatureInBlock were copied from the golang stdlib, specifically:
|
||||
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/struct.go
|
||||
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/reader.go
|
||||
// findArchiveStartOffset is derived from the same stdlib utils, specifically the readDirectoryEnd function.
|
||||
|
||||
const (
	// directoryEndLen is the length in bytes of the end-of-central-directory record; its
	// last two bytes hold the length of the comment that follows the record.
	directoryEndLen = 22

	// directory64LocLen is the number of bytes read for the zip64 locator that sits
	// directly in front of the end-of-central-directory record.
	directory64LocLen = 20

	// directory64EndLen is the number of bytes read for the zip64 end-of-central-directory
	// record.
	directory64EndLen = 56

	// directory64LocSignature is the signature expected at the start of the zip64 locator.
	directory64LocSignature = 0x07064b50

	// directory64EndSignature is the signature expected at the start of the zip64
	// end-of-central-directory record.
	directory64EndSignature = 0x06064b50
)
|
||||
|
||||
// ZipReadCloser stands in for zip.ReadCloser (as returned by zip.OpenReader) and
// additionally supports zips that have bytes prefixed to the front of the archive
// (common with self-extracting jars).
type ZipReadCloser struct {
	// Reader gives access to the archive entries.
	*zip.Reader
	// Closer releases the resource backing Reader.
	io.Closer
}
|
||||
|
||||
// OpenZip provides a ZipReadCloser for the given filepath.
|
||||
func OpenZip(filepath string) (*ZipReadCloser, error) {
|
||||
f, err := os.Open(filepath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
f.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// some archives may have bytes prepended to the front of the archive, such as with self executing JARs. We first
|
||||
// need to find the start of the archive and keep track of this offset.
|
||||
offset, err := findArchiveStartOffset(f, fi.Size())
|
||||
if err != nil {
|
||||
log.Debugf("cannot find beginning of zip archive=%q : %v", filepath, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if _, err := f.Seek(0, io.SeekStart); err != nil {
|
||||
return nil, fmt.Errorf("unable to seek to beginning of archive: %w", err)
|
||||
}
|
||||
|
||||
if offset > math.MaxInt64 {
|
||||
return nil, fmt.Errorf("archive start offset too large: %v", offset)
|
||||
}
|
||||
offset64 := int64(offset)
|
||||
|
||||
size := fi.Size() - offset64
|
||||
|
||||
r, err := zip.NewReader(io.NewSectionReader(f, offset64, size), size)
|
||||
if err != nil {
|
||||
log.Debugf("unable to open ZipReadCloser @ %q: %v", filepath, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &ZipReadCloser{
|
||||
Reader: r,
|
||||
Closer: f,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// readBuf is a cursor over a byte slice: every read decodes a little-endian integer from
// the front of the slice and then drops the consumed bytes.
type readBuf []byte

// uint16 decodes the first 2 bytes as a little-endian value and advances past them.
func (b *readBuf) uint16() uint16 {
	data := *b
	value := binary.LittleEndian.Uint16(data)
	*b = data[2:]
	return value
}

// uint32 decodes the first 4 bytes as a little-endian value and advances past them.
func (b *readBuf) uint32() uint32 {
	data := *b
	value := binary.LittleEndian.Uint32(data)
	*b = data[4:]
	return value
}

// uint64 decodes the first 8 bytes as a little-endian value and advances past them.
func (b *readBuf) uint64() uint64 {
	data := *b
	value := binary.LittleEndian.Uint64(data)
	*b = data[8:]
	return value
}
|
||||
|
||||
// directoryEnd holds the fields decoded from the end-of-central-directory record (and,
// for zip64 archives, overwritten from the zip64 end-of-central-directory record).
type directoryEnd struct {
	// unused
	diskNbr uint32
	// unused
	dirDiskNbr uint32
	// unused
	dirRecordsThisDisk uint64

	directoryRecords uint64
	directorySize    uint64

	// relative to file
	directoryOffset uint64
}
|
||||
|
||||
// note: this is derived from readDirectoryEnd within the archive/zip package
|
||||
func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, err error) {
|
||||
// look for directoryEndSignature in the last 1k, then in the last 65k
|
||||
var buf []byte
|
||||
var directoryEndOffset int64
|
||||
for i, bLen := range []int64{1024, 65 * 1024} {
|
||||
if bLen > size {
|
||||
bLen = size
|
||||
}
|
||||
buf = make([]byte, int(bLen))
|
||||
if _, err := r.ReadAt(buf, size-bLen); err != nil && !errors.Is(err, io.EOF) {
|
||||
return 0, err
|
||||
}
|
||||
if p := findSignatureInBlock(buf); p >= 0 {
|
||||
buf = buf[p:]
|
||||
directoryEndOffset = size - bLen + int64(p)
|
||||
break
|
||||
}
|
||||
if i == 1 || bLen == size {
|
||||
return 0, zip.ErrFormat
|
||||
}
|
||||
}
|
||||
|
||||
if buf == nil {
|
||||
// we were unable to find the directoryEndSignature block
|
||||
return 0, zip.ErrFormat
|
||||
}
|
||||
|
||||
// read header into struct
|
||||
b := readBuf(buf[4:]) // skip signature
|
||||
d := &directoryEnd{
|
||||
diskNbr: uint32(b.uint16()),
|
||||
dirDiskNbr: uint32(b.uint16()),
|
||||
dirRecordsThisDisk: uint64(b.uint16()),
|
||||
directoryRecords: uint64(b.uint16()),
|
||||
directorySize: uint64(b.uint32()),
|
||||
directoryOffset: uint64(b.uint32()),
|
||||
}
|
||||
// Calculate where the zip data actually begins
|
||||
|
||||
// These values mean that the file can be a zip64 file
|
||||
if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
|
||||
p, err := findDirectory64End(r, directoryEndOffset)
|
||||
if err == nil && p >= 0 {
|
||||
directoryEndOffset = p
|
||||
err = readDirectory64End(r, p, d)
|
||||
}
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
startOfArchive = uint64(directoryEndOffset) - d.directorySize - d.directoryOffset
|
||||
|
||||
// Make sure directoryOffset points to somewhere in our file.
|
||||
if d.directoryOffset >= uint64(size) {
|
||||
return 0, zip.ErrFormat
|
||||
}
|
||||
return startOfArchive, nil
|
||||
}
|
||||
|
||||
// findDirectory64End tries to read the zip64 locator just before the
|
||||
// directory end and returns the offset of the zip64 directory end if
|
||||
// found.
|
||||
func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
|
||||
locOffset := directoryEndOffset - directory64LocLen
|
||||
if locOffset < 0 {
|
||||
return -1, nil // no need to look for a header outside the file
|
||||
}
|
||||
buf := make([]byte, directory64LocLen)
|
||||
if _, err := r.ReadAt(buf, locOffset); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
b := readBuf(buf)
|
||||
if sig := b.uint32(); sig != directory64LocSignature {
|
||||
return -1, nil
|
||||
}
|
||||
if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory
|
||||
return -1, nil // the file is not a valid zip64-file
|
||||
}
|
||||
p := b.uint64() // relative offset of the zip64 end of central directory record
|
||||
if b.uint32() != 1 { // total number of disks
|
||||
return -1, nil // the file is not a valid zip64-file
|
||||
}
|
||||
return int64(p), nil
|
||||
}
|
||||
|
||||
// readDirectory64End reads the zip64 directory end and updates the
|
||||
// directory end with the zip64 directory end values.
|
||||
func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
|
||||
buf := make([]byte, directory64EndLen)
|
||||
if _, err := r.ReadAt(buf, offset); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
b := readBuf(buf)
|
||||
if sig := b.uint32(); sig != directory64EndSignature {
|
||||
return errors.New("could not read directory64End")
|
||||
}
|
||||
|
||||
b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16)
|
||||
d.diskNbr = b.uint32() // number of this disk
|
||||
d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory
|
||||
d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk
|
||||
d.directoryRecords = b.uint64() // total number of entries in the central directory
|
||||
d.directorySize = b.uint64() // size of the central directory
|
||||
d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func findSignatureInBlock(b []byte) int {
|
||||
for i := len(b) - directoryEndLen; i >= 0; i-- {
|
||||
// defined from directoryEndSignature
|
||||
if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
|
||||
// n is length of comment
|
||||
n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
|
||||
if n+directoryEndLen+i <= len(b) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
@ -1,50 +0,0 @@
|
||||
//go:build !windows
|
||||
// +build !windows
|
||||
|
||||
package file
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestFindArchiveStartOffset(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
archivePrep func(tb testing.TB) string
|
||||
expected uint64
|
||||
}{
|
||||
{
|
||||
name: "standard, non-nested zip",
|
||||
archivePrep: prepZipSourceFixture,
|
||||
expected: 0,
|
||||
},
|
||||
{
|
||||
name: "zip with prepended bytes",
|
||||
archivePrep: prependZipSourceFixtureWithString(t, "junk at the beginning of the file..."),
|
||||
expected: 36,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
archivePath := test.archivePrep(t)
|
||||
f, err := os.Open(archivePath)
|
||||
if err != nil {
|
||||
t.Fatalf("could not open archive %q: %+v", archivePath, err)
|
||||
}
|
||||
fi, err := os.Stat(f.Name())
|
||||
if err != nil {
|
||||
t.Fatalf("unable to stat archive: %+v", err)
|
||||
}
|
||||
|
||||
actual, err := findArchiveStartOffset(f, fi.Size())
|
||||
if err != nil {
|
||||
t.Fatalf("unable to find offset: %+v", err)
|
||||
}
|
||||
assert.Equal(t, test.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -27,6 +27,7 @@ func AllTypes() []any {
|
||||
pkg.ELFBinaryPackageNoteJSONPayload{},
|
||||
pkg.ElixirMixLockEntry{},
|
||||
pkg.ErlangRebarLockEntry{},
|
||||
pkg.GGUFFileHeader{},
|
||||
pkg.GitHubActionsUseStatement{},
|
||||
pkg.GolangBinaryBuildinfoEntry{},
|
||||
pkg.GolangModuleEntry{},
|
||||
|
||||
@ -124,6 +124,7 @@ var jsonTypes = makeJSONTypes(
|
||||
jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"),
|
||||
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
|
||||
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
|
||||
jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"),
|
||||
)
|
||||
|
||||
func expandLegacyNameVariants(names ...string) []string {
|
||||
|
||||
@ -3,6 +3,7 @@ package task
|
||||
import (
|
||||
"github.com/anchore/syft/syft/cataloging/pkgcataloging"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/ai"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/alpine"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/arch"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/binary"
|
||||
@ -178,6 +179,7 @@ func DefaultPackageTaskFactories() Factories {
|
||||
newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"),
|
||||
newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"),
|
||||
newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"),
|
||||
newSimplePackageTaskFactory(ai.NewGGUFCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "gguf", "ml"),
|
||||
|
||||
// deprecated catalogers ////////////////////////////////////////
|
||||
// these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible)
|
||||
|
||||
@ -4,7 +4,8 @@ import (
|
||||
"context"
|
||||
"strings"
|
||||
|
||||
"github.com/anchore/archiver/v3"
|
||||
"github.com/mholt/archives"
|
||||
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/internal/sbomsync"
|
||||
"github.com/anchore/syft/syft/cataloging"
|
||||
@ -57,9 +58,10 @@ func (c unknownsLabelerTask) finalize(resolver file.Resolver, s *sbom.SBOM) {
|
||||
}
|
||||
|
||||
if c.IncludeUnexpandedArchives {
|
||||
ctx := context.Background()
|
||||
for coords := range s.Artifacts.FileMetadata {
|
||||
unarchiver, notArchiveErr := archiver.ByExtension(coords.RealPath)
|
||||
if unarchiver != nil && notArchiveErr == nil && !hasPackageReference(coords) {
|
||||
format, _, notArchiveErr := archives.Identify(ctx, coords.RealPath, nil)
|
||||
if format != nil && notArchiveErr == nil && !hasPackageReference(coords) {
|
||||
s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged")
|
||||
}
|
||||
}
|
||||
|
||||
4193
schema/json/schema-16.0.43.json
Normal file
4193
schema/json/schema-16.0.43.json
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "anchore.io/schema/syft/json/16.0.42/document",
|
||||
"$id": "anchore.io/schema/syft/json/16.0.43/document",
|
||||
"$ref": "#/$defs/Document",
|
||||
"$defs": {
|
||||
"AlpmDbEntry": {
|
||||
@ -1433,6 +1433,48 @@
|
||||
],
|
||||
"description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file."
|
||||
},
|
||||
"GgufFileHeader": {
|
||||
"properties": {
|
||||
"ggufVersion": {
|
||||
"type": "integer",
|
||||
"description": "GGUFVersion is the GGUF format version (e.g., 3)"
|
||||
},
|
||||
"fileSize": {
|
||||
"type": "integer",
|
||||
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
|
||||
},
|
||||
"architecture": {
|
||||
"type": "string",
|
||||
"description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")"
|
||||
},
|
||||
"quantization": {
|
||||
"type": "string",
|
||||
"description": "Quantization is the quantization type (e.g., \"IQ4_NL\", \"Q4_K_M\")"
|
||||
},
|
||||
"parameters": {
|
||||
"type": "integer",
|
||||
"description": "Parameters is the number of model parameters (if present in header)"
|
||||
},
|
||||
"tensorCount": {
|
||||
"type": "integer",
|
||||
"description": "TensorCount is the number of tensors in the model"
|
||||
},
|
||||
"header": {
|
||||
"type": "object",
|
||||
"description": "RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
|
||||
},
|
||||
"metadataHash": {
|
||||
"type": "string",
|
||||
"description": "MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"required": [
|
||||
"ggufVersion",
|
||||
"tensorCount"
|
||||
],
|
||||
"description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file."
|
||||
},
|
||||
"GithubActionsUseStatement": {
|
||||
"properties": {
|
||||
"value": {
|
||||
@ -2579,6 +2621,9 @@
|
||||
{
|
||||
"$ref": "#/$defs/ErlangRebarLockEntry"
|
||||
},
|
||||
{
|
||||
"$ref": "#/$defs/GgufFileHeader"
|
||||
},
|
||||
{
|
||||
"$ref": "#/$defs/GithubActionsUseStatement"
|
||||
},
|
||||
|
||||
@ -1,11 +1,13 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/anchore/archiver/v3"
|
||||
"github.com/mholt/archives"
|
||||
|
||||
"github.com/anchore/packageurl-go"
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
@ -153,8 +155,8 @@ func trimRelative(s string) string {
|
||||
|
||||
// isArchive returns true if the path appears to be an archive
|
||||
func isArchive(path string) bool {
|
||||
_, err := archiver.ByExtension(path)
|
||||
return err == nil
|
||||
format, _, err := archives.Identify(context.Background(), path, nil)
|
||||
return err == nil && format != nil
|
||||
}
|
||||
|
||||
func toDependencies(s *sbom.SBOM, p pkg.Package) (out []string) {
|
||||
|
||||
@ -40,8 +40,11 @@ func EncodeComponent(p pkg.Package, supplier string, locationSorter func(a, b fi
|
||||
}
|
||||
|
||||
componentType := cyclonedx.ComponentTypeLibrary
|
||||
if p.Type == pkg.BinaryPkg {
|
||||
switch p.Type {
|
||||
case pkg.BinaryPkg:
|
||||
componentType = cyclonedx.ComponentTypeApplication
|
||||
case pkg.ModelPkg:
|
||||
componentType = cyclonedx.ComponentTypeMachineLearningModel
|
||||
}
|
||||
|
||||
return cyclonedx.Component{
|
||||
|
||||
@ -62,7 +62,7 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str
|
||||
switch component.Type {
|
||||
case cyclonedx.ComponentTypeOS:
|
||||
case cyclonedx.ComponentTypeContainer:
|
||||
case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary:
|
||||
case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary, cyclonedx.ComponentTypeMachineLearningModel:
|
||||
p := decodeComponent(component)
|
||||
idMap[component.BOMRef] = p
|
||||
if component.BOMRef != "" {
|
||||
|
||||
@ -55,6 +55,7 @@ func Test_OriginatorSupplier(t *testing.T) {
|
||||
pkg.OpamPackage{},
|
||||
pkg.YarnLockEntry{},
|
||||
pkg.TerraformLockProviderEntry{},
|
||||
pkg.GGUFFileHeader{},
|
||||
)
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
@ -82,6 +82,8 @@ func SourceInfo(p pkg.Package) string {
|
||||
answer = "acquired package info from Homebrew formula"
|
||||
case pkg.TerraformPkg:
|
||||
answer = "acquired package info from Terraform dependency lock file"
|
||||
case pkg.ModelPkg:
|
||||
answer = "acquired package info from AI artifact (e.g. GGUF File)"
|
||||
default:
|
||||
answer = "acquired package info from the following paths"
|
||||
}
|
||||
|
||||
@ -351,6 +351,14 @@ func Test_SourceInfo(t *testing.T) {
|
||||
"acquired package info from Terraform dependency lock file",
|
||||
},
|
||||
},
|
||||
{
|
||||
input: pkg.Package{
|
||||
Type: pkg.ModelPkg,
|
||||
},
|
||||
expected: []string{
|
||||
"",
|
||||
},
|
||||
},
|
||||
}
|
||||
var pkgTypes []pkg.Type
|
||||
for _, test := range tests {
|
||||
|
||||
16
syft/pkg/cataloger/ai/cataloger.go
Normal file
16
syft/pkg/cataloger/ai/cataloger.go
Normal file
@ -0,0 +1,16 @@
|
||||
/*
|
||||
Package ai provides concrete Cataloger implementations for AI artifacts and machine learning models,
|
||||
including support for GGUF (GPT-Generated Unified Format) model files.
|
||||
*/
|
||||
package ai
|
||||
|
||||
import (
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||
)
|
||||
|
||||
// NewGGUFCataloger returns a new cataloger instance for GGUF model files.
|
||||
func NewGGUFCataloger() pkg.Cataloger {
|
||||
return generic.NewCataloger("gguf-cataloger").
|
||||
WithParserByGlobs(parseGGUFModel, "**/*.gguf")
|
||||
}
|
||||
140
syft/pkg/cataloger/ai/cataloger_test.go
Normal file
140
syft/pkg/cataloger/ai/cataloger_test.go
Normal file
@ -0,0 +1,140 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
|
||||
)
|
||||
|
||||
func TestGGUFCataloger_Globs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
fixture string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "obtain gguf files",
|
||||
fixture: "test-fixtures/glob-paths",
|
||||
expected: []string{
|
||||
"models/model.gguf",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, test.fixture).
|
||||
ExpectsResolverContentQueries(test.expected).
|
||||
TestCataloger(t, NewGGUFCataloger())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGGUFCataloger(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
setup func(t *testing.T) string
|
||||
expectedPackages []pkg.Package
|
||||
expectedRelationships []artifact.Relationship
|
||||
}{
|
||||
{
|
||||
name: "catalog single GGUF file",
|
||||
setup: func(t *testing.T) string {
|
||||
dir := t.TempDir()
|
||||
data := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "llama3-8b").
|
||||
withStringKV("general.version", "3.0").
|
||||
withStringKV("general.license", "Apache-2.0").
|
||||
withStringKV("general.quantization", "Q4_K_M").
|
||||
withUint64KV("general.parameter_count", 8030000000).
|
||||
withStringKV("general.some_random_kv", "foobar").
|
||||
build()
|
||||
|
||||
path := filepath.Join(dir, "llama3-8b.gguf")
|
||||
os.WriteFile(path, data, 0644)
|
||||
return dir
|
||||
},
|
||||
expectedPackages: []pkg.Package{
|
||||
{
|
||||
Name: "llama3-8b",
|
||||
Version: "3.0",
|
||||
Type: pkg.ModelPkg,
|
||||
Licenses: pkg.NewLicenseSet(
|
||||
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
|
||||
),
|
||||
Metadata: pkg.GGUFFileHeader{
|
||||
Architecture: "llama",
|
||||
Quantization: "Unknown",
|
||||
Parameters: 0,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 0,
|
||||
MetadataKeyValuesHash: "6e3d368066455ce4",
|
||||
RemainingKeyValues: map[string]interface{}{
|
||||
"general.some_random_kv": "foobar",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedRelationships: nil,
|
||||
},
|
||||
{
|
||||
name: "catalog GGUF file with minimal metadata",
|
||||
setup: func(t *testing.T) string {
|
||||
dir := t.TempDir()
|
||||
data := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withStringKV("general.architecture", "gpt2").
|
||||
withStringKV("general.name", "gpt2-small").
|
||||
withStringKV("gpt2.context_length", "1024").
|
||||
withUint32KV("gpt2.embedding_length", 768).
|
||||
build()
|
||||
|
||||
path := filepath.Join(dir, "gpt2-small.gguf")
|
||||
os.WriteFile(path, data, 0644)
|
||||
return dir
|
||||
},
|
||||
expectedPackages: []pkg.Package{
|
||||
{
|
||||
Name: "gpt2-small",
|
||||
Version: "",
|
||||
Type: pkg.ModelPkg,
|
||||
Licenses: pkg.NewLicenseSet(),
|
||||
Metadata: pkg.GGUFFileHeader{
|
||||
Architecture: "gpt2",
|
||||
Quantization: "Unknown",
|
||||
Parameters: 0,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 0,
|
||||
MetadataKeyValuesHash: "9dc6f23591062a27",
|
||||
RemainingKeyValues: map[string]interface{}{
|
||||
"gpt2.context_length": "1024",
|
||||
"gpt2.embedding_length": uint32(768),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedRelationships: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
fixtureDir := tt.setup(t)
|
||||
|
||||
// Use pkgtest to catalog and compare
|
||||
pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, fixtureDir).
|
||||
Expects(tt.expectedPackages, tt.expectedRelationships).
|
||||
IgnoreLocationLayer().
|
||||
IgnorePackageFields("FoundBy", "Locations").
|
||||
TestCataloger(t, NewGGUFCataloger())
|
||||
})
|
||||
}
|
||||
}
|
||||
22
syft/pkg/cataloger/ai/package.go
Normal file
22
syft/pkg/cataloger/ai/package.go
Normal file
@ -0,0 +1,22 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
)
|
||||
|
||||
func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license string, locations ...file.Location) pkg.Package {
|
||||
p := pkg.Package{
|
||||
Name: modelName,
|
||||
Version: version,
|
||||
Locations: file.NewLocationSet(locations...),
|
||||
Type: pkg.ModelPkg,
|
||||
Licenses: pkg.NewLicenseSet(pkg.NewLicensesFromValues(license)...),
|
||||
Metadata: *metadata,
|
||||
// NOTE: PURL is intentionally not set as the package-url spec
|
||||
// has not yet finalized support for ML model packages
|
||||
}
|
||||
p.SetID()
|
||||
|
||||
return p
|
||||
}
|
||||
121
syft/pkg/cataloger/ai/package_test.go
Normal file
121
syft/pkg/cataloger/ai/package_test.go
Normal file
@ -0,0 +1,121 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
|
||||
)
|
||||
|
||||
func TestNewGGUFPackage(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
metadata *pkg.GGUFFileHeader
|
||||
input struct {
|
||||
modelName string
|
||||
version string
|
||||
license string
|
||||
locations []file.Location
|
||||
}
|
||||
expected pkg.Package
|
||||
}{
|
||||
{
|
||||
name: "complete GGUF package with all fields",
|
||||
input: struct {
|
||||
modelName string
|
||||
version string
|
||||
license string
|
||||
locations []file.Location
|
||||
}{
|
||||
modelName: "llama3-8b",
|
||||
version: "3.0",
|
||||
license: "Apache-2.0",
|
||||
locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
|
||||
},
|
||||
metadata: &pkg.GGUFFileHeader{
|
||||
Architecture: "llama",
|
||||
Quantization: "Q4_K_M",
|
||||
Parameters: 8030000000,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 291,
|
||||
RemainingKeyValues: map[string]any{
|
||||
"general.random_kv": "foobar",
|
||||
},
|
||||
},
|
||||
expected: pkg.Package{
|
||||
Name: "llama3-8b",
|
||||
Version: "3.0",
|
||||
Type: pkg.ModelPkg,
|
||||
Licenses: pkg.NewLicenseSet(
|
||||
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
|
||||
),
|
||||
Metadata: pkg.GGUFFileHeader{
|
||||
Architecture: "llama",
|
||||
Quantization: "Q4_K_M",
|
||||
Parameters: 8030000000,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 291,
|
||||
RemainingKeyValues: map[string]any{
|
||||
"general.random_kv": "foobar",
|
||||
},
|
||||
},
|
||||
Locations: file.NewLocationSet(file.NewLocation("/models/llama3-8b.gguf")),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "minimal GGUF package",
|
||||
input: struct {
|
||||
modelName string
|
||||
version string
|
||||
license string
|
||||
locations []file.Location
|
||||
}{
|
||||
modelName: "gpt2-small",
|
||||
version: "1.0",
|
||||
license: "MIT",
|
||||
locations: []file.Location{file.NewLocation("/models/simple.gguf")},
|
||||
},
|
||||
metadata: &pkg.GGUFFileHeader{
|
||||
Architecture: "gpt2",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 50,
|
||||
},
|
||||
expected: pkg.Package{
|
||||
Name: "gpt2-small",
|
||||
Version: "1.0",
|
||||
Type: pkg.ModelPkg,
|
||||
Licenses: pkg.NewLicenseSet(
|
||||
pkg.NewLicenseFromFields("MIT", "", nil),
|
||||
),
|
||||
Metadata: pkg.GGUFFileHeader{
|
||||
Architecture: "gpt2",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 50,
|
||||
},
|
||||
Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
actual := newGGUFPackage(
|
||||
tt.metadata,
|
||||
tt.input.modelName,
|
||||
tt.input.version,
|
||||
tt.input.license,
|
||||
tt.input.locations...,
|
||||
)
|
||||
|
||||
// Verify metadata type
|
||||
_, ok := actual.Metadata.(pkg.GGUFFileHeader)
|
||||
require.True(t, ok, "metadata should be GGUFFileHeader")
|
||||
|
||||
// Use AssertPackagesEqual for comprehensive comparison
|
||||
pkgtest.AssertPackagesEqual(t, tt.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
63
syft/pkg/cataloger/ai/parse_gguf.go
Normal file
63
syft/pkg/cataloger/ai/parse_gguf.go
Normal file
@ -0,0 +1,63 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
gguf_parser "github.com/gpustack/gguf-parser-go"
|
||||
)
|
||||
|
||||
// GGUF file format constants
const (
	ggufMagicNumber = 0x46554747       // "GGUF" in little-endian
	maxHeaderSize   = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies
)

// copyHeader streams GGUF data from r into w after checking the magic number.
//
// The first 24 bytes (magic, version, tensor count, metadata KV count) are read
// in full; if the little-endian uint32 at the start is not ggufMagicNumber an
// error is returned and nothing is written. Otherwise those 24 bytes and then
// everything else r yields are written to w. copyHeader itself places no bound
// on how much is copied, so callers should pass a size-limited reader.
func copyHeader(w io.Writer, r io.Reader) error {
	// magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8)
	const prefixLen = 24

	var prefix [prefixLen]byte
	_, err := io.ReadFull(r, prefix[:])
	if err != nil {
		return fmt.Errorf("failed to read GGUF header prefix: %w", err)
	}

	if got := binary.LittleEndian.Uint32(prefix[:4]); got != ggufMagicNumber {
		return fmt.Errorf("invalid GGUF magic number: 0x%08X", got)
	}

	// forward the already-consumed prefix before streaming the remainder
	if _, err = w.Write(prefix[:]); err != nil {
		return fmt.Errorf("failed to write GGUF header prefix: %w", err)
	}

	if _, err = io.Copy(w, r); err != nil {
		return fmt.Errorf("failed to copy GGUF header: %w", err)
	}

	return nil
}
|
||||
|
||||
// Helper to convert gguf_parser metadata to simpler types
|
||||
func convertGGUFMetadataKVs(kvs gguf_parser.GGUFMetadataKVs) map[string]interface{} {
|
||||
result := make(map[string]interface{})
|
||||
|
||||
for _, kv := range kvs {
|
||||
// Skip standard fields that are extracted separately
|
||||
switch kv.Key {
|
||||
case "general.architecture", "general.name", "general.license",
|
||||
"general.version", "general.parameter_count", "general.quantization":
|
||||
continue
|
||||
}
|
||||
result[kv.Key] = kv.Value
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
135
syft/pkg/cataloger/ai/parse_gguf_model.go
Normal file
135
syft/pkg/cataloger/ai/parse_gguf_model.go
Normal file
@ -0,0 +1,135 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
gguf_parser "github.com/gpustack/gguf-parser-go"
|
||||
|
||||
"github.com/anchore/syft/internal"
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/internal/unknown"
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||
)
|
||||
|
||||
// parseGGUFModel parses a GGUF model file and returns the discovered package.
|
||||
// This implementation only reads the header portion of the file, not the entire model.
|
||||
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
defer internal.CloseAndLogError(reader, reader.Path())
|
||||
|
||||
// Create a temporary file for the library to parse
|
||||
// The library requires a file path, so we create a temp file
|
||||
tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf")
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
|
||||
}
|
||||
tempPath := tempFile.Name()
|
||||
defer os.Remove(tempPath)
|
||||
|
||||
// Copy and validate the GGUF file header using LimitedReader to prevent OOM
|
||||
// We use LimitedReader to cap reads at maxHeaderSize (50MB)
|
||||
limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
|
||||
if err := copyHeader(tempFile, limitedReader); err != nil {
|
||||
tempFile.Close()
|
||||
return nil, nil, fmt.Errorf("failed to copy GGUF header: %w", err)
|
||||
}
|
||||
tempFile.Close()
|
||||
|
||||
// Parse using gguf-parser-go with options to skip unnecessary data
|
||||
ggufFile, err := gguf_parser.ParseGGUFFile(tempPath,
|
||||
gguf_parser.SkipLargeMetadata(),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to parse GGUF file: %w", err)
|
||||
}
|
||||
|
||||
// Extract metadata
|
||||
metadata := ggufFile.Metadata()
|
||||
|
||||
// Extract version separately (will be set on Package.Version)
|
||||
modelVersion := extractVersion(ggufFile.Header.MetadataKV)
|
||||
|
||||
// Convert to syft metadata structure
|
||||
syftMetadata := &pkg.GGUFFileHeader{
|
||||
Architecture: metadata.Architecture,
|
||||
Quantization: metadata.FileTypeDescriptor,
|
||||
Parameters: uint64(metadata.Parameters),
|
||||
GGUFVersion: uint32(ggufFile.Header.Version),
|
||||
TensorCount: ggufFile.Header.TensorCount,
|
||||
RemainingKeyValues: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
|
||||
MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
|
||||
}
|
||||
|
||||
// If model name is not in metadata, use filename
|
||||
if metadata.Name == "" {
|
||||
metadata.Name = extractModelNameFromPath(reader.Path())
|
||||
}
|
||||
|
||||
// Create package from metadata
|
||||
p := newGGUFPackage(
|
||||
syftMetadata,
|
||||
metadata.Name,
|
||||
modelVersion,
|
||||
metadata.License,
|
||||
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
|
||||
)
|
||||
|
||||
return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse GGUF file")
|
||||
}
|
||||
|
||||
// computeKVMetadataHash computes a stable hash of the KV metadata for use as a global identifier
|
||||
func computeKVMetadataHash(metadata gguf_parser.GGUFMetadataKVs) string {
|
||||
// Sort the KV pairs by key for stable hashing
|
||||
sortedKVs := make([]gguf_parser.GGUFMetadataKV, len(metadata))
|
||||
copy(sortedKVs, metadata)
|
||||
sort.Slice(sortedKVs, func(i, j int) bool {
|
||||
return sortedKVs[i].Key < sortedKVs[j].Key
|
||||
})
|
||||
|
||||
// Marshal sorted KVs to JSON for stable hashing
|
||||
jsonBytes, err := json.Marshal(sortedKVs)
|
||||
if err != nil {
|
||||
log.Debugf("failed to marshal metadata for hashing: %v", err)
|
||||
return ""
|
||||
}
|
||||
|
||||
// Compute xxhash
|
||||
hash := xxhash.Sum64(jsonBytes)
|
||||
return fmt.Sprintf("%016x", hash) // 16 hex chars (64 bits)
|
||||
}
|
||||
|
||||
// extractVersion attempts to extract version from metadata KV pairs
|
||||
func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string {
|
||||
for _, kv := range kvs {
|
||||
if kv.Key == "general.version" {
|
||||
if v, ok := kv.Value.(string); ok && v != "" {
|
||||
return v
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// extractModelNameFromPath derives a model name from a file path: the final
// path element with a trailing ".gguf" removed, if present.
func extractModelNameFromPath(path string) string {
	return strings.TrimSuffix(filepath.Base(path), ".gguf")
}
|
||||
|
||||
// integrity check
|
||||
var _ generic.Parser = parseGGUFModel
|
||||
128
syft/pkg/cataloger/ai/test_helpers_test.go
Normal file
128
syft/pkg/cataloger/ai/test_helpers_test.go
Normal file
@ -0,0 +1,128 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
// GGUF type constants for test builder
// https://github.com/ggml-org/ggml/blob/master/docs/gguf.md
const (
	ggufMagic       = 0x46554747 // "GGUF" in little-endian
	ggufTypeUint8   = 0
	ggufTypeInt8    = 1
	ggufTypeUint16  = 2
	ggufTypeInt16   = 3
	ggufTypeUint32  = 4
	ggufTypeInt32   = 5
	ggufTypeFloat32 = 6
	ggufTypeBool    = 7
	ggufTypeString  = 8
	ggufTypeArray   = 9
	ggufTypeUint64  = 10
	ggufTypeInt64   = 11
	ggufTypeFloat64 = 12
)

// testGGUFBuilder helps build GGUF files for testing
type testGGUFBuilder struct {
	buf         *bytes.Buffer // output of the most recent build() call
	version     uint32        // written as the header version field
	tensorCount uint64        // written as the header tensor count field
	kvPairs     []testKVPair  // metadata entries, written in insertion order
}

// testKVPair is one metadata entry: a key, a GGUF value type tag, and a value
// whose dynamic type must match that tag.
type testKVPair struct {
	key       string
	valueType uint32
	value     interface{}
}

// newTestGGUFBuilder returns a builder preset to version 3, zero tensors, and
// no metadata entries.
func newTestGGUFBuilder() *testGGUFBuilder {
	return &testGGUFBuilder{
		buf:         new(bytes.Buffer),
		version:     3,
		tensorCount: 0,
		kvPairs:     []testKVPair{},
	}
}

// withVersion sets the header version and returns the builder for chaining.
func (b *testGGUFBuilder) withVersion(v uint32) *testGGUFBuilder {
	b.version = v
	return b
}

// withTensorCount sets the header tensor count and returns the builder for chaining.
func (b *testGGUFBuilder) withTensorCount(count uint64) *testGGUFBuilder {
	b.tensorCount = count
	return b
}

// withStringKV appends a string-typed metadata entry.
func (b *testGGUFBuilder) withStringKV(key, value string) *testGGUFBuilder {
	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeString, value: value})
	return b
}

// withUint64KV appends a uint64-typed metadata entry.
func (b *testGGUFBuilder) withUint64KV(key string, value uint64) *testGGUFBuilder {
	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint64, value: value})
	return b
}

// withUint32KV appends a uint32-typed metadata entry.
func (b *testGGUFBuilder) withUint32KV(key string, value uint32) *testGGUFBuilder {
	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint32, value: value})
	return b
}

// put appends the fixed-size value v to the buffer in little-endian byte order
// and panics if it cannot be encoded, so a broken fixture fails loudly.
func (b *testGGUFBuilder) put(v interface{}) {
	if err := binary.Write(b.buf, binary.LittleEndian, v); err != nil {
		panic("testGGUFBuilder: " + err.Error())
	}
}

// writeString appends s as a uint64 byte length followed by the raw bytes.
func (b *testGGUFBuilder) writeString(s string) {
	b.put(uint64(len(s)))
	b.buf.WriteString(s)
}

// build serializes the header (magic, version, tensor count, KV count) followed
// by every metadata entry and returns the bytes.
//
// Each call starts from a fresh buffer, so calling build more than once yields
// the same bytes instead of appending a second file to the first. It panics on
// a value type it has no encoder for, rather than emitting a key and type tag
// with no value.
func (b *testGGUFBuilder) build() []byte {
	b.buf = new(bytes.Buffer)

	// Write magic number "GGUF"
	b.put(uint32(ggufMagic))

	// Write version
	b.put(b.version)

	// Write tensor count
	b.put(b.tensorCount)

	// Write KV count
	b.put(uint64(len(b.kvPairs)))

	// Write KV pairs
	for _, kv := range b.kvPairs {
		// Write key
		b.writeString(kv.key)
		// Write value type
		b.put(kv.valueType)
		// Write value based on type
		switch kv.valueType {
		case ggufTypeString:
			b.writeString(kv.value.(string))
		case ggufTypeUint32:
			b.put(kv.value.(uint32))
		case ggufTypeUint64:
			b.put(kv.value.(uint64))
		case ggufTypeUint8:
			b.put(kv.value.(uint8))
		case ggufTypeInt32:
			b.put(kv.value.(int32))
		case ggufTypeBool:
			var v uint8
			if kv.value.(bool) {
				v = 1
			}
			b.put(v)
		default:
			panic("testGGUFBuilder: unsupported GGUF value type for key " + kv.key)
		}
	}

	return b.buf.Bytes()
}

// buildInvalidMagic creates a file with invalid magic number
func (b *testGGUFBuilder) buildInvalidMagic() []byte {
	buf := new(bytes.Buffer)
	if err := binary.Write(buf, binary.LittleEndian, uint32(0x12345678)); err != nil {
		panic("testGGUFBuilder: " + err.Error())
	}
	return buf.Bytes()
}
|
||||
@ -80,7 +80,7 @@ func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, _ f
|
||||
|
||||
// processJavaArchive processes an archive for java contents, returning all Java libraries and nested archives
|
||||
func (gap genericArchiveParserAdapter) processJavaArchive(ctx context.Context, reader file.LocationReadCloser, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
parser, cleanupFn, err := newJavaArchiveParser(reader, true, gap.cfg)
|
||||
parser, cleanupFn, err := newJavaArchiveParser(ctx, reader, true, gap.cfg)
|
||||
// note: even on error, we should always run cleanup functions
|
||||
defer cleanupFn()
|
||||
if err != nil {
|
||||
@ -99,7 +99,7 @@ func uniquePkgKey(groupID string, p *pkg.Package) string {
|
||||
|
||||
// newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover
|
||||
// and parse nested archives or ignore them.
|
||||
func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) {
|
||||
func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) {
|
||||
// fetch the last element of the virtual path
|
||||
virtualElements := strings.Split(reader.Path(), ":")
|
||||
currentFilepath := virtualElements[len(virtualElements)-1]
|
||||
@ -109,7 +109,7 @@ func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg
|
||||
return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err)
|
||||
}
|
||||
|
||||
fileManifest, err := intFile.NewZipFileManifest(archivePath)
|
||||
fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath)
|
||||
if err != nil {
|
||||
return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err)
|
||||
}
|
||||
@ -226,7 +226,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
|
||||
}
|
||||
|
||||
// fetch the manifest file
|
||||
contents, err := intFile.ContentsFromZip(j.archivePath, manifestMatches...)
|
||||
contents, err := intFile.ContentsFromZip(ctx, j.archivePath, manifestMatches...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to extract java manifests (%s): %w", j.location, err)
|
||||
}
|
||||
@ -387,8 +387,9 @@ type parsedPomProject struct {
|
||||
|
||||
// discoverMainPackageFromPomInfo attempts to resolve maven groupId, artifactId, version and other info from found pom information
|
||||
func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (group, name, version string, parsedPom *parsedPomProject) {
|
||||
properties, _ := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
|
||||
projects, _ := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
|
||||
// Find the pom.properties/pom.xml if the names seem like a plausible match
|
||||
properties, _ := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
|
||||
projects, _ := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
|
||||
|
||||
artifactsMap := j.buildArtifactsMap(properties)
|
||||
pomProperties, parsedPom := j.findBestPomMatch(properties, projects, artifactsMap)
|
||||
@ -519,13 +520,13 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(ctx context.Context, paren
|
||||
var pkgs []pkg.Package
|
||||
|
||||
// pom.properties
|
||||
properties, err := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
|
||||
properties, err := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// pom.xml
|
||||
projects, err := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
|
||||
projects, err := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -575,7 +576,7 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.
|
||||
}
|
||||
|
||||
if len(licenseMatches) > 0 {
|
||||
contents, err := intFile.ContentsFromZip(j.archivePath, licenseMatches...)
|
||||
contents, err := intFile.ContentsFromZip(ctx, j.archivePath, licenseMatches...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to extract java license (%s): %w", j.location, err)
|
||||
}
|
||||
@ -616,7 +617,7 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(ctx context.Context, pare
|
||||
// associating each discovered package to the given parent package.
|
||||
func discoverPkgsFromZip(ctx context.Context, location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
// extract any nested archives (files matching archiveFormatGlobs) from the zip into unique temp files
|
||||
openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...)
|
||||
openers, err := intFile.ExtractFromZipToUniqueTempFile(ctx, archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err)
|
||||
}
|
||||
@ -680,8 +681,8 @@ func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWit
|
||||
return nestedPkgs, nestedRelationships, nil
|
||||
}
|
||||
|
||||
func pomPropertiesByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) {
|
||||
contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...)
|
||||
func pomPropertiesByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) {
|
||||
contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to extract maven files: %w", err)
|
||||
}
|
||||
@ -709,8 +710,8 @@ func pomPropertiesByParentPath(archivePath string, location file.Location, extra
|
||||
return propertiesByParentPath, nil
|
||||
}
|
||||
|
||||
func pomProjectByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) {
|
||||
contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...)
|
||||
func pomProjectByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) {
|
||||
contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to extract maven files: %w", err)
|
||||
}
|
||||
|
||||
@ -72,7 +72,7 @@ func TestSearchMavenForLicenses(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
// setup parser
|
||||
ap, cleanupFn, err := newJavaArchiveParser(
|
||||
ap, cleanupFn, err := newJavaArchiveParser(context.Background(),
|
||||
file.LocationReadCloser{
|
||||
Location: file.NewLocation(fixture.Name()),
|
||||
ReadCloser: fixture,
|
||||
@ -372,7 +372,7 @@ func TestParseJar(t *testing.T) {
|
||||
UseNetwork: false,
|
||||
UseMavenLocalRepository: false,
|
||||
}
|
||||
parser, cleanupFn, err := newJavaArchiveParser(
|
||||
parser, cleanupFn, err := newJavaArchiveParser(context.Background(),
|
||||
file.LocationReadCloser{
|
||||
Location: file.NewLocation(fixture.Name()),
|
||||
ReadCloser: fixture,
|
||||
@ -1499,7 +1499,7 @@ func Test_deterministicMatchingPomProperties(t *testing.T) {
|
||||
fixture, err := os.Open(fixturePath)
|
||||
require.NoError(t, err)
|
||||
|
||||
parser, cleanupFn, err := newJavaArchiveParser(
|
||||
parser, cleanupFn, err := newJavaArchiveParser(context.Background(),
|
||||
file.LocationReadCloser{
|
||||
Location: file.NewLocation(fixture.Name()),
|
||||
ReadCloser: fixture,
|
||||
@ -1636,7 +1636,7 @@ func Test_jarPomPropertyResolutionDoesNotPanic(t *testing.T) {
|
||||
|
||||
ctx := context.TODO()
|
||||
// setup parser
|
||||
ap, cleanupFn, err := newJavaArchiveParser(
|
||||
ap, cleanupFn, err := newJavaArchiveParser(context.Background(),
|
||||
file.LocationReadCloser{
|
||||
Location: file.NewLocation(fixture.Name()),
|
||||
ReadCloser: fixture,
|
||||
|
||||
@ -70,7 +70,7 @@ func (gtp genericTarWrappedJavaArchiveParser) parseTarWrappedJavaArchive(ctx con
|
||||
}
|
||||
|
||||
func discoverPkgsFromTar(ctx context.Context, location file.Location, archivePath, contentPath string, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...)
|
||||
openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(ctx, archivePath, contentPath, archiveFormatGlobs...)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err)
|
||||
}
|
||||
|
||||
@ -41,7 +41,7 @@ func (gzp genericZipWrappedJavaArchiveParser) parseZipWrappedJavaArchive(ctx con
|
||||
// functions support zips with shell scripts prepended to the file. Specifically, the helpers use the central
|
||||
// header at the end of the file to determine where the beginning of the zip payload is (unlike the standard lib
|
||||
// or archiver).
|
||||
fileManifest, err := intFile.NewZipFileManifest(archivePath)
|
||||
fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("unable to read files from java archive: %w", err)
|
||||
}
|
||||
|
||||
37
syft/pkg/gguf.go
Normal file
37
syft/pkg/gguf.go
Normal file
@ -0,0 +1,37 @@
|
||||
package pkg
|
||||
|
||||
// GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file.
|
||||
// GGUF is a binary file format used for storing model weights for the GGML library, designed for fast
|
||||
// loading and saving of models, particularly quantized large language models.
|
||||
// The Model Name, License, and Version fields have all been lifted up to be on the syft Package.
|
||||
type GGUFFileHeader struct {
|
||||
// GGUFVersion is the GGUF format version (e.g., 3)
|
||||
GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"`
|
||||
|
||||
// FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)
|
||||
FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`
|
||||
|
||||
// Architecture is the model architecture (from general.architecture, e.g., "qwen3moe", "llama")
|
||||
Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`
|
||||
|
||||
// Quantization is the quantization type (e.g., "IQ4_NL", "Q4_K_M")
|
||||
Quantization string `json:"quantization,omitempty" cyclonedx:"quantization"`
|
||||
|
||||
// Parameters is the number of model parameters (if present in header)
|
||||
Parameters uint64 `json:"parameters,omitempty" cyclonedx:"parameters"`
|
||||
|
||||
// TensorCount is the number of tensors in the model
|
||||
TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`
|
||||
|
||||
// RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already
|
||||
// represented as typed fields above. This preserves additional metadata fields for reference
|
||||
// (namespaced with general.*, llama.*, etc.) while avoiding duplication.
|
||||
RemainingKeyValues map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
|
||||
|
||||
// MetadataKeyValuesHash is an xx64 hash of all key-value pairs from the GGUF header metadata.
|
||||
// This hash is computed over the complete header metadata (including the fields extracted
|
||||
// into typed fields above) and provides a stable identifier for the model configuration
|
||||
// across different file locations or remotes. It allows matching identical models even
|
||||
// when stored in different repositories or with different filenames.
|
||||
MetadataKeyValuesHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"`
|
||||
}
|
||||
@ -54,6 +54,7 @@ const (
|
||||
TerraformPkg Type = "terraform"
|
||||
WordpressPluginPkg Type = "wordpress-plugin"
|
||||
HomebrewPkg Type = "homebrew"
|
||||
ModelPkg Type = "model"
|
||||
)
|
||||
|
||||
// AllPkgs represents all supported package types
|
||||
@ -98,6 +99,7 @@ var AllPkgs = []Type{
|
||||
TerraformPkg,
|
||||
WordpressPluginPkg,
|
||||
HomebrewPkg,
|
||||
ModelPkg,
|
||||
}
|
||||
|
||||
// PackageURLType returns the PURL package type for the current package.
|
||||
|
||||
@ -155,6 +155,7 @@ func TestTypeFromPURL(t *testing.T) {
|
||||
expectedTypes.Remove(string(HomebrewPkg))
|
||||
expectedTypes.Remove(string(TerraformPkg))
|
||||
expectedTypes.Remove(string(GraalVMNativeImagePkg))
|
||||
expectedTypes.Remove(string(ModelPkg)) // no valid purl for ai artifacts currently
|
||||
expectedTypes.Remove(string(PhpPeclPkg)) // we should always consider this a pear package
|
||||
|
||||
for _, test := range tests {
|
||||
|
||||
@ -4,13 +4,15 @@ import (
|
||||
"context"
|
||||
"crypto"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"github.com/mholt/archives"
|
||||
"github.com/opencontainers/go-digest"
|
||||
|
||||
"github.com/anchore/archiver/v3"
|
||||
stereoFile "github.com/anchore/stereoscope/pkg/file"
|
||||
intFile "github.com/anchore/syft/internal/file"
|
||||
"github.com/anchore/syft/internal/log"
|
||||
@ -208,18 +210,8 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro
|
||||
// if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and
|
||||
// use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is
|
||||
// unarchived.
|
||||
envelopedUnarchiver, err := archiver.ByExtension(path)
|
||||
if unarchiver, ok := envelopedUnarchiver.(archiver.Unarchiver); err == nil && ok {
|
||||
// when tar/zip files are extracted, if there are multiple entries at the same
|
||||
// location, the last entry wins
|
||||
// NOTE: this currently does not display any messages if an overwrite happens
|
||||
switch v := unarchiver.(type) {
|
||||
case *archiver.Tar:
|
||||
v.OverwriteExisting = true
|
||||
case *archiver.Zip:
|
||||
v.OverwriteExisting = true
|
||||
}
|
||||
|
||||
envelopedUnarchiver, _, err := archives.Identify(context.Background(), path, nil)
|
||||
if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok {
|
||||
analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver)
|
||||
if err != nil {
|
||||
return "", nil, fmt.Errorf("unable to unarchive source file: %w", err)
|
||||
@ -246,15 +238,58 @@ func digestOfFileContents(path string) string {
|
||||
return di.String()
|
||||
}
|
||||
|
||||
func unarchiveToTmp(path string, unarchiver archiver.Unarchiver) (string, func() error, error) {
|
||||
func unarchiveToTmp(path string, unarchiver archives.Extractor) (string, func() error, error) {
|
||||
var cleanupFn = func() error { return nil }
|
||||
archive, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", cleanupFn, fmt.Errorf("unable to open archive: %v", err)
|
||||
}
|
||||
defer archive.Close()
|
||||
|
||||
tempDir, err := os.MkdirTemp("", "syft-archive-contents-")
|
||||
if err != nil {
|
||||
return "", func() error { return nil }, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
|
||||
return "", cleanupFn, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
|
||||
}
|
||||
|
||||
cleanupFn := func() error {
|
||||
visitor := func(_ context.Context, file archives.FileInfo) error {
|
||||
// Protect against symlink attacks by ensuring path doesn't escape tempDir
|
||||
destPath, err := intFile.SafeJoin(tempDir, file.NameInArchive)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if file.IsDir() {
|
||||
return os.MkdirAll(destPath, file.Mode())
|
||||
}
|
||||
|
||||
if err = os.MkdirAll(filepath.Dir(destPath), os.ModeDir|0755); err != nil {
|
||||
return fmt.Errorf("failed to create parent directory: %w", err)
|
||||
}
|
||||
|
||||
rc, err := file.Open()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open file in archive: %w", err)
|
||||
}
|
||||
defer rc.Close()
|
||||
|
||||
destFile, err := os.Create(destPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create file in destination: %w", err)
|
||||
}
|
||||
defer destFile.Close()
|
||||
|
||||
if err := destFile.Chmod(file.Mode()); err != nil {
|
||||
return fmt.Errorf("failed to change mode of destination file: %w", err)
|
||||
}
|
||||
|
||||
if _, err := io.Copy(destFile, rc); err != nil {
|
||||
return fmt.Errorf("failed to copy file contents: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
return tempDir, func() error {
|
||||
return os.RemoveAll(tempDir)
|
||||
}
|
||||
|
||||
return tempDir, cleanupFn, unarchiver.Unarchive(path, tempDir)
|
||||
}, unarchiver.Extract(context.Background(), archive, visitor)
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user