Mirror of https://github.com/anchore/syft.git (synced 2025-11-17 00:13:15 +01:00)

Compare commits: 2e100f33f3 ... 89842bd2f6 (2 commits)

| Author | SHA1 | Date |
|---|---|---|
| | 89842bd2f6 | |
| | 4a60c41f38 | |
.gitignore (vendored): 2 changes

@@ -73,3 +73,5 @@ cosign.pub
 __pycache__/
 *.py[cod]
 *$py.class
+
+
@@ -106,8 +106,8 @@ syft <image> -o <format>
 Where the `formats` available are:
 - `syft-json`: Use this to get as much information out of Syft as possible!
 - `syft-text`: A row-oriented, human-and-machine-friendly output.
-- `cyclonedx-xml`: A XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
-- `cyclonedx-xml@1.5`: A XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
+- `cyclonedx-xml`: An XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
+- `cyclonedx-xml@1.5`: An XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
 - `cyclonedx-json`: A JSON report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
 - `cyclonedx-json@1.5`: A JSON report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
 - `spdx-tag-value`: A tag-value formatted report conforming to the [SPDX 2.3 specification](https://spdx.github.io/spdx-spec/v2.3/).
@@ -87,6 +87,7 @@ func TestPkgCoverageImage(t *testing.T) {
 definedPkgs.Remove(string(pkg.TerraformPkg))
 definedPkgs.Remove(string(pkg.PhpPeclPkg)) // we have coverage for pear instead
 definedPkgs.Remove(string(pkg.CondaPkg))
+definedPkgs.Remove(string(pkg.ModelPkg))

 var cases []testCase
 cases = append(cases, commonTestCases...)
@@ -161,6 +162,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
 definedPkgs.Remove(string(pkg.UnknownPkg))
 definedPkgs.Remove(string(pkg.CondaPkg))
 definedPkgs.Remove(string(pkg.PhpPeclPkg)) // this is covered as pear packages
+definedPkgs.Remove(string(pkg.ModelPkg))

 // for directory scans we should not expect to see any of the following package types
 definedPkgs.Remove(string(pkg.KbPkg))
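Both hunks extend the same exclusion pattern: the tests collect every defined package type into a string set, strike out the types the fixtures intentionally do not exercise (now including `pkg.ModelPkg`, presumably the package type behind the new GGUF model support pulled in by this change), and later assert that whatever remains was actually observed. Below is a rough, self-contained sketch of that pattern using the `strset` library already in the module's dependencies; the type names and the population step are illustrative stand-ins, not syft's actual test code:

```go
package main

import (
	"fmt"

	"github.com/scylladb/go-set/strset"
)

func main() {
	// hypothetical stand-in for "all defined package types" collected by the real test
	definedPkgs := strset.New("apk", "deb", "rpm", "model", "conda", "php-pecl")

	// strike out types the fixtures intentionally do not cover, mirroring
	// definedPkgs.Remove(string(pkg.ModelPkg)) in the hunks above
	for _, excluded := range []string{"model", "conda", "php-pecl"} {
		definedPkgs.Remove(excluded)
	}

	// whatever is left is what the coverage test still expects to observe
	fmt.Println(definedPkgs.List())
}
```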
go.mod: 20 changes

@@ -11,7 +11,6 @@ require (
 github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
 github.com/acobaugh/osrelease v0.1.0
 github.com/adrg/xdg v0.5.3
-github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51
 github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9
 github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716
 github.com/anchore/fangs v0.0.0-20250319222917-446a1e748ec2
@@ -168,7 +167,6 @@ require (
 github.com/goccy/go-yaml v1.18.0
 github.com/gogo/protobuf v1.3.2 // indirect
 github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
-github.com/golang/snappy v0.0.4 // indirect
 github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect
 github.com/google/s2a-go v0.1.8 // indirect
 github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
@@ -209,10 +207,6 @@ require (
 github.com/muesli/cancelreader v0.2.2 // indirect
 github.com/muesli/termenv v0.16.0 // indirect
 github.com/ncruces/go-strftime v0.1.9 // indirect
-github.com/nwaples/rardecode v1.1.3 // indirect
-github.com/nwaples/rardecode/v2 v2.2.0 // indirect
-github.com/olekukonko/errors v1.1.0 // indirect
-github.com/olekukonko/ll v0.1.2 // indirect
 github.com/opencontainers/image-spec v1.1.1 // indirect
 github.com/opencontainers/runtime-spec v1.1.0 // indirect
 github.com/opencontainers/selinux v1.13.0 // indirect
@@ -286,6 +280,11 @@ require (
 modernc.org/memory v1.11.0 // indirect
 )

+require (
+github.com/cespare/xxhash/v2 v2.3.0
+github.com/gpustack/gguf-parser-go v0.22.1
+)
+
 require (
 cyphar.com/go-pathrs v0.2.1 // indirect
 github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect
@@ -310,7 +309,16 @@ require (
 github.com/clipperhouse/stringish v0.1.1 // indirect
 github.com/clipperhouse/uax29/v2 v2.2.0 // indirect
 github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect
+github.com/henvic/httpretty v0.1.4 // indirect
+github.com/json-iterator/go v1.1.12 // indirect
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+github.com/modern-go/reflect2 v1.0.2 // indirect
+github.com/nwaples/rardecode/v2 v2.2.0 // indirect
 github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect
+github.com/olekukonko/errors v1.1.0 // indirect
+github.com/olekukonko/ll v0.1.2 // indirect
+github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
+gonum.org/v1/gonum v0.15.1 // indirect
 )

 retract (
go.sum: 18 changes

@@ -110,8 +110,6 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
 github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
 github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
 github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
-github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51 h1:yhk+P8lF3ZiROjmaVRao9WGTRo4b/wYjoKEiAHWrKwc=
-github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51/go.mod h1:nwuGSd7aZp0rtYt79YggCGafz1RYsclE7pi3fhLwvuw=
 github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 h1:p0ZIe0htYOX284Y4axJaGBvXHU0VCCzLN5Wf5XbKStU=
 github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9/go.mod h1:3ZsFB9tzW3vl4gEiUeuSOMDnwroWxIxJelOOHUp8dSw=
 github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 h1:2sIdYJlQESEnyk3Y0WD2vXWW5eD2iMz9Ev8fj1Z8LNA=
@@ -229,7 +227,6 @@ github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqy
 github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
 github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
 github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
-github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
 github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
 github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
@@ -480,8 +477,6 @@ github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu
 github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
 github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
 github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
-github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
-github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
@@ -549,6 +544,8 @@ github.com/gookit/assert v0.1.1/go.mod h1:jS5bmIVQZTIwk42uXl4lyj4iaaxx32tqH16CFj
 github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg=
 github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA=
 github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs=
+github.com/gpustack/gguf-parser-go v0.22.1 h1:FRnEDWqT0Rcplr/R9ctCRSN2+3DhVsf6dnR5/i9JA4E=
+github.com/gpustack/gguf-parser-go v0.22.1/go.mod h1:y4TwTtDqFWTK+xvprOjRUh+dowgU2TKCX37vRKvGiZ0=
 github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
 github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
 github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
@@ -598,6 +595,8 @@ github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn
 github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
 github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
 github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
+github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
+github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
 github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
 github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
 github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
@@ -625,6 +624,7 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF
 github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
 github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
 github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
 github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
 github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
 github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
@@ -730,9 +730,11 @@ github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcY
 github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA=
 github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
 github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
 github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
 github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
 github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
@@ -749,8 +751,6 @@ github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1 h1:kpt9ZfKcm+
 github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1/go.mod h1:qgCw4bBKZX8qMgGeEZzGFVT3notl42dBjNqO2jut0M0=
 github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 h1:NHrXEjTNQY7P0Zfx1aMrNhpgxHmow66XQtm0aQLY0AE=
 github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8=
-github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc=
-github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
 github.com/nwaples/rardecode/v2 v2.2.0 h1:4ufPGHiNe1rYJxYfehALLjup4Ls3ck42CWwjKiOqu0A=
 github.com/nwaples/rardecode/v2 v2.2.0/go.mod h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw=
 github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 h1:zrbMGy9YXpIeTnGj4EljqMiZsIcE09mmF8XsD5AYOJc=
@@ -860,6 +860,8 @@ github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yf
 github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
 github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
 github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
+github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
+github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
 github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik=
 github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU=
 github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
@@ -1313,6 +1315,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
 golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
+gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
+gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
 google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
 google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
 google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
@@ -3,5 +3,5 @@ package internal
 const (
 // JSONSchemaVersion is the current schema version output by the JSON encoder
 // This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
-JSONSchemaVersion = "16.0.42"
+JSONSchemaVersion = "16.0.43"
 )
@@ -1,17 +1,40 @@
 package file

 import (
+"context"
 "fmt"
 "os"
 "path/filepath"

 "github.com/bmatcuk/doublestar/v4"
+"github.com/mholt/archives"

-"github.com/anchore/archiver/v3"
+"github.com/anchore/syft/internal"
 )

+// TraverseFilesInTar enumerates all paths stored within a tar archive using the visitor pattern.
+func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archives.FileHandler) error {
+tarReader, err := os.Open(archivePath)
+if err != nil {
+return fmt.Errorf("unable to open tar archive (%s): %w", archivePath, err)
+}
+defer internal.CloseAndLogError(tarReader, archivePath)
+
+format, _, err := archives.Identify(ctx, archivePath, nil)
+if err != nil {
+return fmt.Errorf("failed to identify tar compression format: %w", err)
+}
+
+extractor, ok := format.(archives.Extractor)
+if !ok {
+return fmt.Errorf("file format does not support extraction: %s", archivePath)
+}
+
+return extractor.Extract(ctx, tarReader, visitor)
+}
+
 // ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted.
-func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...string) (map[string]Opener, error) {
+func ExtractGlobsFromTarToUniqueTempFile(ctx context.Context, archivePath, dir string, globs ...string) (map[string]Opener, error) {
 results := make(map[string]Opener)

 // don't allow for full traversal, only select traversal from given paths
@@ -19,9 +42,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
 return results, nil
 }

-visitor := func(file archiver.File) error {
-defer file.Close()
-
+visitor := func(_ context.Context, file archives.FileInfo) error {
 // ignore directories
 if file.IsDir() {
 return nil
@@ -43,7 +64,13 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
 // provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
 defer tempFile.Close()

-if err := safeCopy(tempFile, file.ReadCloser); err != nil {
+packedFile, err := file.Open()
+if err != nil {
+return fmt.Errorf("unable to read file=%q from tar=%q: %w", file.NameInArchive, archivePath, err)
+}
+defer internal.CloseAndLogError(packedFile, archivePath)
+
+if err := safeCopy(tempFile, packedFile); err != nil {
 return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err)
 }

@@ -52,7 +79,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
 return nil
 }

-return results, archiver.Walk(archivePath, visitor)
+return results, TraverseFilesInTar(ctx, archivePath, visitor)
 }

 func matchesAnyGlob(name string, globs ...string) bool {
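The new TraverseFilesInTar helper exposes the same visitor style the zip helpers use, so callers can walk a tar archive without extracting it to disk first. Below is a minimal sketch of a caller written as if it lived in the same package (these helpers sit under syft's internal tree, so external modules cannot import them); the fixture path and the printing logic are illustrative only, while the `archives.FileInfo` fields come straight from the diff above:

```go
package file

import (
	"context"
	"fmt"

	"github.com/mholt/archives"
)

// exampleTarWalk is a hypothetical caller, not part of the change itself.
func exampleTarWalk() error {
	visitor := func(_ context.Context, f archives.FileInfo) error {
		if f.IsDir() {
			return nil // skip directories, as the extraction visitor above does
		}
		fmt.Println(f.NameInArchive) // path of the entry inside the archive
		return nil
	}

	// walks every entry; the compression format is auto-detected via archives.Identify
	return TraverseFilesInTar(context.Background(), "test-fixtures/example.tar.gz", visitor)
}
```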
@@ -1,10 +1,12 @@
 package file

 import (
+"context"
 "os"
 "sort"
 "strings"

+"github.com/mholt/archives"
 "github.com/scylladb/go-set/strset"

 "github.com/anchore/syft/internal/log"
@@ -14,22 +16,25 @@ import (
 type ZipFileManifest map[string]os.FileInfo

 // NewZipFileManifest creates and returns a new ZipFileManifest populated with path and metadata from the given zip archive path.
-func NewZipFileManifest(archivePath string) (ZipFileManifest, error) {
-zipReader, err := OpenZip(archivePath)
+func NewZipFileManifest(ctx context.Context, archivePath string) (ZipFileManifest, error) {
+zipReader, err := os.Open(archivePath)
 manifest := make(ZipFileManifest)
 if err != nil {
 log.Debugf("unable to open zip archive (%s): %v", archivePath, err)
 return manifest, err
 }
 defer func() {
-err = zipReader.Close()
-if err != nil {
+if err = zipReader.Close(); err != nil {
 log.Debugf("unable to close zip archive (%s): %+v", archivePath, err)
 }
 }()

-for _, file := range zipReader.File {
-manifest.Add(file.Name, file.FileInfo())
+err = archives.Zip{}.Extract(ctx, zipReader, func(_ context.Context, file archives.FileInfo) error {
+manifest.Add(file.NameInArchive, file.FileInfo)
+return nil
+})
+if err != nil {
+return manifest, err
 }
 return manifest, nil
 }
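With the context-aware signature, the manifest is still just a map keyed by archive path, so inspecting it stays simple. A small sketch from the same package, using a hypothetical fixture path:

```go
package file

import (
	"context"
	"fmt"
)

// exampleZipManifest is a hypothetical caller, not part of the change itself.
func exampleZipManifest() error {
	manifest, err := NewZipFileManifest(context.Background(), "test-fixtures/example.zip")
	if err != nil {
		return err
	}

	// ZipFileManifest is a map of archive path -> os.FileInfo, so it can be ranged directly
	for name, info := range manifest {
		fmt.Printf("%s (%d bytes)\n", name, info.Size())
	}
	return nil
}
```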
@@ -4,6 +4,7 @@
 package file

 import (
+"context"
 "encoding/json"
 "os"
 "path"
@@ -24,7 +25,7 @@ func TestNewZipFileManifest(t *testing.T) {

 archiveFilePath := setupZipFileTest(t, sourceDirPath, false)

-actual, err := NewZipFileManifest(archiveFilePath)
+actual, err := NewZipFileManifest(context.Background(), archiveFilePath)
 if err != nil {
 t.Fatalf("unable to extract from unzip archive: %+v", err)
 }
@@ -59,7 +60,7 @@ func TestNewZip64FileManifest(t *testing.T) {
 sourceDirPath := path.Join(cwd, "test-fixtures", "zip-source")
 archiveFilePath := setupZipFileTest(t, sourceDirPath, true)

-actual, err := NewZipFileManifest(archiveFilePath)
+actual, err := NewZipFileManifest(context.Background(), archiveFilePath)
 if err != nil {
 t.Fatalf("unable to extract from unzip archive: %+v", err)
 }
@@ -99,7 +100,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {

 archiveFilePath := setupZipFileTest(t, sourceDirPath, false)

-z, err := NewZipFileManifest(archiveFilePath)
+z, err := NewZipFileManifest(context.Background(), archiveFilePath)
 if err != nil {
 t.Fatalf("unable to extract from unzip archive: %+v", err)
 }
@@ -1,13 +1,15 @@
 package file

 import (
-"archive/zip"
 "bytes"
+"context"
 "fmt"
 "os"
 "path/filepath"
 "strings"

+"github.com/mholt/archives"
+
 "github.com/anchore/syft/internal/log"
 )

@@ -25,7 +27,7 @@ type errZipSlipDetected struct {
 }

 func (e *errZipSlipDetected) Error() string {
-return fmt.Sprintf("paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs)
+return fmt.Sprintf("path traversal detected: paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs)
 }

 type zipTraversalRequest map[string]struct{}
@@ -39,38 +41,34 @@ func newZipTraverseRequest(paths ...string) zipTraversalRequest {
 }

 // TraverseFilesInZip enumerates all paths stored within a zip archive using the visitor pattern.
-func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths ...string) error {
+func TraverseFilesInZip(ctx context.Context, archivePath string, visitor archives.FileHandler, paths ...string) error {
 request := newZipTraverseRequest(paths...)

-zipReader, err := OpenZip(archivePath)
+zipReader, err := os.Open(archivePath)
 if err != nil {
 return fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err)
 }
 defer func() {
-err = zipReader.Close()
-if err != nil {
+if err := zipReader.Close(); err != nil {
 log.Errorf("unable to close zip archive (%s): %+v", archivePath, err)
 }
 }()

-for _, file := range zipReader.File {
+return archives.Zip{}.Extract(ctx, zipReader, func(ctx context.Context, file archives.FileInfo) error {
 // if no paths are given then assume that all files should be traversed
 if len(paths) > 0 {
-if _, ok := request[file.Name]; !ok {
+if _, ok := request[file.NameInArchive]; !ok {
 // this file path is not of interest
-continue
+return nil
 }
 }

-if err = visitor(file); err != nil {
-return err
-}
-}
-return nil
+return visitor(ctx, file)
+})
 }

 // ExtractFromZipToUniqueTempFile extracts select paths for the given archive to a temporary directory, returning file openers for each file extracted.
-func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]Opener, error) {
+func ExtractFromZipToUniqueTempFile(ctx context.Context, archivePath, dir string, paths ...string) (map[string]Opener, error) {
 results := make(map[string]Opener)

 // don't allow for full traversal, only select traversal from given paths
@@ -78,9 +76,8 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
 return results, nil
 }

-visitor := func(file *zip.File) error {
-tempfilePrefix := filepath.Base(filepath.Clean(file.Name)) + "-"
-
+visitor := func(_ context.Context, file archives.FileInfo) error {
+tempfilePrefix := filepath.Base(filepath.Clean(file.NameInArchive)) + "-"
 tempFile, err := os.CreateTemp(dir, tempfilePrefix)
 if err != nil {
 return fmt.Errorf("unable to create temp file: %w", err)
@@ -92,33 +89,32 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m

 zippedFile, err := file.Open()
 if err != nil {
-return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
+return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
 }
 defer func() {
-err := zippedFile.Close()
-if err != nil {
-log.Errorf("unable to close source file=%q from zip=%q: %+v", file.Name, archivePath, err)
+if err := zippedFile.Close(); err != nil {
+log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
 }
 }()

-if file.FileInfo().IsDir() {
-return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
+if file.IsDir() {
+return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
 }

 if err := safeCopy(tempFile, zippedFile); err != nil {
-return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err)
+return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
 }

-results[file.Name] = Opener{path: tempFile.Name()}
+results[file.NameInArchive] = Opener{path: tempFile.Name()}

 return nil
 }

-return results, TraverseFilesInZip(archivePath, visitor, paths...)
+return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
 }

 // ContentsFromZip extracts select paths for the given archive and returns a set of string contents for each path.
-func ContentsFromZip(archivePath string, paths ...string) (map[string]string, error) {
+func ContentsFromZip(ctx context.Context, archivePath string, paths ...string) (map[string]string, error) {
 results := make(map[string]string)

 // don't allow for full traversal, only select traversal from given paths
@@ -126,37 +122,38 @@ func ContentsFromZip(archivePath string, paths ...string) (map[string]string, er
 return results, nil
 }

-visitor := func(file *zip.File) error {
+visitor := func(_ context.Context, file archives.FileInfo) error {
 zippedFile, err := file.Open()
 if err != nil {
-return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
+return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
 }
+defer func() {
+if err := zippedFile.Close(); err != nil {
+log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
+}
+}()

-if file.FileInfo().IsDir() {
-return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
+if file.IsDir() {
+return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
 }

 var buffer bytes.Buffer
 if err := safeCopy(&buffer, zippedFile); err != nil {
-return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err)
+return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
 }

-results[file.Name] = buffer.String()
+results[file.NameInArchive] = buffer.String()

-err = zippedFile.Close()
-if err != nil {
-return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
-}
 return nil
 }

-return results, TraverseFilesInZip(archivePath, visitor, paths...)
+return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
 }

 // UnzipToDir extracts a zip archive to a target directory.
-func UnzipToDir(archivePath, targetDir string) error {
-visitor := func(file *zip.File) error {
-joinedPath, err := safeJoin(targetDir, file.Name)
+func UnzipToDir(ctx context.Context, archivePath, targetDir string) error {
+visitor := func(_ context.Context, file archives.FileInfo) error {
+joinedPath, err := SafeJoin(targetDir, file.NameInArchive)
 if err != nil {
 return err
 }
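Every exported zip helper above now threads a context through to the archives library, so call sites change mechanically: the context becomes the first argument and everything else stays the same. A small sketch of reading two known entries out of an archive in one pass, from the same package; the archive path and entry names are hypothetical:

```go
package file

import (
	"context"
	"fmt"
)

// exampleZipContents is a hypothetical caller, not part of the change itself.
func exampleZipContents() error {
	contents, err := ContentsFromZip(context.Background(), "test-fixtures/example.zip",
		"META-INF/MANIFEST.MF", // entry names inside the archive (examples only)
		"nested/file.txt",
	)
	if err != nil {
		return err
	}

	for name, data := range contents {
		fmt.Printf("%s: %d bytes\n", name, len(data))
	}
	return nil
}
```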
@@ -164,11 +161,11 @@ func UnzipToDir(archivePath, targetDir string) error {
 return extractSingleFile(file, joinedPath, archivePath)
 }

-return TraverseFilesInZip(archivePath, visitor)
+return TraverseFilesInZip(ctx, archivePath, visitor)
 }

-// safeJoin ensures that any destinations do not resolve to a path above the prefix path.
-func safeJoin(prefix string, dest ...string) (string, error) {
+// SafeJoin ensures that any destinations do not resolve to a path above the prefix path.
+func SafeJoin(prefix string, dest ...string) (string, error) {
 joinResult := filepath.Join(append([]string{prefix}, dest...)...)
 cleanJoinResult := filepath.Clean(joinResult)
 if !strings.HasPrefix(cleanJoinResult, filepath.Clean(prefix)) {
@@ -181,13 +178,18 @@ func safeJoin(prefix string, dest ...string) (string, error) {
 return joinResult, nil
 }

-func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) error {
+func extractSingleFile(file archives.FileInfo, expandedFilePath, archivePath string) error {
 zippedFile, err := file.Open()
 if err != nil {
-return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
+return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
 }
+defer func() {
+if err := zippedFile.Close(); err != nil {
+log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
+}
+}()

-if file.FileInfo().IsDir() {
+if file.IsDir() {
 err = os.MkdirAll(expandedFilePath, file.Mode())
 if err != nil {
 return fmt.Errorf("unable to create dir=%q from zip=%q: %w", expandedFilePath, archivePath, err)
@@ -202,20 +204,16 @@ func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) err
 if err != nil {
 return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err)
 }
+defer func() {
+if err := outputFile.Close(); err != nil {
+log.Errorf("unable to close dest file=%q from zip=%q: %+v", outputFile.Name(), archivePath, err)
+}
+}()

 if err := safeCopy(outputFile, zippedFile); err != nil {
-return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.Name, outputFile.Name(), archivePath, err)
-}
-
-err = outputFile.Close()
-if err != nil {
-return fmt.Errorf("unable to close dest file=%q from zip=%q: %w", outputFile.Name(), archivePath, err)
+return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.NameInArchive, outputFile.Name(), archivePath, err)
 }
 }

-err = zippedFile.Close()
-if err != nil {
-return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
-}
 return nil
 }
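SafeJoin is exported now because it is the zip-slip guard that the extraction paths (and the new tests below) lean on: every entry name is joined onto the extraction root, and the cleaned result is rejected if it escapes that root. A short sketch of the guard in isolation, from the same package and with hypothetical paths:

```go
package file

import "fmt"

// exampleSafeJoin is a hypothetical caller, not part of the change itself.
func exampleSafeJoin() {
	root := "/tmp/extract" // hypothetical extraction root

	if p, err := SafeJoin(root, "subdir/safe.txt"); err == nil {
		fmt.Println("ok:", p) // resolves inside the root, extraction may proceed
	}

	if _, err := SafeJoin(root, "../../etc/passwd"); err != nil {
		fmt.Println("blocked:", err) // errZipSlipDetected: the path escapes the root
	}
}
```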
@ -4,6 +4,8 @@
|
|||||||
package file
|
package file
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"archive/zip"
|
||||||
|
"context"
|
||||||
"crypto/sha256"
|
"crypto/sha256"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
@ -17,6 +19,7 @@ import (
|
|||||||
|
|
||||||
"github.com/go-test/deep"
|
"github.com/go-test/deep"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
func equal(r1, r2 io.Reader) (bool, error) {
|
func equal(r1, r2 io.Reader) (bool, error) {
|
||||||
@ -55,7 +58,7 @@ func TestUnzipToDir(t *testing.T) {
|
|||||||
expectedPaths := len(expectedZipArchiveEntries)
|
expectedPaths := len(expectedZipArchiveEntries)
|
||||||
observedPaths := 0
|
observedPaths := 0
|
||||||
|
|
||||||
err = UnzipToDir(archiveFilePath, unzipDestinationDir)
|
err = UnzipToDir(context.Background(), archiveFilePath, unzipDestinationDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unable to unzip archive: %+v", err)
|
t.Fatalf("unable to unzip archive: %+v", err)
|
||||||
}
|
}
|
||||||
@ -145,7 +148,7 @@ func TestContentsFromZip(t *testing.T) {
|
|||||||
paths = append(paths, p)
|
paths = append(paths, p)
|
||||||
}
|
}
|
||||||
|
|
||||||
actual, err := ContentsFromZip(archivePath, paths...)
|
actual, err := ContentsFromZip(context.Background(), archivePath, paths...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unable to extract from unzip archive: %+v", err)
|
t.Fatalf("unable to extract from unzip archive: %+v", err)
|
||||||
}
|
}
|
||||||
@ -307,9 +310,528 @@ func TestSafeJoin(t *testing.T) {
|
|||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
t.Run(fmt.Sprintf("%+v:%+v", test.prefix, test.args), func(t *testing.T) {
|
t.Run(fmt.Sprintf("%+v:%+v", test.prefix, test.args), func(t *testing.T) {
|
||||||
actual, err := safeJoin(test.prefix, test.args...)
|
actual, err := SafeJoin(test.prefix, test.args...)
|
||||||
test.errAssertion(t, err)
|
test.errAssertion(t, err)
|
||||||
assert.Equal(t, test.expected, actual)
|
assert.Equal(t, test.expected, actual)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestSymlinkProtection demonstrates that SafeJoin protects against symlink-based
|
||||||
|
// directory traversal attacks by validating that archive entry paths cannot escape
|
||||||
|
// the extraction directory.
|
||||||
|
func TestSafeJoin_SymlinkProtection(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
archivePath string // Path as it would appear in the archive
|
||||||
|
expectError bool
|
||||||
|
description string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "path traversal via ../",
|
||||||
|
archivePath: "../../../outside/file.txt",
|
||||||
|
expectError: true,
|
||||||
|
description: "Archive entry with ../ trying to escape extraction dir",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "absolute path symlink target",
|
||||||
|
archivePath: "../../../sensitive.txt",
|
||||||
|
expectError: true,
|
||||||
|
description: "Simulates symlink pointing outside via relative path",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "safe relative path within extraction dir",
|
||||||
|
archivePath: "subdir/safe.txt",
|
||||||
|
expectError: false,
|
||||||
|
description: "Normal file path that stays within extraction directory",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "safe path with internal ../",
|
||||||
|
archivePath: "dir1/../dir2/file.txt",
|
||||||
|
expectError: false,
|
||||||
|
description: "Path with ../ that still resolves within extraction dir",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "deeply nested traversal",
|
||||||
|
archivePath: "../../../../../../tmp/evil.txt",
|
||||||
|
expectError: true,
|
||||||
|
description: "Multiple levels of ../ trying to escape",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "single parent directory escape",
|
||||||
|
archivePath: "../",
|
||||||
|
expectError: true,
|
||||||
|
description: "Simple one-level escape attempt",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
// Create temp directories to simulate extraction scenario
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
extractDir := filepath.Join(tmpDir, "extract")
|
||||||
|
outsideDir := filepath.Join(tmpDir, "outside")
|
||||||
|
|
||||||
|
require.NoError(t, os.MkdirAll(extractDir, 0755))
|
||||||
|
require.NoError(t, os.MkdirAll(outsideDir, 0755))
|
||||||
|
|
||||||
|
// Create a file outside extraction dir that an attacker might target
|
||||||
|
outsideFile := filepath.Join(outsideDir, "sensitive.txt")
|
||||||
|
require.NoError(t, os.WriteFile(outsideFile, []byte("sensitive data"), 0644))
|
||||||
|
|
||||||
|
// Test SafeJoin - this is what happens when processing archive entries
|
||||||
|
result, err := SafeJoin(extractDir, tt.archivePath)
|
||||||
|
|
||||||
|
if tt.expectError {
|
||||||
|
// Should block malicious paths
|
||||||
|
require.Error(t, err, "Expected SafeJoin to reject malicious path")
|
||||||
|
var zipSlipErr *errZipSlipDetected
|
||||||
|
assert.ErrorAs(t, err, &zipSlipErr, "Error should be errZipSlipDetected type")
|
||||||
|
assert.Empty(t, result, "Result should be empty for blocked paths")
|
||||||
|
} else {
|
||||||
|
// Should allow safe paths
|
||||||
|
require.NoError(t, err, "Expected SafeJoin to allow safe path")
|
||||||
|
assert.NotEmpty(t, result, "Result should not be empty for safe paths")
|
||||||
|
assert.True(t, strings.HasPrefix(filepath.Clean(result), filepath.Clean(extractDir)),
|
||||||
|
"Safe path should resolve within extraction directory")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestUnzipToDir_SymlinkAttacks tests UnzipToDir function with malicious ZIP archives
|
||||||
|
// containing symlink entries that attempt path traversal attacks.
|
||||||
|
//
|
||||||
|
// EXPECTED BEHAVIOR: UnzipToDir should either:
|
||||||
|
// 1. Detect and reject symlinks explicitly with a security error, OR
|
||||||
|
// 2. Extract them safely (library converts symlinks to regular files)
|
||||||
|
func TestUnzipToDir_SymlinkAttacks(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
symlinkName string
|
||||||
|
fileName string
|
||||||
|
errContains string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "direct symlink to outside directory",
|
||||||
|
symlinkName: "evil_link",
|
||||||
|
fileName: "evil_link/payload.txt",
|
||||||
|
errContains: "not a directory", // attempt to write through symlink leaf (which is not a directory)
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "directory symlink attack",
|
||||||
|
symlinkName: "safe_dir/link",
|
||||||
|
fileName: "safe_dir/link/payload.txt",
|
||||||
|
errContains: "not a directory", // attempt to write through symlink (which is not a directory)
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "symlink without payload file",
|
||||||
|
symlinkName: "standalone_link",
|
||||||
|
fileName: "", // no payload file
|
||||||
|
errContains: "", // no error expected, symlink without payload is safe
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
|
||||||
|
// create outside target directory
|
||||||
|
outsideDir := filepath.Join(tempDir, "outside_target")
|
||||||
|
require.NoError(t, os.MkdirAll(outsideDir, 0755))
|
||||||
|
|
||||||
|
// create extraction directory
|
||||||
|
extractDir := filepath.Join(tempDir, "extract")
|
||||||
|
require.NoError(t, os.MkdirAll(extractDir, 0755))
|
||||||
|
|
||||||
|
maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, outsideDir, tt.fileName)
|
||||||
|
|
||||||
|
err := UnzipToDir(context.Background(), maliciousZip, extractDir)
|
||||||
|
|
||||||
|
// check error expectations
|
||||||
|
if tt.errContains != "" {
|
||||||
|
require.Error(t, err)
|
||||||
|
require.Contains(t, err.Error(), tt.errContains)
|
||||||
|
} else {
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
analyzeExtractionDirectory(t, extractDir)
|
||||||
|
|
||||||
|
// check if payload file escaped extraction directory
|
||||||
|
if tt.fileName != "" {
|
||||||
|
maliciousFile := filepath.Join(outsideDir, filepath.Base(tt.fileName))
|
||||||
|
checkFileOutsideExtraction(t, maliciousFile)
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if symlink was created pointing outside
|
||||||
|
symlinkPath := filepath.Join(extractDir, tt.symlinkName)
|
||||||
|
checkSymlinkCreation(t, symlinkPath, extractDir, outsideDir)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestContentsFromZip_SymlinkAttacks tests the ContentsFromZip function with malicious
|
||||||
|
// ZIP archives containing symlink entries.
|
||||||
|
//
|
||||||
|
// EXPECTED BEHAVIOR: ContentsFromZip should either:
|
||||||
|
// 1. Reject symlinks explicitly, OR
|
||||||
|
// 2. Return empty content for symlinks (library behavior)
|
||||||
|
//
|
||||||
|
// Though ContentsFromZip doesn't write to disk, but if symlinks are followed, it could read sensitive
|
||||||
|
// files from outside the archive.
|
||||||
|
func TestContentsFromZip_SymlinkAttacks(t *testing.T) {
	tests := []struct {
		name          string
		symlinkName   string
		symlinkTarget string
		requestPath   string
		errContains   string
	}{
		{
			name:          "request symlink entry directly",
			symlinkName:   "evil_link",
			symlinkTarget: "/etc/hosts", // attempt to read sensitive file
			requestPath:   "evil_link",
			errContains:   "", // no error expected - library returns symlink metadata
		},
		{
			name:          "symlink in nested directory",
			symlinkName:   "nested/link",
			symlinkTarget: "/etc/hosts",
			requestPath:   "nested/link",
			errContains:   "", // no error expected - library returns symlink metadata
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			tempDir := t.TempDir()

			// create malicious ZIP with symlink entry (no payload file needed)
			maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, tt.symlinkTarget, "")

			contents, err := ContentsFromZip(context.Background(), maliciousZip, tt.requestPath)

			// check error expectations
			if tt.errContains != "" {
				require.Error(t, err)
				require.Contains(t, err.Error(), tt.errContains)
				return
			}
			require.NoError(t, err)

			// verify symlink handling - library should return symlink target as content (metadata)
			content, found := contents[tt.requestPath]
			require.True(t, found, "symlink entry should be found in results")

			// verify symlink was NOT followed (content should be target path or empty)
			if content != "" && content != tt.symlinkTarget {
				// content is not empty and not the symlink target - check if actual file was read
				if _, statErr := os.Stat(tt.symlinkTarget); statErr == nil {
					targetContent, readErr := os.ReadFile(tt.symlinkTarget)
					if readErr == nil && string(targetContent) == content {
						t.Errorf("critical issue!... symlink was FOLLOWED and external file content was read!")
						t.Logf("  symlink: %s → %s", tt.requestPath, tt.symlinkTarget)
						t.Logf("  content length: %d bytes", len(content))
					}
				}
			}
		})
	}
}

// TestExtractFromZipToUniqueTempFile_SymlinkAttacks tests the ExtractFromZipToUniqueTempFile
// function with malicious ZIP archives containing symlink entries.
//
// EXPECTED BEHAVIOR: ExtractFromZipToUniqueTempFile should either:
// 1. Reject symlinks explicitly, OR
// 2. Extract them safely (library converts to empty files, filepath.Base sanitizes names)
//
// This function uses filepath.Base() on the archive entry name for temp file prefix and
// os.CreateTemp() which creates files in the specified directory, so it should be protected.
func TestExtractFromZipToUniqueTempFile_SymlinkAttacks(t *testing.T) {
	tests := []struct {
		name          string
		symlinkName   string
		symlinkTarget string
		requestPath   string
		errContains   string
	}{
		{
			name:          "extract symlink entry to temp file",
			symlinkName:   "evil_link",
			symlinkTarget: "/etc/passwd",
			requestPath:   "evil_link",
			errContains:   "", // no error expected - library extracts symlink metadata
		},
		{
			name:          "extract nested symlink",
			symlinkName:   "nested/dir/link",
			symlinkTarget: "/tmp/outside",
			requestPath:   "nested/dir/link",
			errContains:   "", // no error expected
		},
		{
			name:          "extract path traversal symlink name",
			symlinkName:   "../../escape",
			symlinkTarget: "/tmp/outside",
			requestPath:   "../../escape",
			errContains:   "", // no error expected - filepath.Base sanitizes name
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			tempDir := t.TempDir()

			maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, tt.symlinkTarget, "")

			// create temp directory for extraction
			extractTempDir := filepath.Join(tempDir, "temp_extract")
			require.NoError(t, os.MkdirAll(extractTempDir, 0755))

			openers, err := ExtractFromZipToUniqueTempFile(context.Background(), maliciousZip, extractTempDir, tt.requestPath)

			// check error expectations
			if tt.errContains != "" {
				require.Error(t, err)
				require.Contains(t, err.Error(), tt.errContains)
				return
			}
			require.NoError(t, err)

			// verify symlink was extracted
			opener, found := openers[tt.requestPath]
			require.True(t, found, "symlink entry should be extracted")

			// verify temp file is within temp directory
			tempFilePath := opener.path
			cleanTempDir := filepath.Clean(extractTempDir)
			cleanTempFile := filepath.Clean(tempFilePath)
			require.True(t, strings.HasPrefix(cleanTempFile, cleanTempDir),
				"temp file must be within temp directory: %s not in %s", cleanTempFile, cleanTempDir)

			// verify symlink was NOT followed (content should be target path or empty)
			f, openErr := opener.Open()
			require.NoError(t, openErr)
			defer f.Close()

			content, readErr := io.ReadAll(f)
			require.NoError(t, readErr)

			// check if symlink was followed (content matches actual file)
			if len(content) > 0 && string(content) != tt.symlinkTarget {
				if _, statErr := os.Stat(tt.symlinkTarget); statErr == nil {
					targetContent, readErr := os.ReadFile(tt.symlinkTarget)
					if readErr == nil && string(targetContent) == string(content) {
						t.Errorf("critical issue!... symlink was FOLLOWED and external file content was copied!")
						t.Logf("  symlink: %s → %s", tt.requestPath, tt.symlinkTarget)
						t.Logf("  content length: %d bytes", len(content))
					}
				}
			}
		})
	}
}

// forensicFindings contains the results of analyzing an extraction directory
type forensicFindings struct {
	symlinksFound          []forensicSymlink
	regularFiles           []string
	directories            []string
	symlinkVulnerabilities []string
}

type forensicSymlink struct {
	path              string
	target            string
	escapesExtraction bool
	resolvedPath      string
}

// analyzeExtractionDirectory walks the extraction directory and detects symlinks that point
// outside the extraction directory. It is silent unless vulnerabilities are found.
func analyzeExtractionDirectory(t *testing.T, extractDir string) forensicFindings {
	t.Helper()

	findings := forensicFindings{}

	filepath.Walk(extractDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// only log if there's an error walking the directory
			t.Logf("Error walking %s: %v", path, err)
			return nil
		}

		relPath := strings.TrimPrefix(path, extractDir+"/")
		if relPath == "" {
			relPath = "."
		}

		// use Lstat to detect symlinks without following them
		linfo, lerr := os.Lstat(path)
		if lerr == nil && linfo.Mode()&os.ModeSymlink != 0 {
			target, _ := os.Readlink(path)

			// resolve to see where it actually points
			var resolvedPath string
			var escapesExtraction bool

			if filepath.IsAbs(target) {
				// absolute symlink
				resolvedPath = target
				cleanExtractDir := filepath.Clean(extractDir)
				escapesExtraction = !strings.HasPrefix(filepath.Clean(target), cleanExtractDir)

				if escapesExtraction {
					t.Errorf("critical issue!... absolute symlink created: %s → %s", relPath, target)
					t.Logf("  this symlink points outside the extraction directory")
					findings.symlinkVulnerabilities = append(findings.symlinkVulnerabilities,
						fmt.Sprintf("absolute symlink: %s → %s", relPath, target))
				}
			} else {
				// relative symlink - resolve it
				resolvedPath = filepath.Join(filepath.Dir(path), target)
				cleanResolved := filepath.Clean(resolvedPath)
				cleanExtractDir := filepath.Clean(extractDir)

				escapesExtraction = !strings.HasPrefix(cleanResolved, cleanExtractDir)

				if escapesExtraction {
					t.Errorf("critical issue!... symlink escapes extraction dir: %s → %s", relPath, target)
					t.Logf("  symlink resolves to: %s (outside extraction directory)", cleanResolved)
					findings.symlinkVulnerabilities = append(findings.symlinkVulnerabilities,
						fmt.Sprintf("relative symlink escape: %s → %s (resolves to %s)", relPath, target, cleanResolved))
				}
			}

			findings.symlinksFound = append(findings.symlinksFound, forensicSymlink{
				path:              relPath,
				target:            target,
				escapesExtraction: escapesExtraction,
				resolvedPath:      resolvedPath,
			})
		} else {
			// regular file or directory - collect silently
			if info.IsDir() {
				findings.directories = append(findings.directories, relPath)
			} else {
				findings.regularFiles = append(findings.regularFiles, relPath)
			}
		}
		return nil
	})

	return findings
}

// checkFileOutsideExtraction checks if a file was written outside the extraction directory.
// Returns true if the file exists (vulnerability), false otherwise. Silent on success.
func checkFileOutsideExtraction(t *testing.T, filePath string) bool {
	t.Helper()

	if stat, err := os.Stat(filePath); err == nil {
		content, _ := os.ReadFile(filePath)
		t.Errorf("critical issue!... file written OUTSIDE extraction directory!")
		t.Logf("  location: %s", filePath)
		t.Logf("  size: %d bytes", stat.Size())
		t.Logf("  content: %s", string(content))
		t.Logf("  ...this means an attacker can write files to arbitrary locations on the filesystem")
		return true
	}
	// no file found outside extraction directory...
	return false
}

// checkSymlinkCreation verifies if a symlink was created at the expected path and reports
// whether it points outside the extraction directory. Silent unless a symlink is found.
func checkSymlinkCreation(t *testing.T, symlinkPath, extractDir, expectedTarget string) bool {
	t.Helper()

	if linfo, err := os.Lstat(symlinkPath); err == nil {
		if linfo.Mode()&os.ModeSymlink != 0 {
			target, _ := os.Readlink(symlinkPath)

			if expectedTarget != "" && target == expectedTarget {
				t.Errorf("critical issue!... symlink pointing outside extraction dir was created!")
				t.Logf("  Symlink: %s → %s", symlinkPath, target)
				return true
			}

			// Check if it escapes even if target doesn't match expected
			if filepath.IsAbs(target) {
				cleanExtractDir := filepath.Clean(extractDir)
				if !strings.HasPrefix(filepath.Clean(target), cleanExtractDir) {
					t.Errorf("critical issue!... absolute symlink escapes extraction dir!")
					t.Logf("  symlink: %s → %s", symlinkPath, target)
					return true
				}
			}
		}
		// if it exists but is not a symlink, that's good (attack was thwarted)...
	}

	return false
}

// createMaliciousZipWithSymlink creates a ZIP archive containing a symlink entry pointing to an arbitrary target,
// followed by a file entry that attempts to write through that symlink.
// returns the path to the created ZIP archive.
func createMaliciousZipWithSymlink(t *testing.T, tempDir, symlinkName, symlinkTarget, fileName string) string {
	t.Helper()

	maliciousZip := filepath.Join(tempDir, "malicious.zip")
	zipFile, err := os.Create(maliciousZip)
	require.NoError(t, err)
	defer zipFile.Close()

	zw := zip.NewWriter(zipFile)

	// create parent directories if the symlink is nested
	if dir := filepath.Dir(symlinkName); dir != "." {
		dirHeader := &zip.FileHeader{
			Name:   dir + "/",
			Method: zip.Store,
		}
		dirHeader.SetMode(os.ModeDir | 0755)
		_, err = zw.CreateHeader(dirHeader)
		require.NoError(t, err)
	}

	// create symlink entry pointing outside extraction directory
	// note: ZIP format stores symlinks as regular files with the target path as content
	symlinkHeader := &zip.FileHeader{
		Name:   symlinkName,
		Method: zip.Store,
	}
	symlinkHeader.SetMode(os.ModeSymlink | 0755)

	symlinkWriter, err := zw.CreateHeader(symlinkHeader)
	require.NoError(t, err)

	// write the symlink target as the file content (this is how ZIP stores symlinks)
	_, err = symlinkWriter.Write([]byte(symlinkTarget))
	require.NoError(t, err)

	// create file entry that will be written through the symlink
	if fileName != "" {
		payloadContent := []byte("MALICIOUS PAYLOAD - This should NOT be written outside extraction dir!")
		payloadHeader := &zip.FileHeader{
			Name:   fileName,
			Method: zip.Deflate,
		}
		payloadHeader.SetMode(0644)

		payloadWriter, err := zw.CreateHeader(payloadHeader)
		require.NoError(t, err)

		_, err = payloadWriter.Write(payloadContent)
		require.NoError(t, err)
	}

	require.NoError(t, zw.Close())
	require.NoError(t, zipFile.Close())

	return maliciousZip
}
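The tests above pin down the expected defensive behavior: reject symlink entries (or extract them as inert regular files) and never write through a link or outside the destination. For reference only, here is a minimal sketch of that behavior — it is not the repository's UnzipToDir implementation, and the package and function names are hypothetical — using only the standard library:

package safezip // hypothetical package name for this sketch

import (
	"archive/zip"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"
)

// safeUnzip extracts archivePath into destDir, refusing symlink entries and
// any entry whose resolved path would land outside destDir.
func safeUnzip(archivePath, destDir string) error {
	r, err := zip.OpenReader(archivePath)
	if err != nil {
		return err
	}
	defer r.Close()

	cleanDest := filepath.Clean(destDir)
	for _, entry := range r.File {
		// a symlink entry would let a later entry write through the link
		if entry.Mode()&os.ModeSymlink != 0 {
			return fmt.Errorf("refusing symlink entry %q", entry.Name)
		}
		// the joined, cleaned target must stay inside the destination directory
		target := filepath.Join(cleanDest, filepath.Clean(entry.Name))
		if !strings.HasPrefix(target, cleanDest+string(os.PathSeparator)) {
			return fmt.Errorf("entry %q escapes the destination directory", entry.Name)
		}
		if entry.FileInfo().IsDir() {
			if err := os.MkdirAll(target, 0o755); err != nil {
				return err
			}
			continue
		}
		if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
			return err
		}
		src, err := entry.Open()
		if err != nil {
			return err
		}
		dst, err := os.OpenFile(target, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
		if err != nil {
			src.Close()
			return err
		}
		_, copyErr := io.Copy(dst, src)
		src.Close()
		dst.Close()
		if copyErr != nil {
			return copyErr
		}
	}
	return nil
}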
@ -1,229 +0,0 @@
package file

import (
	"archive/zip"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"math"
	"os"

	"github.com/anchore/syft/internal/log"
)

// directoryEndLen, readBuf, directoryEnd, and findSignatureInBlock were copied from the golang stdlib, specifically:
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/struct.go
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/reader.go
// findArchiveStartOffset is derived from the same stdlib utils, specifically the readDirectoryEnd function.

const (
	directoryEndLen         = 22
	directory64LocLen       = 20
	directory64EndLen       = 56
	directory64LocSignature = 0x07064b50
	directory64EndSignature = 0x06064b50
)

// ZipReadCloser is a drop-in replacement for zip.ReadCloser (from zip.OpenReader) that additionally considers zips
// that have bytes prefixed to the front of the archive (common with self-extracting jars).
type ZipReadCloser struct {
	*zip.Reader
	io.Closer
}

// OpenZip provides a ZipReadCloser for the given filepath.
func OpenZip(filepath string) (*ZipReadCloser, error) {
	f, err := os.Open(filepath)
	if err != nil {
		return nil, err
	}
	fi, err := f.Stat()
	if err != nil {
		f.Close()
		return nil, err
	}

	// some archives may have bytes prepended to the front of the archive, such as with self executing JARs. We first
	// need to find the start of the archive and keep track of this offset.
	offset, err := findArchiveStartOffset(f, fi.Size())
	if err != nil {
		log.Debugf("cannot find beginning of zip archive=%q : %v", filepath, err)
		return nil, err
	}

	if _, err := f.Seek(0, io.SeekStart); err != nil {
		return nil, fmt.Errorf("unable to seek to beginning of archive: %w", err)
	}

	if offset > math.MaxInt64 {
		return nil, fmt.Errorf("archive start offset too large: %v", offset)
	}
	offset64 := int64(offset)

	size := fi.Size() - offset64

	r, err := zip.NewReader(io.NewSectionReader(f, offset64, size), size)
	if err != nil {
		log.Debugf("unable to open ZipReadCloser @ %q: %v", filepath, err)
		return nil, err
	}

	return &ZipReadCloser{
		Reader: r,
		Closer: f,
	}, nil
}

type readBuf []byte

func (b *readBuf) uint16() uint16 {
	v := binary.LittleEndian.Uint16(*b)
	*b = (*b)[2:]
	return v
}

func (b *readBuf) uint32() uint32 {
	v := binary.LittleEndian.Uint32(*b)
	*b = (*b)[4:]
	return v
}

func (b *readBuf) uint64() uint64 {
	v := binary.LittleEndian.Uint64(*b)
	*b = (*b)[8:]
	return v
}

type directoryEnd struct {
	diskNbr            uint32 // unused
	dirDiskNbr         uint32 // unused
	dirRecordsThisDisk uint64 // unused
	directoryRecords   uint64
	directorySize      uint64
	directoryOffset    uint64 // relative to file
}

// note: this is derived from readDirectoryEnd within the archive/zip package
func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, err error) {
	// look for directoryEndSignature in the last 1k, then in the last 65k
	var buf []byte
	var directoryEndOffset int64
	for i, bLen := range []int64{1024, 65 * 1024} {
		if bLen > size {
			bLen = size
		}
		buf = make([]byte, int(bLen))
		if _, err := r.ReadAt(buf, size-bLen); err != nil && !errors.Is(err, io.EOF) {
			return 0, err
		}
		if p := findSignatureInBlock(buf); p >= 0 {
			buf = buf[p:]
			directoryEndOffset = size - bLen + int64(p)
			break
		}
		if i == 1 || bLen == size {
			return 0, zip.ErrFormat
		}
	}

	if buf == nil {
		// we were unable to find the directoryEndSignature block
		return 0, zip.ErrFormat
	}

	// read header into struct
	b := readBuf(buf[4:]) // skip signature
	d := &directoryEnd{
		diskNbr:            uint32(b.uint16()),
		dirDiskNbr:         uint32(b.uint16()),
		dirRecordsThisDisk: uint64(b.uint16()),
		directoryRecords:   uint64(b.uint16()),
		directorySize:      uint64(b.uint32()),
		directoryOffset:    uint64(b.uint32()),
	}
	// Calculate where the zip data actually begins

	// These values mean that the file can be a zip64 file
	if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
		p, err := findDirectory64End(r, directoryEndOffset)
		if err == nil && p >= 0 {
			directoryEndOffset = p
			err = readDirectory64End(r, p, d)
		}
		if err != nil {
			return 0, err
		}
	}
	startOfArchive = uint64(directoryEndOffset) - d.directorySize - d.directoryOffset

	// Make sure directoryOffset points to somewhere in our file.
	if d.directoryOffset >= uint64(size) {
		return 0, zip.ErrFormat
	}
	return startOfArchive, nil
}

// findDirectory64End tries to read the zip64 locator just before the
// directory end and returns the offset of the zip64 directory end if
// found.
func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
	locOffset := directoryEndOffset - directory64LocLen
	if locOffset < 0 {
		return -1, nil // no need to look for a header outside the file
	}
	buf := make([]byte, directory64LocLen)
	if _, err := r.ReadAt(buf, locOffset); err != nil {
		return -1, err
	}
	b := readBuf(buf)
	if sig := b.uint32(); sig != directory64LocSignature {
		return -1, nil
	}
	if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory
		return -1, nil // the file is not a valid zip64-file
	}
	p := b.uint64()      // relative offset of the zip64 end of central directory record
	if b.uint32() != 1 { // total number of disks
		return -1, nil // the file is not a valid zip64-file
	}
	return int64(p), nil
}

// readDirectory64End reads the zip64 directory end and updates the
// directory end with the zip64 directory end values.
func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
	buf := make([]byte, directory64EndLen)
	if _, err := r.ReadAt(buf, offset); err != nil {
		return err
	}

	b := readBuf(buf)
	if sig := b.uint32(); sig != directory64EndSignature {
		return errors.New("could not read directory64End")
	}

	b = b[12:]                        // skip dir size, version and version needed (uint64 + 2x uint16)
	d.diskNbr = b.uint32()            // number of this disk
	d.dirDiskNbr = b.uint32()         // number of the disk with the start of the central directory
	d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk
	d.directoryRecords = b.uint64()   // total number of entries in the central directory
	d.directorySize = b.uint64()      // size of the central directory
	d.directoryOffset = b.uint64()    // offset of start of central directory with respect to the starting disk number

	return nil
}

func findSignatureInBlock(b []byte) int {
	for i := len(b) - directoryEndLen; i >= 0; i-- {
		// defined from directoryEndSignature
		if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
			// n is length of comment
			n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
			if n+directoryEndLen+i <= len(b) {
				return i
			}
		}
	}
	return -1
}
@ -1,50 +0,0 @@
//go:build !windows
// +build !windows

package file

import (
	"os"
	"testing"

	"github.com/stretchr/testify/assert"
)

func TestFindArchiveStartOffset(t *testing.T) {
	tests := []struct {
		name        string
		archivePrep func(tb testing.TB) string
		expected    uint64
	}{
		{
			name:        "standard, non-nested zip",
			archivePrep: prepZipSourceFixture,
			expected:    0,
		},
		{
			name:        "zip with prepended bytes",
			archivePrep: prependZipSourceFixtureWithString(t, "junk at the beginning of the file..."),
			expected:    36,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			archivePath := test.archivePrep(t)
			f, err := os.Open(archivePath)
			if err != nil {
				t.Fatalf("could not open archive %q: %+v", archivePath, err)
			}
			fi, err := os.Stat(f.Name())
			if err != nil {
				t.Fatalf("unable to stat archive: %+v", err)
			}

			actual, err := findArchiveStartOffset(f, fi.Size())
			if err != nil {
				t.Fatalf("unable to find offset: %+v", err)
			}
			assert.Equal(t, test.expected, actual)
		})
	}
}
@ -27,6 +27,7 @@ func AllTypes() []any {
 		pkg.ELFBinaryPackageNoteJSONPayload{},
 		pkg.ElixirMixLockEntry{},
 		pkg.ErlangRebarLockEntry{},
+		pkg.GGUFFileHeader{},
 		pkg.GitHubActionsUseStatement{},
 		pkg.GolangBinaryBuildinfoEntry{},
 		pkg.GolangModuleEntry{},
@ -124,6 +124,7 @@ var jsonTypes = makeJSONTypes(
 	jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"),
 	jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
 	jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
+	jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"),
 )

 func expandLegacyNameVariants(names ...string) []string {
@ -3,6 +3,7 @@ package task
 import (
 	"github.com/anchore/syft/syft/cataloging/pkgcataloging"
 	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/pkg/cataloger/ai"
 	"github.com/anchore/syft/syft/pkg/cataloger/alpine"
 	"github.com/anchore/syft/syft/pkg/cataloger/arch"
 	"github.com/anchore/syft/syft/pkg/cataloger/binary"
@ -178,6 +179,7 @@ func DefaultPackageTaskFactories() Factories {
 		newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"),
 		newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"),
 		newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"),
+		newSimplePackageTaskFactory(ai.NewGGUFCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "gguf", "ml"),

 		// deprecated catalogers ////////////////////////////////////////
 		// these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible)
@ -4,7 +4,8 @@ import (
 	"context"
 	"strings"

-	"github.com/anchore/archiver/v3"
+	"github.com/mholt/archives"

 	"github.com/anchore/syft/internal/log"
 	"github.com/anchore/syft/internal/sbomsync"
 	"github.com/anchore/syft/syft/cataloging"
@ -57,9 +58,10 @@ func (c unknownsLabelerTask) finalize(resolver file.Resolver, s *sbom.SBOM) {
 	}

 	if c.IncludeUnexpandedArchives {
+		ctx := context.Background()
 		for coords := range s.Artifacts.FileMetadata {
-			unarchiver, notArchiveErr := archiver.ByExtension(coords.RealPath)
-			if unarchiver != nil && notArchiveErr == nil && !hasPackageReference(coords) {
+			format, _, notArchiveErr := archives.Identify(ctx, coords.RealPath, nil)
+			if format != nil && notArchiveErr == nil && !hasPackageReference(coords) {
 				s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged")
 			}
 		}
schema/json/schema-16.0.43.json (new file, 4193 lines)
File diff suppressed because it is too large
@ -1,6 +1,6 @@
 {
 	"$schema": "https://json-schema.org/draft/2020-12/schema",
-	"$id": "anchore.io/schema/syft/json/16.0.42/document",
+	"$id": "anchore.io/schema/syft/json/16.0.43/document",
 	"$ref": "#/$defs/Document",
 	"$defs": {
 		"AlpmDbEntry": {
@ -1433,6 +1433,48 @@
 		],
 		"description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file."
 	},
+	"GgufFileHeader": {
+		"properties": {
+			"ggufVersion": {
+				"type": "integer",
+				"description": "GGUFVersion is the GGUF format version (e.g., 3)"
+			},
+			"fileSize": {
+				"type": "integer",
+				"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
+			},
+			"architecture": {
+				"type": "string",
+				"description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")"
+			},
+			"quantization": {
+				"type": "string",
+				"description": "Quantization is the quantization type (e.g., \"IQ4_NL\", \"Q4_K_M\")"
+			},
+			"parameters": {
+				"type": "integer",
+				"description": "Parameters is the number of model parameters (if present in header)"
+			},
+			"tensorCount": {
+				"type": "integer",
+				"description": "TensorCount is the number of tensors in the model"
+			},
+			"header": {
+				"type": "object",
+				"description": "RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
+			},
+			"metadataHash": {
+				"type": "string",
+				"description": "MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
+			}
+		},
+		"type": "object",
+		"required": [
+			"ggufVersion",
+			"tensorCount"
+		],
+		"description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file."
+	},
 	"GithubActionsUseStatement": {
 		"properties": {
 			"value": {
@ -2579,6 +2621,9 @@
 			{
 				"$ref": "#/$defs/ErlangRebarLockEntry"
 			},
+			{
+				"$ref": "#/$defs/GgufFileHeader"
+			},
 			{
 				"$ref": "#/$defs/GithubActionsUseStatement"
 			},
@ -1,11 +1,13 @@
 package model

 import (
+	"context"
 	"fmt"
 	"strings"
 	"time"

-	"github.com/anchore/archiver/v3"
+	"github.com/mholt/archives"

 	"github.com/anchore/packageurl-go"
 	"github.com/anchore/syft/internal/log"
 	"github.com/anchore/syft/syft/pkg"
@ -153,8 +155,8 @@ func trimRelative(s string) string {

 // isArchive returns true if the path appears to be an archive
 func isArchive(path string) bool {
-	_, err := archiver.ByExtension(path)
-	return err == nil
+	format, _, err := archives.Identify(context.Background(), path, nil)
+	return err == nil && format != nil
 }

 func toDependencies(s *sbom.SBOM, p pkg.Package) (out []string) {
@ -40,8 +40,11 @@ func EncodeComponent(p pkg.Package, supplier string, locationSorter func(a, b fi
 	}

 	componentType := cyclonedx.ComponentTypeLibrary
-	if p.Type == pkg.BinaryPkg {
+	switch p.Type {
+	case pkg.BinaryPkg:
 		componentType = cyclonedx.ComponentTypeApplication
+	case pkg.ModelPkg:
+		componentType = cyclonedx.ComponentTypeMachineLearningModel
 	}

 	return cyclonedx.Component{
@ -62,7 +62,7 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str
 	switch component.Type {
 	case cyclonedx.ComponentTypeOS:
 	case cyclonedx.ComponentTypeContainer:
-	case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary:
+	case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary, cyclonedx.ComponentTypeMachineLearningModel:
 		p := decodeComponent(component)
 		idMap[component.BOMRef] = p
 		if component.BOMRef != "" {
@ -55,6 +55,7 @@ func Test_OriginatorSupplier(t *testing.T) {
 		pkg.OpamPackage{},
 		pkg.YarnLockEntry{},
 		pkg.TerraformLockProviderEntry{},
+		pkg.GGUFFileHeader{},
 	)
 	tests := []struct {
 		name     string
@ -82,6 +82,8 @@ func SourceInfo(p pkg.Package) string {
 		answer = "acquired package info from Homebrew formula"
 	case pkg.TerraformPkg:
 		answer = "acquired package info from Terraform dependency lock file"
+	case pkg.ModelPkg:
+		answer = "acquired package info from AI artifact (e.g. GGUF File)"
 	default:
 		answer = "acquired package info from the following paths"
 	}
@ -351,6 +351,14 @@ func Test_SourceInfo(t *testing.T) {
 				"acquired package info from Terraform dependency lock file",
 			},
 		},
+		{
+			input: pkg.Package{
+				Type: pkg.ModelPkg,
+			},
+			expected: []string{
+				"",
+			},
+		},
 	}
 	var pkgTypes []pkg.Type
 	for _, test := range tests {
syft/pkg/cataloger/ai/cataloger.go (new file, 16 lines)
@ -0,0 +1,16 @@
/*
Package ai provides concrete Cataloger implementations for AI artifacts and machine learning models,
including support for GGUF (GPT-Generated Unified Format) model files.
*/
package ai

import (
	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

// NewGGUFCataloger returns a new cataloger instance for GGUF model files.
func NewGGUFCataloger() pkg.Cataloger {
	return generic.NewCataloger("gguf-cataloger").
		WithParserByGlobs(parseGGUFModel, "**/*.gguf")
}
syft/pkg/cataloger/ai/cataloger_test.go (new file, 140 lines)
@ -0,0 +1,140 @@
package ai

import (
	"os"
	"path/filepath"
	"testing"

	"github.com/anchore/syft/syft/artifact"
	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
)

func TestGGUFCataloger_Globs(t *testing.T) {
	tests := []struct {
		name     string
		fixture  string
		expected []string
	}{
		{
			name:    "obtain gguf files",
			fixture: "test-fixtures/glob-paths",
			expected: []string{
				"models/model.gguf",
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			pkgtest.NewCatalogTester().
				FromDirectory(t, test.fixture).
				ExpectsResolverContentQueries(test.expected).
				TestCataloger(t, NewGGUFCataloger())
		})
	}
}

func TestGGUFCataloger(t *testing.T) {
	tests := []struct {
		name                  string
		setup                 func(t *testing.T) string
		expectedPackages      []pkg.Package
		expectedRelationships []artifact.Relationship
	}{
		{
			name: "catalog single GGUF file",
			setup: func(t *testing.T) string {
				dir := t.TempDir()
				data := newTestGGUFBuilder().
					withVersion(3).
					withStringKV("general.architecture", "llama").
					withStringKV("general.name", "llama3-8b").
					withStringKV("general.version", "3.0").
					withStringKV("general.license", "Apache-2.0").
					withStringKV("general.quantization", "Q4_K_M").
					withUint64KV("general.parameter_count", 8030000000).
					withStringKV("general.some_random_kv", "foobar").
					build()

				path := filepath.Join(dir, "llama3-8b.gguf")
				os.WriteFile(path, data, 0644)
				return dir
			},
			expectedPackages: []pkg.Package{
				{
					Name:    "llama3-8b",
					Version: "3.0",
					Type:    pkg.ModelPkg,
					Licenses: pkg.NewLicenseSet(
						pkg.NewLicenseFromFields("Apache-2.0", "", nil),
					),
					Metadata: pkg.GGUFFileHeader{
						Architecture:          "llama",
						Quantization:          "Unknown",
						Parameters:            0,
						GGUFVersion:           3,
						TensorCount:           0,
						MetadataKeyValuesHash: "6e3d368066455ce4",
						RemainingKeyValues: map[string]interface{}{
							"general.some_random_kv": "foobar",
						},
					},
				},
			},
			expectedRelationships: nil,
		},
		{
			name: "catalog GGUF file with minimal metadata",
			setup: func(t *testing.T) string {
				dir := t.TempDir()
				data := newTestGGUFBuilder().
					withVersion(3).
					withStringKV("general.architecture", "gpt2").
					withStringKV("general.name", "gpt2-small").
					withStringKV("gpt2.context_length", "1024").
					withUint32KV("gpt2.embedding_length", 768).
					build()

				path := filepath.Join(dir, "gpt2-small.gguf")
				os.WriteFile(path, data, 0644)
				return dir
			},
			expectedPackages: []pkg.Package{
				{
					Name:     "gpt2-small",
					Version:  "",
					Type:     pkg.ModelPkg,
					Licenses: pkg.NewLicenseSet(),
					Metadata: pkg.GGUFFileHeader{
						Architecture:          "gpt2",
						Quantization:          "Unknown",
						Parameters:            0,
						GGUFVersion:           3,
						TensorCount:           0,
						MetadataKeyValuesHash: "9dc6f23591062a27",
						RemainingKeyValues: map[string]interface{}{
							"gpt2.context_length":   "1024",
							"gpt2.embedding_length": uint32(768),
						},
					},
				},
			},
			expectedRelationships: nil,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			fixtureDir := tt.setup(t)

			// Use pkgtest to catalog and compare
			pkgtest.NewCatalogTester().
				FromDirectory(t, fixtureDir).
				Expects(tt.expectedPackages, tt.expectedRelationships).
				IgnoreLocationLayer().
				IgnorePackageFields("FoundBy", "Locations").
				TestCataloger(t, NewGGUFCataloger())
		})
	}
}
syft/pkg/cataloger/ai/package.go (new file, 22 lines)
@ -0,0 +1,22 @@
package ai

import (
	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/pkg"
)

func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license string, locations ...file.Location) pkg.Package {
	p := pkg.Package{
		Name:      modelName,
		Version:   version,
		Locations: file.NewLocationSet(locations...),
		Type:      pkg.ModelPkg,
		Licenses:  pkg.NewLicenseSet(pkg.NewLicensesFromValues(license)...),
		Metadata:  *metadata,
		// NOTE: PURL is intentionally not set as the package-url spec
		// has not yet finalized support for ML model packages
	}
	p.SetID()

	return p
}
syft/pkg/cataloger/ai/package_test.go (new file, 121 lines)
@ -0,0 +1,121 @@
package ai

import (
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
)

func TestNewGGUFPackage(t *testing.T) {
	tests := []struct {
		name     string
		metadata *pkg.GGUFFileHeader
		input    struct {
			modelName string
			version   string
			license   string
			locations []file.Location
		}
		expected pkg.Package
	}{
		{
			name: "complete GGUF package with all fields",
			input: struct {
				modelName string
				version   string
				license   string
				locations []file.Location
			}{
				modelName: "llama3-8b",
				version:   "3.0",
				license:   "Apache-2.0",
				locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
			},
			metadata: &pkg.GGUFFileHeader{
				Architecture: "llama",
				Quantization: "Q4_K_M",
				Parameters:   8030000000,
				GGUFVersion:  3,
				TensorCount:  291,
				RemainingKeyValues: map[string]any{
					"general.random_kv": "foobar",
				},
			},
			expected: pkg.Package{
				Name:    "llama3-8b",
				Version: "3.0",
				Type:    pkg.ModelPkg,
				Licenses: pkg.NewLicenseSet(
					pkg.NewLicenseFromFields("Apache-2.0", "", nil),
				),
				Metadata: pkg.GGUFFileHeader{
					Architecture: "llama",
					Quantization: "Q4_K_M",
					Parameters:   8030000000,
					GGUFVersion:  3,
					TensorCount:  291,
					RemainingKeyValues: map[string]any{
						"general.random_kv": "foobar",
					},
				},
				Locations: file.NewLocationSet(file.NewLocation("/models/llama3-8b.gguf")),
			},
		},
		{
			name: "minimal GGUF package",
			input: struct {
				modelName string
				version   string
				license   string
				locations []file.Location
			}{
				modelName: "gpt2-small",
				version:   "1.0",
				license:   "MIT",
				locations: []file.Location{file.NewLocation("/models/simple.gguf")},
			},
			metadata: &pkg.GGUFFileHeader{
				Architecture: "gpt2",
				GGUFVersion:  3,
				TensorCount:  50,
			},
			expected: pkg.Package{
				Name:    "gpt2-small",
				Version: "1.0",
				Type:    pkg.ModelPkg,
				Licenses: pkg.NewLicenseSet(
					pkg.NewLicenseFromFields("MIT", "", nil),
				),
				Metadata: pkg.GGUFFileHeader{
					Architecture: "gpt2",
					GGUFVersion:  3,
					TensorCount:  50,
				},
				Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")),
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			actual := newGGUFPackage(
				tt.metadata,
				tt.input.modelName,
				tt.input.version,
				tt.input.license,
				tt.input.locations...,
			)

			// Verify metadata type
			_, ok := actual.Metadata.(pkg.GGUFFileHeader)
			require.True(t, ok, "metadata should be GGUFFileHeader")

			// Use AssertPackagesEqual for comprehensive comparison
			pkgtest.AssertPackagesEqual(t, tt.expected, actual)
		})
	}
}
syft/pkg/cataloger/ai/parse_gguf.go (new file, 63 lines)
@ -0,0 +1,63 @@
package ai

import (
	"encoding/binary"
	"fmt"
	"io"

	gguf_parser "github.com/gpustack/gguf-parser-go"
)

// GGUF file format constants
const (
	ggufMagicNumber = 0x46554747       // "GGUF" in little-endian
	maxHeaderSize   = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies
)

// copyHeader copies the GGUF header from the reader to the writer.
// It validates the magic number first, then copies the rest of the data.
// The reader should be wrapped with io.LimitedReader to prevent OOM issues.
func copyHeader(w io.Writer, r io.Reader) error {
	// Read initial chunk to validate magic number
	// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
	initialBuf := make([]byte, 24) // Enough for magic, version, tensor count, and kv count
	if _, err := io.ReadFull(r, initialBuf); err != nil {
		return fmt.Errorf("failed to read GGUF header prefix: %w", err)
	}

	// Verify magic number
	magic := binary.LittleEndian.Uint32(initialBuf[0:4])
	if magic != ggufMagicNumber {
		return fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
	}

	// Write the initial buffer to the writer
	if _, err := w.Write(initialBuf); err != nil {
		return fmt.Errorf("failed to write GGUF header prefix: %w", err)
	}

	// Copy the rest of the header from reader to writer
	// The LimitedReader will return EOF once maxHeaderSize is reached
	if _, err := io.Copy(w, r); err != nil {
		return fmt.Errorf("failed to copy GGUF header: %w", err)
	}

	return nil
}

// Helper to convert gguf_parser metadata to simpler types
func convertGGUFMetadataKVs(kvs gguf_parser.GGUFMetadataKVs) map[string]interface{} {
	result := make(map[string]interface{})

	for _, kv := range kvs {
		// Skip standard fields that are extracted separately
		switch kv.Key {
		case "general.architecture", "general.name", "general.license",
			"general.version", "general.parameter_count", "general.quantization":
			continue
		}
		result[kv.Key] = kv.Value
	}

	return result
}
syft/pkg/cataloger/ai/parse_gguf_model.go
Normal file
135
syft/pkg/cataloger/ai/parse_gguf_model.go
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/cespare/xxhash/v2"
|
||||||
|
gguf_parser "github.com/gpustack/gguf-parser-go"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/internal"
|
||||||
|
"github.com/anchore/syft/internal/log"
|
||||||
|
"github.com/anchore/syft/internal/unknown"
|
||||||
|
"github.com/anchore/syft/syft/artifact"
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||||
|
)
|
||||||
|
|
||||||
|
// parseGGUFModel parses a GGUF model file and returns the discovered package.
|
||||||
|
// This implementation only reads the header portion of the file, not the entire model.
|
||||||
|
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||||
|
defer internal.CloseAndLogError(reader, reader.Path())
|
||||||
|
|
||||||
|
// Create a temporary file for the library to parse
|
||||||
|
// The library requires a file path, so we create a temp file
|
||||||
|
tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf")
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
|
||||||
|
}
|
||||||
|
tempPath := tempFile.Name()
|
||||||
|
defer os.Remove(tempPath)
|
||||||
|
|
||||||
|
// Copy and validate the GGUF file header using LimitedReader to prevent OOM
|
||||||
|
// We use LimitedReader to cap reads at maxHeaderSize (50MB)
|
||||||
|
limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
|
||||||
|
    if err := copyHeader(tempFile, limitedReader); err != nil {
        tempFile.Close()
        return nil, nil, fmt.Errorf("failed to copy GGUF header: %w", err)
    }
    tempFile.Close()

    // Parse using gguf-parser-go with options to skip unnecessary data
    ggufFile, err := gguf_parser.ParseGGUFFile(tempPath,
        gguf_parser.SkipLargeMetadata(),
    )
    if err != nil {
        return nil, nil, fmt.Errorf("failed to parse GGUF file: %w", err)
    }

    // Extract metadata
    metadata := ggufFile.Metadata()

    // Extract version separately (will be set on Package.Version)
    modelVersion := extractVersion(ggufFile.Header.MetadataKV)

    // Convert to syft metadata structure
    syftMetadata := &pkg.GGUFFileHeader{
        Architecture:          metadata.Architecture,
        Quantization:          metadata.FileTypeDescriptor,
        Parameters:            uint64(metadata.Parameters),
        GGUFVersion:           uint32(ggufFile.Header.Version),
        TensorCount:           ggufFile.Header.TensorCount,
        RemainingKeyValues:    convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
        MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
    }

    // If model name is not in metadata, use filename
    if metadata.Name == "" {
        metadata.Name = extractModelNameFromPath(reader.Path())
    }

    // Create package from metadata
    p := newGGUFPackage(
        syftMetadata,
        metadata.Name,
        modelVersion,
        metadata.License,
        reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
    )

    return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse GGUF file")
}

// computeKVMetadataHash computes a stable hash of the KV metadata for use as a global identifier
func computeKVMetadataHash(metadata gguf_parser.GGUFMetadataKVs) string {
    // Sort the KV pairs by key for stable hashing
    sortedKVs := make([]gguf_parser.GGUFMetadataKV, len(metadata))
    copy(sortedKVs, metadata)
    sort.Slice(sortedKVs, func(i, j int) bool {
        return sortedKVs[i].Key < sortedKVs[j].Key
    })

    // Marshal sorted KVs to JSON for stable hashing
    jsonBytes, err := json.Marshal(sortedKVs)
    if err != nil {
        log.Debugf("failed to marshal metadata for hashing: %v", err)
        return ""
    }

    // Compute xxhash
    hash := xxhash.Sum64(jsonBytes)
    return fmt.Sprintf("%016x", hash) // 16 hex chars (64 bits)
}

// extractVersion attempts to extract version from metadata KV pairs
func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string {
    for _, kv := range kvs {
        if kv.Key == "general.version" {
            if v, ok := kv.Value.(string); ok && v != "" {
                return v
            }
        }
    }
    return ""
}

// extractModelNameFromPath extracts the model name from the file path
func extractModelNameFromPath(path string) string {
    // Get the base filename
    base := filepath.Base(path)

    // Remove .gguf extension
    name := strings.TrimSuffix(base, ".gguf")

    return name
}

// integrity check
var _ generic.Parser = parseGGUFModel
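Since computeKVMetadataHash sorts the key/value pairs before marshaling and hashing, the resulting identifier should not depend on the order in which a particular GGUF file happens to store its header metadata. A minimal sketch of that property, written against the same package context as the parser above (the function name is hypothetical; it only relies on the Key and Value fields of gguf_parser.GGUFMetadataKV that the code above already uses):

// Illustrative sketch (not part of the diff): the hash should be insensitive to
// the ordering of header key/value pairs, since they are sorted before hashing.
func exampleHashStability() bool {
    a := gguf_parser.GGUFMetadataKVs{
        {Key: "general.architecture", Value: "llama"},
        {Key: "general.name", Value: "tinyllama"},
    }
    b := gguf_parser.GGUFMetadataKVs{
        {Key: "general.name", Value: "tinyllama"},
        {Key: "general.architecture", Value: "llama"},
    }
    // both calls should yield the same 16-character hex digest
    return computeKVMetadataHash(a) == computeKVMetadataHash(b)
}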
128
syft/pkg/cataloger/ai/test_helpers_test.go
Normal file
@ -0,0 +1,128 @@
package ai

import (
    "bytes"
    "encoding/binary"
)

// GGUF type constants for test builder
// https://github.com/ggml-org/ggml/blob/master/docs/gguf.md
const (
    ggufMagic       = 0x46554747 // "GGUF" in little-endian
    ggufTypeUint8   = 0
    ggufTypeInt8    = 1
    ggufTypeUint16  = 2
    ggufTypeInt16   = 3
    ggufTypeUint32  = 4
    ggufTypeInt32   = 5
    ggufTypeFloat32 = 6
    ggufTypeBool    = 7
    ggufTypeString  = 8
    ggufTypeArray   = 9
    ggufTypeUint64  = 10
    ggufTypeInt64   = 11
    ggufTypeFloat64 = 12
)

// testGGUFBuilder helps build GGUF files for testing
type testGGUFBuilder struct {
    buf         *bytes.Buffer
    version     uint32
    tensorCount uint64
    kvPairs     []testKVPair
}

type testKVPair struct {
    key       string
    valueType uint32
    value     interface{}
}

func newTestGGUFBuilder() *testGGUFBuilder {
    return &testGGUFBuilder{
        buf:         new(bytes.Buffer),
        version:     3,
        tensorCount: 0,
        kvPairs:     []testKVPair{},
    }
}

func (b *testGGUFBuilder) withVersion(v uint32) *testGGUFBuilder {
    b.version = v
    return b
}

func (b *testGGUFBuilder) withTensorCount(count uint64) *testGGUFBuilder {
    b.tensorCount = count
    return b
}

func (b *testGGUFBuilder) withStringKV(key, value string) *testGGUFBuilder {
    b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeString, value: value})
    return b
}

func (b *testGGUFBuilder) withUint64KV(key string, value uint64) *testGGUFBuilder {
    b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint64, value: value})
    return b
}

func (b *testGGUFBuilder) withUint32KV(key string, value uint32) *testGGUFBuilder {
    b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint32, value: value})
    return b
}

func (b *testGGUFBuilder) writeString(s string) {
    binary.Write(b.buf, binary.LittleEndian, uint64(len(s)))
    b.buf.WriteString(s)
}

func (b *testGGUFBuilder) build() []byte {
    // Write magic number "GGUF"
    binary.Write(b.buf, binary.LittleEndian, uint32(ggufMagic))

    // Write version
    binary.Write(b.buf, binary.LittleEndian, b.version)

    // Write tensor count
    binary.Write(b.buf, binary.LittleEndian, b.tensorCount)

    // Write KV count
    binary.Write(b.buf, binary.LittleEndian, uint64(len(b.kvPairs)))

    // Write KV pairs
    for _, kv := range b.kvPairs {
        // Write key
        b.writeString(kv.key)
        // Write value type
        binary.Write(b.buf, binary.LittleEndian, kv.valueType)
        // Write value based on type
        switch kv.valueType {
        case ggufTypeString:
            b.writeString(kv.value.(string))
        case ggufTypeUint32:
            binary.Write(b.buf, binary.LittleEndian, kv.value.(uint32))
        case ggufTypeUint64:
            binary.Write(b.buf, binary.LittleEndian, kv.value.(uint64))
        case ggufTypeUint8:
            binary.Write(b.buf, binary.LittleEndian, kv.value.(uint8))
        case ggufTypeInt32:
            binary.Write(b.buf, binary.LittleEndian, kv.value.(int32))
        case ggufTypeBool:
            var v uint8
            if kv.value.(bool) {
                v = 1
            }
            binary.Write(b.buf, binary.LittleEndian, v)
        }
    }

    return b.buf.Bytes()
}

// buildInvalidMagic creates a file with invalid magic number
func (b *testGGUFBuilder) buildInvalidMagic() []byte {
    buf := new(bytes.Buffer)
    binary.Write(buf, binary.LittleEndian, uint32(0x12345678))
    return buf.Bytes()
}
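For reference, a test in this package could drive the builder roughly as sketched below to produce a tiny, well-formed GGUF payload for the parser to consume (the helper name and fixture values are illustrative assumptions, not part of the change):

// Illustrative sketch: building a minimal GGUF payload with the test helper above.
func exampleMinimalGGUFPayload() []byte {
    data := newTestGGUFBuilder().
        withVersion(3).
        withTensorCount(2).
        withStringKV("general.architecture", "llama").
        withStringKV("general.name", "tinyllama").
        withUint32KV("general.quantization_version", 2).
        build()

    // The payload starts with the 4-byte magic, 4-byte version, 8-byte tensor count,
    // and 8-byte KV count, all little-endian, followed by the KV pairs themselves.
    return data
}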
@ -80,7 +80,7 @@ func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, _ f
 
 // processJavaArchive processes an archive for java contents, returning all Java libraries and nested archives
 func (gap genericArchiveParserAdapter) processJavaArchive(ctx context.Context, reader file.LocationReadCloser, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {
-	parser, cleanupFn, err := newJavaArchiveParser(reader, true, gap.cfg)
+	parser, cleanupFn, err := newJavaArchiveParser(ctx, reader, true, gap.cfg)
 	// note: even on error, we should always run cleanup functions
 	defer cleanupFn()
 	if err != nil {
@ -99,7 +99,7 @@ func uniquePkgKey(groupID string, p *pkg.Package) string {
 
 // newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover
 // and parse nested archives or ignore them.
-func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) {
+func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) {
 	// fetch the last element of the virtual path
 	virtualElements := strings.Split(reader.Path(), ":")
 	currentFilepath := virtualElements[len(virtualElements)-1]
@ -109,7 +109,7 @@ func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg
 		return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err)
 	}
 
-	fileManifest, err := intFile.NewZipFileManifest(archivePath)
+	fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath)
 	if err != nil {
 		return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err)
 	}
@ -226,7 +226,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
 	}
 
 	// fetch the manifest file
-	contents, err := intFile.ContentsFromZip(j.archivePath, manifestMatches...)
+	contents, err := intFile.ContentsFromZip(ctx, j.archivePath, manifestMatches...)
 	if err != nil {
 		return nil, fmt.Errorf("unable to extract java manifests (%s): %w", j.location, err)
 	}
@ -387,8 +387,9 @@ type parsedPomProject struct {
 
 // discoverMainPackageFromPomInfo attempts to resolve maven groupId, artifactId, version and other info from found pom information
 func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (group, name, version string, parsedPom *parsedPomProject) {
-	properties, _ := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
-	projects, _ := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
+	// Find the pom.properties/pom.xml if the names seem like a plausible match
+	properties, _ := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
+	projects, _ := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
 
 	artifactsMap := j.buildArtifactsMap(properties)
 	pomProperties, parsedPom := j.findBestPomMatch(properties, projects, artifactsMap)
@ -519,13 +520,13 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(ctx context.Context, paren
 	var pkgs []pkg.Package
 
 	// pom.properties
-	properties, err := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
+	properties, err := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
 	if err != nil {
 		return nil, err
 	}
 
 	// pom.xml
-	projects, err := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
+	projects, err := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
 	if err != nil {
 		return nil, err
 	}
@ -575,7 +576,7 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.
 	}
 
 	if len(licenseMatches) > 0 {
-		contents, err := intFile.ContentsFromZip(j.archivePath, licenseMatches...)
+		contents, err := intFile.ContentsFromZip(ctx, j.archivePath, licenseMatches...)
 		if err != nil {
 			return nil, fmt.Errorf("unable to extract java license (%s): %w", j.location, err)
 		}
@ -616,7 +617,7 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(ctx context.Context, pare
 // associating each discovered package to the given parent package.
 func discoverPkgsFromZip(ctx context.Context, location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
 	// search and parse pom.properties files & fetch the contents
-	openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...)
+	openers, err := intFile.ExtractFromZipToUniqueTempFile(ctx, archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...)
 	if err != nil {
 		return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err)
 	}
@ -680,8 +681,8 @@ func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWit
 	return nestedPkgs, nestedRelationships, nil
 }
 
-func pomPropertiesByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) {
-	contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...)
+func pomPropertiesByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) {
+	contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...)
 	if err != nil {
 		return nil, fmt.Errorf("unable to extract maven files: %w", err)
 	}
@ -709,8 +710,8 @@ func pomPropertiesByParentPath(archivePath string, location file.Location, extra
 	return propertiesByParentPath, nil
 }
 
-func pomProjectByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) {
-	contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...)
+func pomProjectByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) {
+	contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...)
 	if err != nil {
 		return nil, fmt.Errorf("unable to extract maven files: %w", err)
 	}
@ -72,7 +72,7 @@ func TestSearchMavenForLicenses(t *testing.T) {
 	require.NoError(t, err)
 
 	// setup parser
-	ap, cleanupFn, err := newJavaArchiveParser(
+	ap, cleanupFn, err := newJavaArchiveParser(context.Background(),
 		file.LocationReadCloser{
 			Location:   file.NewLocation(fixture.Name()),
 			ReadCloser: fixture,
@ -372,7 +372,7 @@ func TestParseJar(t *testing.T) {
 		UseNetwork:              false,
 		UseMavenLocalRepository: false,
 	}
-	parser, cleanupFn, err := newJavaArchiveParser(
+	parser, cleanupFn, err := newJavaArchiveParser(context.Background(),
 		file.LocationReadCloser{
 			Location:   file.NewLocation(fixture.Name()),
 			ReadCloser: fixture,
@ -1499,7 +1499,7 @@ func Test_deterministicMatchingPomProperties(t *testing.T) {
 	fixture, err := os.Open(fixturePath)
 	require.NoError(t, err)
 
-	parser, cleanupFn, err := newJavaArchiveParser(
+	parser, cleanupFn, err := newJavaArchiveParser(context.Background(),
 		file.LocationReadCloser{
 			Location:   file.NewLocation(fixture.Name()),
 			ReadCloser: fixture,
@ -1636,7 +1636,7 @@ func Test_jarPomPropertyResolutionDoesNotPanic(t *testing.T) {
 
 	ctx := context.TODO()
 	// setup parser
-	ap, cleanupFn, err := newJavaArchiveParser(
+	ap, cleanupFn, err := newJavaArchiveParser(context.Background(),
 		file.LocationReadCloser{
 			Location:   file.NewLocation(fixture.Name()),
 			ReadCloser: fixture,
@ -70,7 +70,7 @@ func (gtp genericTarWrappedJavaArchiveParser) parseTarWrappedJavaArchive(ctx con
 }
 
 func discoverPkgsFromTar(ctx context.Context, location file.Location, archivePath, contentPath string, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
-	openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...)
+	openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(ctx, archivePath, contentPath, archiveFormatGlobs...)
 	if err != nil {
 		return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err)
 	}
@ -41,7 +41,7 @@ func (gzp genericZipWrappedJavaArchiveParser) parseZipWrappedJavaArchive(ctx con
 	// functions support zips with shell scripts prepended to the file. Specifically, the helpers use the central
 	// header at the end of the file to determine where the beginning of the zip payload is (unlike the standard lib
 	// or archiver).
-	fileManifest, err := intFile.NewZipFileManifest(archivePath)
+	fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath)
 	if err != nil {
 		return nil, nil, fmt.Errorf("unable to read files from java archive: %w", err)
 	}
37
syft/pkg/gguf.go
Normal file
@ -0,0 +1,37 @@
package pkg

// GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file.
// GGUF is a binary file format used for storing model weights for the GGML library, designed for fast
// loading and saving of models, particularly quantized large language models.
// The model name, license, and version fields have all been lifted up onto the syft Package.
type GGUFFileHeader struct {
    // GGUFVersion is the GGUF format version (e.g., 3)
    GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"`

    // FileSize is the size of the GGUF file in bytes (best-effort, if available from the resolver)
    FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`

    // Architecture is the model architecture (from general.architecture, e.g., "qwen3moe", "llama")
    Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`

    // Quantization is the quantization type (e.g., "IQ4_NL", "Q4_K_M")
    Quantization string `json:"quantization,omitempty" cyclonedx:"quantization"`

    // Parameters is the number of model parameters (if present in the header)
    Parameters uint64 `json:"parameters,omitempty" cyclonedx:"parameters"`

    // TensorCount is the number of tensors in the model
    TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`

    // RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already
    // represented as typed fields above. This preserves additional metadata fields for reference
    // (namespaced with general.*, llama.*, etc.) while avoiding duplication.
    RemainingKeyValues map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`

    // MetadataKeyValuesHash is an xxhash (64-bit) of all key-value pairs from the GGUF header metadata.
    // This hash is computed over the complete header metadata (including the fields extracted
    // into typed fields above) and provides a stable identifier for the model configuration
    // across different file locations or remotes. It allows matching identical models even
    // when stored in different repositories or with different filenames.
    MetadataKeyValuesHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"`
}
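To make the shape of this metadata concrete, a populated header for a hypothetical quantized model might look roughly like the following, from a consumer's point of view (all values are illustrative and not taken from a real file):

// Illustrative values only; real headers are populated by the ai cataloger above.
var exampleHeader = pkg.GGUFFileHeader{
    GGUFVersion:  3,
    Architecture: "llama",
    Quantization: "Q4_K_M",
    Parameters:   8030261248,
    TensorCount:  291,
    RemainingKeyValues: map[string]interface{}{
        "general.quantization_version": uint32(2),
        "llama.context_length":         uint32(8192),
    },
    MetadataKeyValuesHash: "9f3b54c1a2d47e10", // 16 hex characters from the xxhash described above
}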
@ -54,6 +54,7 @@ const (
 	TerraformPkg Type = "terraform"
 	WordpressPluginPkg Type = "wordpress-plugin"
 	HomebrewPkg Type = "homebrew"
+	ModelPkg Type = "model"
 )
 
 // AllPkgs represents all supported package types
@ -98,6 +99,7 @@ var AllPkgs = []Type{
 	TerraformPkg,
 	WordpressPluginPkg,
 	HomebrewPkg,
+	ModelPkg,
 }
 
 // PackageURLType returns the PURL package type for the current package.

@ -155,6 +155,7 @@ func TestTypeFromPURL(t *testing.T) {
 	expectedTypes.Remove(string(HomebrewPkg))
 	expectedTypes.Remove(string(TerraformPkg))
 	expectedTypes.Remove(string(GraalVMNativeImagePkg))
+	expectedTypes.Remove(string(ModelPkg)) // no valid purl for ai artifacts currently
 	expectedTypes.Remove(string(PhpPeclPkg)) // we should always consider this a pear package
 
 	for _, test := range tests {
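A package of the new "model" type is expected to carry the GGUF header as its metadata. A rough sketch of the kind of package the ai cataloger's newGGUFPackage helper assembles (the field values are illustrative, not taken from the diff):

// Illustrative sketch of a model package; the real construction happens in newGGUFPackage.
var exampleModelPackage = pkg.Package{
    Name:     "tinyllama",
    Version:  "v1.0",
    Type:     pkg.ModelPkg,
    Metadata: pkg.GGUFFileHeader{Architecture: "llama", Quantization: "Q4_K_M"},
}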
@ -4,13 +4,15 @@ import (
 	"context"
 	"crypto"
 	"fmt"
+	"io"
 	"os"
 	"path"
+	"path/filepath"
 	"sync"
 
+	"github.com/mholt/archives"
 	"github.com/opencontainers/go-digest"
 
-	"github.com/anchore/archiver/v3"
 	stereoFile "github.com/anchore/stereoscope/pkg/file"
 	intFile "github.com/anchore/syft/internal/file"
 	"github.com/anchore/syft/internal/log"
@ -208,18 +210,8 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro
 	// if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and
 	// use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is
 	// unarchived.
-	envelopedUnarchiver, err := archiver.ByExtension(path)
-	if unarchiver, ok := envelopedUnarchiver.(archiver.Unarchiver); err == nil && ok {
-		// when tar/zip files are extracted, if there are multiple entries at the same
-		// location, the last entry wins
-		// NOTE: this currently does not display any messages if an overwrite happens
-		switch v := unarchiver.(type) {
-		case *archiver.Tar:
-			v.OverwriteExisting = true
-		case *archiver.Zip:
-			v.OverwriteExisting = true
-		}
-
+	envelopedUnarchiver, _, err := archives.Identify(context.Background(), path, nil)
+	if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok {
 		analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver)
 		if err != nil {
 			return "", nil, fmt.Errorf("unable to unarchive source file: %w", err)
@ -246,15 +238,58 @@ func digestOfFileContents(path string) string {
 	return di.String()
 }
 
-func unarchiveToTmp(path string, unarchiver archiver.Unarchiver) (string, func() error, error) {
+func unarchiveToTmp(path string, unarchiver archives.Extractor) (string, func() error, error) {
+	var cleanupFn = func() error { return nil }
+	archive, err := os.Open(path)
+	if err != nil {
+		return "", cleanupFn, fmt.Errorf("unable to open archive: %v", err)
+	}
+	defer archive.Close()
+
 	tempDir, err := os.MkdirTemp("", "syft-archive-contents-")
 	if err != nil {
-		return "", func() error { return nil }, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
+		return "", cleanupFn, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
	}
 
-	cleanupFn := func() error {
-		return os.RemoveAll(tempDir)
-	}
-
-	return tempDir, cleanupFn, unarchiver.Unarchive(path, tempDir)
+	visitor := func(_ context.Context, file archives.FileInfo) error {
+		// Protect against symlink attacks by ensuring path doesn't escape tempDir
+		destPath, err := intFile.SafeJoin(tempDir, file.NameInArchive)
+		if err != nil {
+			return err
+		}
+
+		if file.IsDir() {
+			return os.MkdirAll(destPath, file.Mode())
+		}
+
+		if err = os.MkdirAll(filepath.Dir(destPath), os.ModeDir|0755); err != nil {
+			return fmt.Errorf("failed to create parent directory: %w", err)
+		}
+
+		rc, err := file.Open()
+		if err != nil {
+			return fmt.Errorf("failed to open file in archive: %w", err)
+		}
+		defer rc.Close()
+
+		destFile, err := os.Create(destPath)
+		if err != nil {
+			return fmt.Errorf("failed to create file in destination: %w", err)
+		}
+		defer destFile.Close()
+
+		if err := destFile.Chmod(file.Mode()); err != nil {
+			return fmt.Errorf("failed to change mode of destination file: %w", err)
+		}
+
+		if _, err := io.Copy(destFile, rc); err != nil {
+			return fmt.Errorf("failed to copy file contents: %w", err)
+		}
+
+		return nil
+	}
+
+	return tempDir, func() error {
+		return os.RemoveAll(tempDir)
+	}, unarchiver.Extract(context.Background(), archive, visitor)
 }
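Putting the pieces above together, a caller that wants the same behavior as fileAnalysisPath's archive handling would identify the format first and only extract when an extractor is available, roughly as sketched below (the wrapper function is hypothetical; the archives and unarchiveToTmp calls mirror the ones shown in the diff):

// Illustrative sketch: identify an archive's format, then extract it to a temp dir
// using the visitor-based unarchiveToTmp shown above.
func exampleUnarchive(path string) (string, func() error, error) {
    format, _, err := archives.Identify(context.Background(), path, nil)
    if err != nil {
        return "", nil, err
    }
    extractor, ok := format.(archives.Extractor)
    if !ok {
        return "", nil, fmt.Errorf("%q is not a supported archive", path)
    }
    return unarchiveToTmp(path, extractor)
}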