diff --git a/.binny.yaml b/.binny.yaml index 69d9d3edd..033a6e90c 100644 --- a/.binny.yaml +++ b/.binny.yaml @@ -90,7 +90,7 @@ tools: # used for running all local and CI tasks - name: task version: - want: v3.45.4 + want: v3.45.5 method: github-release with: repo: go-task/task @@ -98,7 +98,7 @@ tools: # used for triggering a release - name: gh version: - want: v2.83.0 + want: v2.83.1 method: github-release with: repo: cli/cli diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 11e806fe4..bb31d3391 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -48,7 +48,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@0499de31b99561a6d14a36a5f662c2a54f91beee #v3.29.5 + uses: github/codeql-action/init@014f16e7ab1402f30e7c3329d33797e7948572db #v3.29.5 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -59,7 +59,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@0499de31b99561a6d14a36a5f662c2a54f91beee #v3.29.5 + uses: github/codeql-action/autobuild@014f16e7ab1402f30e7c3329d33797e7948572db #v3.29.5 # â„šī¸ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -73,4 +73,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@0499de31b99561a6d14a36a5f662c2a54f91beee #v3.29.5 + uses: github/codeql-action/analyze@014f16e7ab1402f30e7c3329d33797e7948572db #v3.29.5 diff --git a/.gitignore b/.gitignore index e4a1f4af8..12578be80 100644 --- a/.gitignore +++ b/.gitignore @@ -73,3 +73,5 @@ cosign.pub __pycache__/ *.py[cod] *$py.class + + diff --git a/README.md b/README.md index 52b210cb5..febec6934 100644 --- a/README.md +++ b/README.md @@ -106,8 +106,8 @@ syft -o Where the `formats` available are: - `syft-json`: Use this to get as much information out of Syft as possible! - `syft-text`: A row-oriented, human-and-machine-friendly output. -- `cyclonedx-xml`: A XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/). -- `cyclonedx-xml@1.5`: A XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/). +- `cyclonedx-xml`: An XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/). +- `cyclonedx-xml@1.5`: An XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/). - `cyclonedx-json`: A JSON report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/). - `cyclonedx-json@1.5`: A JSON report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/). - `spdx-tag-value`: A tag-value formatted report conforming to the [SPDX 2.3 specification](https://spdx.github.io/spdx-spec/v2.3/). 
diff --git a/cmd/syft/internal/test/integration/catalog_packages_test.go b/cmd/syft/internal/test/integration/catalog_packages_test.go index c11be21c5..8e089c20c 100644 --- a/cmd/syft/internal/test/integration/catalog_packages_test.go +++ b/cmd/syft/internal/test/integration/catalog_packages_test.go @@ -87,6 +87,7 @@ func TestPkgCoverageImage(t *testing.T) { definedPkgs.Remove(string(pkg.TerraformPkg)) definedPkgs.Remove(string(pkg.PhpPeclPkg)) // we have coverage for pear instead definedPkgs.Remove(string(pkg.CondaPkg)) + definedPkgs.Remove(string(pkg.ModelPkg)) var cases []testCase cases = append(cases, commonTestCases...) @@ -161,6 +162,7 @@ func TestPkgCoverageDirectory(t *testing.T) { definedPkgs.Remove(string(pkg.UnknownPkg)) definedPkgs.Remove(string(pkg.CondaPkg)) definedPkgs.Remove(string(pkg.PhpPeclPkg)) // this is covered as pear packages + definedPkgs.Remove(string(pkg.ModelPkg)) // for directory scans we should not expect to see any of the following package types definedPkgs.Remove(string(pkg.KbPkg)) diff --git a/go.mod b/go.mod index cbb025727..4ed0739c8 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,6 @@ require ( github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d github.com/acobaugh/osrelease v0.1.0 github.com/adrg/xdg v0.5.3 - github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51 github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 github.com/anchore/fangs v0.0.0-20250319222917-446a1e748ec2 @@ -90,7 +89,7 @@ require ( go.uber.org/goleak v1.3.0 go.yaml.in/yaml/v3 v3.0.4 golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b - golang.org/x/mod v0.29.0 + golang.org/x/mod v0.30.0 golang.org/x/net v0.46.0 modernc.org/sqlite v1.40.0 ) @@ -168,7 +167,6 @@ require ( github.com/goccy/go-yaml v1.18.0 github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect - github.com/golang/snappy v0.0.4 // indirect github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect github.com/google/s2a-go v0.1.8 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect @@ -209,10 +207,6 @@ require ( github.com/muesli/cancelreader v0.2.2 // indirect github.com/muesli/termenv v0.16.0 // indirect github.com/ncruces/go-strftime v0.1.9 // indirect - github.com/nwaples/rardecode v1.1.3 // indirect - github.com/nwaples/rardecode/v2 v2.2.0 // indirect - github.com/olekukonko/errors v1.1.0 // indirect - github.com/olekukonko/ll v0.1.2 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect github.com/opencontainers/runtime-spec v1.1.0 // indirect github.com/opencontainers/selinux v1.13.0 // indirect @@ -286,6 +280,11 @@ require ( modernc.org/memory v1.11.0 // indirect ) +require ( + github.com/cespare/xxhash/v2 v2.3.0 + github.com/gpustack/gguf-parser-go v0.22.1 +) + require ( cyphar.com/go-pathrs v0.2.1 // indirect github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect @@ -310,7 +309,16 @@ require ( github.com/clipperhouse/stringish v0.1.1 // indirect github.com/clipperhouse/uax29/v2 v2.2.0 // indirect github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect + github.com/henvic/httpretty v0.1.4 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/nwaples/rardecode/v2 v2.2.0 // indirect github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect + 
github.com/olekukonko/errors v1.1.0 // indirect + github.com/olekukonko/ll v0.1.2 // indirect + github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect + gonum.org/v1/gonum v0.15.1 // indirect ) retract ( diff --git a/go.sum b/go.sum index a02b9e998..05c2da5cd 100644 --- a/go.sum +++ b/go.sum @@ -110,8 +110,6 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51 h1:yhk+P8lF3ZiROjmaVRao9WGTRo4b/wYjoKEiAHWrKwc= -github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51/go.mod h1:nwuGSd7aZp0rtYt79YggCGafz1RYsclE7pi3fhLwvuw= github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 h1:p0ZIe0htYOX284Y4axJaGBvXHU0VCCzLN5Wf5XbKStU= github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9/go.mod h1:3ZsFB9tzW3vl4gEiUeuSOMDnwroWxIxJelOOHUp8dSw= github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 h1:2sIdYJlQESEnyk3Y0WD2vXWW5eD2iMz9Ev8fj1Z8LNA= @@ -229,7 +227,6 @@ github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqy github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= @@ -480,8 +477,6 @@ github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= -github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -549,6 +544,8 @@ github.com/gookit/assert v0.1.1/go.mod h1:jS5bmIVQZTIwk42uXl4lyj4iaaxx32tqH16CFj github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg= github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA= github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs= +github.com/gpustack/gguf-parser-go v0.22.1 h1:FRnEDWqT0Rcplr/R9ctCRSN2+3DhVsf6dnR5/i9JA4E= +github.com/gpustack/gguf-parser-go v0.22.1/go.mod h1:y4TwTtDqFWTK+xvprOjRUh+dowgU2TKCX37vRKvGiZ0= github.com/grpc-ecosystem/grpc-gateway v1.16.0 
h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms= @@ -598,6 +595,8 @@ github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk= github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4= +github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU= +github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM= github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI= github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA= @@ -625,6 +624,7 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= @@ -730,9 +730,11 @@ github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcY github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA= github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= @@ -749,8 +751,6 @@ github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1 h1:kpt9ZfKcm+ github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1/go.mod h1:qgCw4bBKZX8qMgGeEZzGFVT3notl42dBjNqO2jut0M0= github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 h1:NHrXEjTNQY7P0Zfx1aMrNhpgxHmow66XQtm0aQLY0AE= github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8= 
-github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc= -github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/nwaples/rardecode/v2 v2.2.0 h1:4ufPGHiNe1rYJxYfehALLjup4Ls3ck42CWwjKiOqu0A= github.com/nwaples/rardecode/v2 v2.2.0/go.mod h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw= github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 h1:zrbMGy9YXpIeTnGj4EljqMiZsIcE09mmF8XsD5AYOJc= @@ -860,6 +860,8 @@ github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yf github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8= github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY= +github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY= +github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0= github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik= github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= @@ -1070,8 +1072,8 @@ golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= -golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= +golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk= +golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -1313,6 +1315,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0= +gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= diff --git a/internal/constants.go b/internal/constants.go index 03c9318ee..16275d02f 100644 --- a/internal/constants.go +++ b/internal/constants.go @@ -3,5 +3,5 @@ package internal const ( // JSONSchemaVersion is the current schema version output by the JSON encoder // This is roughly following the "SchemaVer" guidelines for versioning the JSON 
schema. Please see schema/json/README.md for details on how to increment. - JSONSchemaVersion = "16.0.42" + JSONSchemaVersion = "16.0.43" ) diff --git a/internal/file/tar_file_traversal.go b/internal/file/tar_file_traversal.go index 7d211168a..c3511a1cc 100644 --- a/internal/file/tar_file_traversal.go +++ b/internal/file/tar_file_traversal.go @@ -1,17 +1,40 @@ package file import ( + "context" "fmt" "os" "path/filepath" "github.com/bmatcuk/doublestar/v4" + "github.com/mholt/archives" - "github.com/anchore/archiver/v3" + "github.com/anchore/syft/internal" ) +// TraverseFilesInTar enumerates all paths stored within a tar archive using the visitor pattern. +func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archives.FileHandler) error { + tarReader, err := os.Open(archivePath) + if err != nil { + return fmt.Errorf("unable to open tar archive (%s): %w", archivePath, err) + } + defer internal.CloseAndLogError(tarReader, archivePath) + + format, _, err := archives.Identify(ctx, archivePath, nil) + if err != nil { + return fmt.Errorf("failed to identify tar compression format: %w", err) + } + + extractor, ok := format.(archives.Extractor) + if !ok { + return fmt.Errorf("file format does not support extraction: %s", archivePath) + } + + return extractor.Extract(ctx, tarReader, visitor) +} + // ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted. -func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...string) (map[string]Opener, error) { +func ExtractGlobsFromTarToUniqueTempFile(ctx context.Context, archivePath, dir string, globs ...string) (map[string]Opener, error) { results := make(map[string]Opener) // don't allow for full traversal, only select traversal from given paths @@ -19,9 +42,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin return results, nil } - visitor := func(file archiver.File) error { - defer file.Close() - + visitor := func(_ context.Context, file archives.FileInfo) error { // ignore directories if file.IsDir() { return nil @@ -43,7 +64,13 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin // provides a ReadCloser. It is up to the caller to handle closing the file explicitly. 
defer tempFile.Close() - if err := safeCopy(tempFile, file.ReadCloser); err != nil { + packedFile, err := file.Open() + if err != nil { + return fmt.Errorf("unable to read file=%q from tar=%q: %w", file.NameInArchive, archivePath, err) + } + defer internal.CloseAndLogError(packedFile, archivePath) + + if err := safeCopy(tempFile, packedFile); err != nil { return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err) } @@ -52,7 +79,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin return nil } - return results, archiver.Walk(archivePath, visitor) + return results, TraverseFilesInTar(ctx, archivePath, visitor) } func matchesAnyGlob(name string, globs ...string) bool { diff --git a/internal/file/zip_file_manifest.go b/internal/file/zip_file_manifest.go index 346e661c6..8dcb0d2f2 100644 --- a/internal/file/zip_file_manifest.go +++ b/internal/file/zip_file_manifest.go @@ -1,10 +1,12 @@ package file import ( + "context" "os" "sort" "strings" + "github.com/mholt/archives" "github.com/scylladb/go-set/strset" "github.com/anchore/syft/internal/log" @@ -14,22 +16,25 @@ import ( type ZipFileManifest map[string]os.FileInfo // NewZipFileManifest creates and returns a new ZipFileManifest populated with path and metadata from the given zip archive path. -func NewZipFileManifest(archivePath string) (ZipFileManifest, error) { - zipReader, err := OpenZip(archivePath) +func NewZipFileManifest(ctx context.Context, archivePath string) (ZipFileManifest, error) { + zipReader, err := os.Open(archivePath) manifest := make(ZipFileManifest) if err != nil { log.Debugf("unable to open zip archive (%s): %v", archivePath, err) return manifest, err } defer func() { - err = zipReader.Close() - if err != nil { + if err = zipReader.Close(); err != nil { log.Debugf("unable to close zip archive (%s): %+v", archivePath, err) } }() - for _, file := range zipReader.File { - manifest.Add(file.Name, file.FileInfo()) + err = archives.Zip{}.Extract(ctx, zipReader, func(_ context.Context, file archives.FileInfo) error { + manifest.Add(file.NameInArchive, file.FileInfo) + return nil + }) + if err != nil { + return manifest, err } return manifest, nil } diff --git a/internal/file/zip_file_manifest_test.go b/internal/file/zip_file_manifest_test.go index 75d445228..9ebe42224 100644 --- a/internal/file/zip_file_manifest_test.go +++ b/internal/file/zip_file_manifest_test.go @@ -4,6 +4,7 @@ package file import ( + "context" "encoding/json" "os" "path" @@ -24,7 +25,7 @@ func TestNewZipFileManifest(t *testing.T) { archiveFilePath := setupZipFileTest(t, sourceDirPath, false) - actual, err := NewZipFileManifest(archiveFilePath) + actual, err := NewZipFileManifest(context.Background(), archiveFilePath) if err != nil { t.Fatalf("unable to extract from unzip archive: %+v", err) } @@ -59,7 +60,7 @@ func TestNewZip64FileManifest(t *testing.T) { sourceDirPath := path.Join(cwd, "test-fixtures", "zip-source") archiveFilePath := setupZipFileTest(t, sourceDirPath, true) - actual, err := NewZipFileManifest(archiveFilePath) + actual, err := NewZipFileManifest(context.Background(), archiveFilePath) if err != nil { t.Fatalf("unable to extract from unzip archive: %+v", err) } @@ -99,7 +100,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) { archiveFilePath := setupZipFileTest(t, sourceDirPath, false) - z, err := NewZipFileManifest(archiveFilePath) + z, err := NewZipFileManifest(context.Background(), archiveFilePath) if err != nil { t.Fatalf("unable to extract from unzip archive: %+v", err) } 
diff --git a/internal/file/zip_file_traversal.go b/internal/file/zip_file_traversal.go index 1b712eff5..5fc26a220 100644 --- a/internal/file/zip_file_traversal.go +++ b/internal/file/zip_file_traversal.go @@ -1,13 +1,15 @@ package file import ( - "archive/zip" "bytes" + "context" "fmt" "os" "path/filepath" "strings" + "github.com/mholt/archives" + "github.com/anchore/syft/internal/log" ) @@ -25,7 +27,7 @@ type errZipSlipDetected struct { } func (e *errZipSlipDetected) Error() string { - return fmt.Sprintf("paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs) + return fmt.Sprintf("path traversal detected: paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs) } type zipTraversalRequest map[string]struct{} @@ -39,38 +41,34 @@ func newZipTraverseRequest(paths ...string) zipTraversalRequest { } // TraverseFilesInZip enumerates all paths stored within a zip archive using the visitor pattern. -func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths ...string) error { +func TraverseFilesInZip(ctx context.Context, archivePath string, visitor archives.FileHandler, paths ...string) error { request := newZipTraverseRequest(paths...) - zipReader, err := OpenZip(archivePath) + zipReader, err := os.Open(archivePath) if err != nil { return fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err) } defer func() { - err = zipReader.Close() - if err != nil { + if err := zipReader.Close(); err != nil { log.Errorf("unable to close zip archive (%s): %+v", archivePath, err) } }() - for _, file := range zipReader.File { + return archives.Zip{}.Extract(ctx, zipReader, func(ctx context.Context, file archives.FileInfo) error { // if no paths are given then assume that all files should be traversed if len(paths) > 0 { - if _, ok := request[file.Name]; !ok { + if _, ok := request[file.NameInArchive]; !ok { // this file path is not of interest - continue + return nil } } - if err = visitor(file); err != nil { - return err - } - } - return nil + return visitor(ctx, file) + }) } // ExtractFromZipToUniqueTempFile extracts select paths for the given archive to a temporary directory, returning file openers for each file extracted. 
-func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]Opener, error) { +func ExtractFromZipToUniqueTempFile(ctx context.Context, archivePath, dir string, paths ...string) (map[string]Opener, error) { results := make(map[string]Opener) // don't allow for full traversal, only select traversal from given paths @@ -78,9 +76,8 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m return results, nil } - visitor := func(file *zip.File) error { - tempfilePrefix := filepath.Base(filepath.Clean(file.Name)) + "-" - + visitor := func(_ context.Context, file archives.FileInfo) error { + tempfilePrefix := filepath.Base(filepath.Clean(file.NameInArchive)) + "-" tempFile, err := os.CreateTemp(dir, tempfilePrefix) if err != nil { return fmt.Errorf("unable to create temp file: %w", err) @@ -92,33 +89,32 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m zippedFile, err := file.Open() if err != nil { - return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) + return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err) } defer func() { - err := zippedFile.Close() - if err != nil { - log.Errorf("unable to close source file=%q from zip=%q: %+v", file.Name, archivePath, err) + if err := zippedFile.Close(); err != nil { + log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err) } }() - if file.FileInfo().IsDir() { - return fmt.Errorf("unable to extract directories, only files: %s", file.Name) + if file.IsDir() { + return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive) } if err := safeCopy(tempFile, zippedFile); err != nil { - return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err) + return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err) } - results[file.Name] = Opener{path: tempFile.Name()} + results[file.NameInArchive] = Opener{path: tempFile.Name()} return nil } - return results, TraverseFilesInZip(archivePath, visitor, paths...) + return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...) } // ContentsFromZip extracts select paths for the given archive and returns a set of string contents for each path. 
-func ContentsFromZip(archivePath string, paths ...string) (map[string]string, error) { +func ContentsFromZip(ctx context.Context, archivePath string, paths ...string) (map[string]string, error) { results := make(map[string]string) // don't allow for full traversal, only select traversal from given paths @@ -126,37 +122,38 @@ func ContentsFromZip(archivePath string, paths ...string) (map[string]string, er return results, nil } - visitor := func(file *zip.File) error { + visitor := func(_ context.Context, file archives.FileInfo) error { zippedFile, err := file.Open() if err != nil { - return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) + return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err) } + defer func() { + if err := zippedFile.Close(); err != nil { + log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err) + } + }() - if file.FileInfo().IsDir() { - return fmt.Errorf("unable to extract directories, only files: %s", file.Name) + if file.IsDir() { + return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive) } var buffer bytes.Buffer if err := safeCopy(&buffer, zippedFile); err != nil { - return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err) + return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err) } - results[file.Name] = buffer.String() + results[file.NameInArchive] = buffer.String() - err = zippedFile.Close() - if err != nil { - return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err) - } return nil } - return results, TraverseFilesInZip(archivePath, visitor, paths...) + return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...) } // UnzipToDir extracts a zip archive to a target directory. -func UnzipToDir(archivePath, targetDir string) error { - visitor := func(file *zip.File) error { - joinedPath, err := safeJoin(targetDir, file.Name) +func UnzipToDir(ctx context.Context, archivePath, targetDir string) error { + visitor := func(_ context.Context, file archives.FileInfo) error { + joinedPath, err := SafeJoin(targetDir, file.NameInArchive) if err != nil { return err } @@ -164,11 +161,11 @@ func UnzipToDir(archivePath, targetDir string) error { return extractSingleFile(file, joinedPath, archivePath) } - return TraverseFilesInZip(archivePath, visitor) + return TraverseFilesInZip(ctx, archivePath, visitor) } -// safeJoin ensures that any destinations do not resolve to a path above the prefix path. -func safeJoin(prefix string, dest ...string) (string, error) { +// SafeJoin ensures that any destinations do not resolve to a path above the prefix path. +func SafeJoin(prefix string, dest ...string) (string, error) { joinResult := filepath.Join(append([]string{prefix}, dest...)...) 
cleanJoinResult := filepath.Clean(joinResult) if !strings.HasPrefix(cleanJoinResult, filepath.Clean(prefix)) { @@ -181,13 +178,18 @@ func safeJoin(prefix string, dest ...string) (string, error) { return joinResult, nil } -func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) error { +func extractSingleFile(file archives.FileInfo, expandedFilePath, archivePath string) error { zippedFile, err := file.Open() if err != nil { - return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) + return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err) } + defer func() { + if err := zippedFile.Close(); err != nil { + log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err) + } + }() - if file.FileInfo().IsDir() { + if file.IsDir() { err = os.MkdirAll(expandedFilePath, file.Mode()) if err != nil { return fmt.Errorf("unable to create dir=%q from zip=%q: %w", expandedFilePath, archivePath, err) @@ -202,20 +204,16 @@ func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) err if err != nil { return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err) } + defer func() { + if err := outputFile.Close(); err != nil { + log.Errorf("unable to close dest file=%q from zip=%q: %+v", outputFile.Name(), archivePath, err) + } + }() if err := safeCopy(outputFile, zippedFile); err != nil { - return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.Name, outputFile.Name(), archivePath, err) - } - - err = outputFile.Close() - if err != nil { - return fmt.Errorf("unable to close dest file=%q from zip=%q: %w", outputFile.Name(), archivePath, err) + return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.NameInArchive, outputFile.Name(), archivePath, err) } } - err = zippedFile.Close() - if err != nil { - return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err) - } return nil } diff --git a/internal/file/zip_file_traversal_test.go b/internal/file/zip_file_traversal_test.go index d5a81d273..812f5e450 100644 --- a/internal/file/zip_file_traversal_test.go +++ b/internal/file/zip_file_traversal_test.go @@ -4,6 +4,8 @@ package file import ( + "archive/zip" + "context" "crypto/sha256" "encoding/json" "errors" @@ -17,6 +19,7 @@ import ( "github.com/go-test/deep" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func equal(r1, r2 io.Reader) (bool, error) { @@ -55,7 +58,7 @@ func TestUnzipToDir(t *testing.T) { expectedPaths := len(expectedZipArchiveEntries) observedPaths := 0 - err = UnzipToDir(archiveFilePath, unzipDestinationDir) + err = UnzipToDir(context.Background(), archiveFilePath, unzipDestinationDir) if err != nil { t.Fatalf("unable to unzip archive: %+v", err) } @@ -145,7 +148,7 @@ func TestContentsFromZip(t *testing.T) { paths = append(paths, p) } - actual, err := ContentsFromZip(archivePath, paths...) + actual, err := ContentsFromZip(context.Background(), archivePath, paths...) if err != nil { t.Fatalf("unable to extract from unzip archive: %+v", err) } @@ -307,9 +310,528 @@ func TestSafeJoin(t *testing.T) { for _, test := range tests { t.Run(fmt.Sprintf("%+v:%+v", test.prefix, test.args), func(t *testing.T) { - actual, err := safeJoin(test.prefix, test.args...) + actual, err := SafeJoin(test.prefix, test.args...) 
test.errAssertion(t, err)
 			assert.Equal(t, test.expected, actual)
 		})
 	}
 }
+
+// TestSafeJoin_SymlinkProtection demonstrates that SafeJoin protects against symlink-based
+// directory traversal attacks by validating that archive entry paths cannot escape
+// the extraction directory.
+func TestSafeJoin_SymlinkProtection(t *testing.T) {
+	tests := []struct {
+		name        string
+		archivePath string // Path as it would appear in the archive
+		expectError bool
+		description string
+	}{
+		{
+			name:        "path traversal via ../",
+			archivePath: "../../../outside/file.txt",
+			expectError: true,
+			description: "Archive entry with ../ trying to escape extraction dir",
+		},
+		{
+			name:        "absolute path symlink target",
+			archivePath: "../../../sensitive.txt",
+			expectError: true,
+			description: "Simulates symlink pointing outside via relative path",
+		},
+		{
+			name:        "safe relative path within extraction dir",
+			archivePath: "subdir/safe.txt",
+			expectError: false,
+			description: "Normal file path that stays within extraction directory",
+		},
+		{
+			name:        "safe path with internal ../",
+			archivePath: "dir1/../dir2/file.txt",
+			expectError: false,
+			description: "Path with ../ that still resolves within extraction dir",
+		},
+		{
+			name:        "deeply nested traversal",
+			archivePath: "../../../../../../tmp/evil.txt",
+			expectError: true,
+			description: "Multiple levels of ../ trying to escape",
+		},
+		{
+			name:        "single parent directory escape",
+			archivePath: "../",
+			expectError: true,
+			description: "Simple one-level escape attempt",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Create temp directories to simulate extraction scenario
+			tmpDir := t.TempDir()
+			extractDir := filepath.Join(tmpDir, "extract")
+			outsideDir := filepath.Join(tmpDir, "outside")
+
+			require.NoError(t, os.MkdirAll(extractDir, 0755))
+			require.NoError(t, os.MkdirAll(outsideDir, 0755))
+
+			// Create a file outside extraction dir that an attacker might target
+			outsideFile := filepath.Join(outsideDir, "sensitive.txt")
+			require.NoError(t, os.WriteFile(outsideFile, []byte("sensitive data"), 0644))
+
+			// Test SafeJoin - this is what happens when processing archive entries
+			result, err := SafeJoin(extractDir, tt.archivePath)
+
+			if tt.expectError {
+				// Should block malicious paths
+				require.Error(t, err, "Expected SafeJoin to reject malicious path")
+				var zipSlipErr *errZipSlipDetected
+				assert.ErrorAs(t, err, &zipSlipErr, "Error should be errZipSlipDetected type")
+				assert.Empty(t, result, "Result should be empty for blocked paths")
+			} else {
+				// Should allow safe paths
+				require.NoError(t, err, "Expected SafeJoin to allow safe path")
+				assert.NotEmpty(t, result, "Result should not be empty for safe paths")
+				assert.True(t, strings.HasPrefix(filepath.Clean(result), filepath.Clean(extractDir)),
+					"Safe path should resolve within extraction directory")
+			}
+		})
+	}
+}
+
+// TestUnzipToDir_SymlinkAttacks tests the UnzipToDir function with malicious ZIP archives
+// containing symlink entries that attempt path traversal attacks.
+//
+// EXPECTED BEHAVIOR: UnzipToDir should either:
+// 1. Detect and reject symlinks explicitly with a security error, OR
+// 2. Extract them safely (library converts symlinks to regular files)
+func TestUnzipToDir_SymlinkAttacks(t *testing.T) {
+	tests := []struct {
+		name        string
+		symlinkName string
+		fileName    string
+		errContains string
+	}{
+		{
+			name:        "direct symlink to outside directory",
+			symlinkName: "evil_link",
+			fileName:    "evil_link/payload.txt",
+			errContains: "not a directory", // attempt to write through symlink leaf (which is not a directory)
+		},
+		{
+			name:        "directory symlink attack",
+			symlinkName: "safe_dir/link",
+			fileName:    "safe_dir/link/payload.txt",
+			errContains: "not a directory", // attempt to write through symlink (which is not a directory)
+		},
+		{
+			name:        "symlink without payload file",
+			symlinkName: "standalone_link",
+			fileName:    "", // no payload file
+			errContains: "", // no error expected, symlink without payload is safe
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			tempDir := t.TempDir()
+
+			// create outside target directory
+			outsideDir := filepath.Join(tempDir, "outside_target")
+			require.NoError(t, os.MkdirAll(outsideDir, 0755))
+
+			// create extraction directory
+			extractDir := filepath.Join(tempDir, "extract")
+			require.NoError(t, os.MkdirAll(extractDir, 0755))
+
+			maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, outsideDir, tt.fileName)
+
+			err := UnzipToDir(context.Background(), maliciousZip, extractDir)
+
+			// check error expectations
+			if tt.errContains != "" {
+				require.Error(t, err)
+				require.Contains(t, err.Error(), tt.errContains)
+			} else {
+				require.NoError(t, err)
+			}
+
+			analyzeExtractionDirectory(t, extractDir)
+
+			// check if payload file escaped extraction directory
+			if tt.fileName != "" {
+				maliciousFile := filepath.Join(outsideDir, filepath.Base(tt.fileName))
+				checkFileOutsideExtraction(t, maliciousFile)
+			}
+
+			// check if symlink was created pointing outside
+			symlinkPath := filepath.Join(extractDir, tt.symlinkName)
+			checkSymlinkCreation(t, symlinkPath, extractDir, outsideDir)
+		})
+	}
+}
+
+// TestContentsFromZip_SymlinkAttacks tests the ContentsFromZip function with malicious
+// ZIP archives containing symlink entries.
+//
+// EXPECTED BEHAVIOR: ContentsFromZip should either:
+// 1. Reject symlinks explicitly, OR
+// 2. Return empty content for symlinks (library behavior)
+//
+// ContentsFromZip doesn't write to disk, but if symlinks are followed, it could read sensitive
+// files from outside the archive.
+func TestContentsFromZip_SymlinkAttacks(t *testing.T) { + tests := []struct { + name string + symlinkName string + symlinkTarget string + requestPath string + errContains string + }{ + { + name: "request symlink entry directly", + symlinkName: "evil_link", + symlinkTarget: "/etc/hosts", // attempt to read sensitive file + requestPath: "evil_link", + errContains: "", // no error expected - library returns symlink metadata + }, + { + name: "symlink in nested directory", + symlinkName: "nested/link", + symlinkTarget: "/etc/hosts", + requestPath: "nested/link", + errContains: "", // no error expected - library returns symlink metadata + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tempDir := t.TempDir() + + // create malicious ZIP with symlink entry (no payload file needed) + maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, tt.symlinkTarget, "") + + contents, err := ContentsFromZip(context.Background(), maliciousZip, tt.requestPath) + + // check error expectations + if tt.errContains != "" { + require.Error(t, err) + require.Contains(t, err.Error(), tt.errContains) + return + } + require.NoError(t, err) + + // verify symlink handling - library should return symlink target as content (metadata) + content, found := contents[tt.requestPath] + require.True(t, found, "symlink entry should be found in results") + + // verify symlink was NOT followed (content should be target path or empty) + if content != "" && content != tt.symlinkTarget { + // content is not empty and not the symlink target - check if actual file was read + if _, statErr := os.Stat(tt.symlinkTarget); statErr == nil { + targetContent, readErr := os.ReadFile(tt.symlinkTarget) + if readErr == nil && string(targetContent) == content { + t.Errorf("critical issue!... symlink was FOLLOWED and external file content was read!") + t.Logf(" symlink: %s → %s", tt.requestPath, tt.symlinkTarget) + t.Logf(" content length: %d bytes", len(content)) + } + } + } + }) + } +} + +// TestExtractFromZipToUniqueTempFile_SymlinkAttacks tests the ExtractFromZipToUniqueTempFile +// function with malicious ZIP archives containing symlink entries. +// +// EXPECTED BEHAVIOR: ExtractFromZipToUniqueTempFile should either: +// 1. Reject symlinks explicitly, OR +// 2. Extract them safely (library converts to empty files, filepath.Base sanitizes names) +// +// This function uses filepath.Base() on the archive entry name for temp file prefix and +// os.CreateTemp() which creates files in the specified directory, so it should be protected. 
+func TestExtractFromZipToUniqueTempFile_SymlinkAttacks(t *testing.T) { + tests := []struct { + name string + symlinkName string + symlinkTarget string + requestPath string + errContains string + }{ + { + name: "extract symlink entry to temp file", + symlinkName: "evil_link", + symlinkTarget: "/etc/passwd", + requestPath: "evil_link", + errContains: "", // no error expected - library extracts symlink metadata + }, + { + name: "extract nested symlink", + symlinkName: "nested/dir/link", + symlinkTarget: "/tmp/outside", + requestPath: "nested/dir/link", + errContains: "", // no error expected + }, + { + name: "extract path traversal symlink name", + symlinkName: "../../escape", + symlinkTarget: "/tmp/outside", + requestPath: "../../escape", + errContains: "", // no error expected - filepath.Base sanitizes name + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tempDir := t.TempDir() + + maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, tt.symlinkTarget, "") + + // create temp directory for extraction + extractTempDir := filepath.Join(tempDir, "temp_extract") + require.NoError(t, os.MkdirAll(extractTempDir, 0755)) + + openers, err := ExtractFromZipToUniqueTempFile(context.Background(), maliciousZip, extractTempDir, tt.requestPath) + + // check error expectations + if tt.errContains != "" { + require.Error(t, err) + require.Contains(t, err.Error(), tt.errContains) + return + } + require.NoError(t, err) + + // verify symlink was extracted + opener, found := openers[tt.requestPath] + require.True(t, found, "symlink entry should be extracted") + + // verify temp file is within temp directory + tempFilePath := opener.path + cleanTempDir := filepath.Clean(extractTempDir) + cleanTempFile := filepath.Clean(tempFilePath) + require.True(t, strings.HasPrefix(cleanTempFile, cleanTempDir), + "temp file must be within temp directory: %s not in %s", cleanTempFile, cleanTempDir) + + // verify symlink was NOT followed (content should be target path or empty) + f, openErr := opener.Open() + require.NoError(t, openErr) + defer f.Close() + + content, readErr := io.ReadAll(f) + require.NoError(t, readErr) + + // check if symlink was followed (content matches actual file) + if len(content) > 0 && string(content) != tt.symlinkTarget { + if _, statErr := os.Stat(tt.symlinkTarget); statErr == nil { + targetContent, readErr := os.ReadFile(tt.symlinkTarget) + if readErr == nil && string(targetContent) == string(content) { + t.Errorf("critical issue!... symlink was FOLLOWED and external file content was copied!") + t.Logf(" symlink: %s → %s", tt.requestPath, tt.symlinkTarget) + t.Logf(" content length: %d bytes", len(content)) + } + } + } + }) + } +} + +// forensicFindings contains the results of analyzing an extraction directory +type forensicFindings struct { + symlinksFound []forensicSymlink + regularFiles []string + directories []string + symlinkVulnerabilities []string +} + +type forensicSymlink struct { + path string + target string + escapesExtraction bool + resolvedPath string +} + +// analyzeExtractionDirectory walks the extraction directory and detects symlinks that point +// outside the extraction directory. It is silent unless vulnerabilities are found. 
+func analyzeExtractionDirectory(t *testing.T, extractDir string) forensicFindings { + t.Helper() + + findings := forensicFindings{} + + filepath.Walk(extractDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + // only log if there's an error walking the directory + t.Logf("Error walking %s: %v", path, err) + return nil + } + + relPath := strings.TrimPrefix(path, extractDir+"/") + if relPath == "" { + relPath = "." + } + + // use Lstat to detect symlinks without following them + linfo, lerr := os.Lstat(path) + if lerr == nil && linfo.Mode()&os.ModeSymlink != 0 { + target, _ := os.Readlink(path) + + // resolve to see where it actually points + var resolvedPath string + var escapesExtraction bool + + if filepath.IsAbs(target) { + // absolute symlink + resolvedPath = target + cleanExtractDir := filepath.Clean(extractDir) + escapesExtraction = !strings.HasPrefix(filepath.Clean(target), cleanExtractDir) + + if escapesExtraction { + t.Errorf("critical issue!... absolute symlink created: %s → %s", relPath, target) + t.Logf(" this symlink points outside the extraction directory") + findings.symlinkVulnerabilities = append(findings.symlinkVulnerabilities, + fmt.Sprintf("absolute symlink: %s → %s", relPath, target)) + } + } else { + // relative symlink - resolve it + resolvedPath = filepath.Join(filepath.Dir(path), target) + cleanResolved := filepath.Clean(resolvedPath) + cleanExtractDir := filepath.Clean(extractDir) + + escapesExtraction = !strings.HasPrefix(cleanResolved, cleanExtractDir) + + if escapesExtraction { + t.Errorf("critical issue!... symlink escapes extraction dir: %s → %s", relPath, target) + t.Logf(" symlink resolves to: %s (outside extraction directory)", cleanResolved) + findings.symlinkVulnerabilities = append(findings.symlinkVulnerabilities, + fmt.Sprintf("relative symlink escape: %s → %s (resolves to %s)", relPath, target, cleanResolved)) + } + } + + findings.symlinksFound = append(findings.symlinksFound, forensicSymlink{ + path: relPath, + target: target, + escapesExtraction: escapesExtraction, + resolvedPath: resolvedPath, + }) + } else { + // regular file or directory - collect silently + if info.IsDir() { + findings.directories = append(findings.directories, relPath) + } else { + findings.regularFiles = append(findings.regularFiles, relPath) + } + } + return nil + }) + + return findings +} + +// checkFileOutsideExtraction checks if a file was written outside the extraction directory. +// Returns true if the file exists (vulnerability), false otherwise. Silent on success. +func checkFileOutsideExtraction(t *testing.T, filePath string) bool { + t.Helper() + + if stat, err := os.Stat(filePath); err == nil { + content, _ := os.ReadFile(filePath) + t.Errorf("critical issue!... file written OUTSIDE extraction directory!") + t.Logf(" location: %s", filePath) + t.Logf(" size: %d bytes", stat.Size()) + t.Logf(" content: %s", string(content)) + t.Logf(" ...this means an attacker can write files to arbitrary locations on the filesystem") + return true + } + // no file found outside extraction directory... + return false +} + +// checkSymlinkCreation verifies if a symlink was created at the expected path and reports +// whether it points outside the extraction directory. Silent unless a symlink is found. 
+func checkSymlinkCreation(t *testing.T, symlinkPath, extractDir, expectedTarget string) bool { + t.Helper() + + if linfo, err := os.Lstat(symlinkPath); err == nil { + if linfo.Mode()&os.ModeSymlink != 0 { + target, _ := os.Readlink(symlinkPath) + + if expectedTarget != "" && target == expectedTarget { + t.Errorf("critical issue!... symlink pointing outside extraction dir was created!") + t.Logf(" Symlink: %s → %s", symlinkPath, target) + return true + } + + // Check if it escapes even if target doesn't match expected + if filepath.IsAbs(target) { + cleanExtractDir := filepath.Clean(extractDir) + if !strings.HasPrefix(filepath.Clean(target), cleanExtractDir) { + t.Errorf("critical issue!... absolute symlink escapes extraction dir!") + t.Logf(" symlink: %s → %s", symlinkPath, target) + return true + } + } + } + // if it exists but is not a symlink, that's good (attack was thwarted)... + } + + return false +} + +// createMaliciousZipWithSymlink creates a ZIP archive containing a symlink entry pointing to an arbitrary target, +// followed by a file entry that attempts to write through that symlink. +// returns the path to the created ZIP archive. +func createMaliciousZipWithSymlink(t *testing.T, tempDir, symlinkName, symlinkTarget, fileName string) string { + t.Helper() + + maliciousZip := filepath.Join(tempDir, "malicious.zip") + zipFile, err := os.Create(maliciousZip) + require.NoError(t, err) + defer zipFile.Close() + + zw := zip.NewWriter(zipFile) + + // create parent directories if the symlink is nested + if dir := filepath.Dir(symlinkName); dir != "." { + dirHeader := &zip.FileHeader{ + Name: dir + "/", + Method: zip.Store, + } + dirHeader.SetMode(os.ModeDir | 0755) + _, err = zw.CreateHeader(dirHeader) + require.NoError(t, err) + } + + // create symlink entry pointing outside extraction directory + // note: ZIP format stores symlinks as regular files with the target path as content + symlinkHeader := &zip.FileHeader{ + Name: symlinkName, + Method: zip.Store, + } + symlinkHeader.SetMode(os.ModeSymlink | 0755) + + symlinkWriter, err := zw.CreateHeader(symlinkHeader) + require.NoError(t, err) + + // write the symlink target as the file content (this is how ZIP stores symlinks) + _, err = symlinkWriter.Write([]byte(symlinkTarget)) + require.NoError(t, err) + + // create file entry that will be written through the symlink + if fileName != "" { + payloadContent := []byte("MALICIOUS PAYLOAD - This should NOT be written outside extraction dir!") + payloadHeader := &zip.FileHeader{ + Name: fileName, + Method: zip.Deflate, + } + payloadHeader.SetMode(0644) + + payloadWriter, err := zw.CreateHeader(payloadHeader) + require.NoError(t, err) + + _, err = payloadWriter.Write(payloadContent) + require.NoError(t, err) + } + + require.NoError(t, zw.Close()) + require.NoError(t, zipFile.Close()) + + return maliciousZip +} diff --git a/internal/file/zip_read_closer.go b/internal/file/zip_read_closer.go deleted file mode 100644 index fd45f52a1..000000000 --- a/internal/file/zip_read_closer.go +++ /dev/null @@ -1,229 +0,0 @@ -package file - -import ( - "archive/zip" - "encoding/binary" - "errors" - "fmt" - "io" - "math" - "os" - - "github.com/anchore/syft/internal/log" -) - -// directoryEndLen, readByf, directoryEnd, and findSignatureInBlock were copied from the golang stdlib, specifically: -// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/struct.go -// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/reader.go -// findArchiveStartOffset is derived from the same stdlib utils, 
specifically the readDirectoryEnd function. - -const ( - directoryEndLen = 22 - directory64LocLen = 20 - directory64EndLen = 56 - directory64LocSignature = 0x07064b50 - directory64EndSignature = 0x06064b50 -) - -// ZipReadCloser is a drop-in replacement for zip.ReadCloser (from zip.OpenReader) that additionally considers zips -// that have bytes prefixed to the front of the archive (common with self-extracting jars). -type ZipReadCloser struct { - *zip.Reader - io.Closer -} - -// OpenZip provides a ZipReadCloser for the given filepath. -func OpenZip(filepath string) (*ZipReadCloser, error) { - f, err := os.Open(filepath) - if err != nil { - return nil, err - } - fi, err := f.Stat() - if err != nil { - f.Close() - return nil, err - } - - // some archives may have bytes prepended to the front of the archive, such as with self executing JARs. We first - // need to find the start of the archive and keep track of this offset. - offset, err := findArchiveStartOffset(f, fi.Size()) - if err != nil { - log.Debugf("cannot find beginning of zip archive=%q : %v", filepath, err) - return nil, err - } - - if _, err := f.Seek(0, io.SeekStart); err != nil { - return nil, fmt.Errorf("unable to seek to beginning of archive: %w", err) - } - - if offset > math.MaxInt64 { - return nil, fmt.Errorf("archive start offset too large: %v", offset) - } - offset64 := int64(offset) - - size := fi.Size() - offset64 - - r, err := zip.NewReader(io.NewSectionReader(f, offset64, size), size) - if err != nil { - log.Debugf("unable to open ZipReadCloser @ %q: %v", filepath, err) - return nil, err - } - - return &ZipReadCloser{ - Reader: r, - Closer: f, - }, nil -} - -type readBuf []byte - -func (b *readBuf) uint16() uint16 { - v := binary.LittleEndian.Uint16(*b) - *b = (*b)[2:] - return v -} - -func (b *readBuf) uint32() uint32 { - v := binary.LittleEndian.Uint32(*b) - *b = (*b)[4:] - return v -} - -func (b *readBuf) uint64() uint64 { - v := binary.LittleEndian.Uint64(*b) - *b = (*b)[8:] - return v -} - -type directoryEnd struct { - diskNbr uint32 // unused - dirDiskNbr uint32 // unused - dirRecordsThisDisk uint64 // unused - directoryRecords uint64 - directorySize uint64 - directoryOffset uint64 // relative to file -} - -// note: this is derived from readDirectoryEnd within the archive/zip package -func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, err error) { - // look for directoryEndSignature in the last 1k, then in the last 65k - var buf []byte - var directoryEndOffset int64 - for i, bLen := range []int64{1024, 65 * 1024} { - if bLen > size { - bLen = size - } - buf = make([]byte, int(bLen)) - if _, err := r.ReadAt(buf, size-bLen); err != nil && !errors.Is(err, io.EOF) { - return 0, err - } - if p := findSignatureInBlock(buf); p >= 0 { - buf = buf[p:] - directoryEndOffset = size - bLen + int64(p) - break - } - if i == 1 || bLen == size { - return 0, zip.ErrFormat - } - } - - if buf == nil { - // we were unable to find the directoryEndSignature block - return 0, zip.ErrFormat - } - - // read header into struct - b := readBuf(buf[4:]) // skip signature - d := &directoryEnd{ - diskNbr: uint32(b.uint16()), - dirDiskNbr: uint32(b.uint16()), - dirRecordsThisDisk: uint64(b.uint16()), - directoryRecords: uint64(b.uint16()), - directorySize: uint64(b.uint32()), - directoryOffset: uint64(b.uint32()), - } - // Calculate where the zip data actually begins - - // These values mean that the file can be a zip64 file - if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 
0xffffffff { - p, err := findDirectory64End(r, directoryEndOffset) - if err == nil && p >= 0 { - directoryEndOffset = p - err = readDirectory64End(r, p, d) - } - if err != nil { - return 0, err - } - } - startOfArchive = uint64(directoryEndOffset) - d.directorySize - d.directoryOffset - - // Make sure directoryOffset points to somewhere in our file. - if d.directoryOffset >= uint64(size) { - return 0, zip.ErrFormat - } - return startOfArchive, nil -} - -// findDirectory64End tries to read the zip64 locator just before the -// directory end and returns the offset of the zip64 directory end if -// found. -func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { - locOffset := directoryEndOffset - directory64LocLen - if locOffset < 0 { - return -1, nil // no need to look for a header outside the file - } - buf := make([]byte, directory64LocLen) - if _, err := r.ReadAt(buf, locOffset); err != nil { - return -1, err - } - b := readBuf(buf) - if sig := b.uint32(); sig != directory64LocSignature { - return -1, nil - } - if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory - return -1, nil // the file is not a valid zip64-file - } - p := b.uint64() // relative offset of the zip64 end of central directory record - if b.uint32() != 1 { // total number of disks - return -1, nil // the file is not a valid zip64-file - } - return int64(p), nil -} - -// readDirectory64End reads the zip64 directory end and updates the -// directory end with the zip64 directory end values. -func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { - buf := make([]byte, directory64EndLen) - if _, err := r.ReadAt(buf, offset); err != nil { - return err - } - - b := readBuf(buf) - if sig := b.uint32(); sig != directory64EndSignature { - return errors.New("could not read directory64End") - } - - b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) - d.diskNbr = b.uint32() // number of this disk - d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory - d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk - d.directoryRecords = b.uint64() // total number of entries in the central directory - d.directorySize = b.uint64() // size of the central directory - d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number - - return nil -} - -func findSignatureInBlock(b []byte) int { - for i := len(b) - directoryEndLen; i >= 0; i-- { - // defined from directoryEndSignature - if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { - // n is length of comment - n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 - if n+directoryEndLen+i <= len(b) { - return i - } - } - } - return -1 -} diff --git a/internal/file/zip_read_closer_test.go b/internal/file/zip_read_closer_test.go deleted file mode 100644 index 349bfcc9b..000000000 --- a/internal/file/zip_read_closer_test.go +++ /dev/null @@ -1,50 +0,0 @@ -//go:build !windows -// +build !windows - -package file - -import ( - "os" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestFindArchiveStartOffset(t *testing.T) { - tests := []struct { - name string - archivePrep func(tb testing.TB) string - expected uint64 - }{ - { - name: "standard, non-nested zip", - archivePrep: prepZipSourceFixture, - expected: 0, - }, - { - name: "zip with prepended bytes", - archivePrep: prependZipSourceFixtureWithString(t, 
"junk at the beginning of the file..."), - expected: 36, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - archivePath := test.archivePrep(t) - f, err := os.Open(archivePath) - if err != nil { - t.Fatalf("could not open archive %q: %+v", archivePath, err) - } - fi, err := os.Stat(f.Name()) - if err != nil { - t.Fatalf("unable to stat archive: %+v", err) - } - - actual, err := findArchiveStartOffset(f, fi.Size()) - if err != nil { - t.Fatalf("unable to find offset: %+v", err) - } - assert.Equal(t, test.expected, actual) - }) - } -} diff --git a/internal/packagemetadata/generated.go b/internal/packagemetadata/generated.go index d718bc6e0..7178662f7 100644 --- a/internal/packagemetadata/generated.go +++ b/internal/packagemetadata/generated.go @@ -27,6 +27,7 @@ func AllTypes() []any { pkg.ELFBinaryPackageNoteJSONPayload{}, pkg.ElixirMixLockEntry{}, pkg.ErlangRebarLockEntry{}, + pkg.GGUFFileHeader{}, pkg.GitHubActionsUseStatement{}, pkg.GolangBinaryBuildinfoEntry{}, pkg.GolangModuleEntry{}, diff --git a/internal/packagemetadata/names.go b/internal/packagemetadata/names.go index e719aa7f9..d7a1bfcf9 100644 --- a/internal/packagemetadata/names.go +++ b/internal/packagemetadata/names.go @@ -124,6 +124,7 @@ var jsonTypes = makeJSONTypes( jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"), jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"), jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"), + jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"), ) func expandLegacyNameVariants(names ...string) []string { diff --git a/internal/task/package_tasks.go b/internal/task/package_tasks.go index 79b9167ba..a50099593 100644 --- a/internal/task/package_tasks.go +++ b/internal/task/package_tasks.go @@ -3,6 +3,7 @@ package task import ( "github.com/anchore/syft/syft/cataloging/pkgcataloging" "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/ai" "github.com/anchore/syft/syft/pkg/cataloger/alpine" "github.com/anchore/syft/syft/pkg/cataloger/arch" "github.com/anchore/syft/syft/pkg/cataloger/binary" @@ -178,6 +179,7 @@ func DefaultPackageTaskFactories() Factories { newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"), newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"), newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"), + newSimplePackageTaskFactory(ai.NewGGUFCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "gguf", "ml"), // deprecated catalogers //////////////////////////////////////// // these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible) diff --git a/internal/task/unknowns_tasks.go b/internal/task/unknowns_tasks.go index 0b8959bd0..2f63ce28e 100644 --- a/internal/task/unknowns_tasks.go +++ b/internal/task/unknowns_tasks.go @@ -4,7 +4,8 @@ import ( "context" "strings" - "github.com/anchore/archiver/v3" + "github.com/mholt/archives" + "github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/sbomsync" "github.com/anchore/syft/syft/cataloging" @@ -57,9 +58,10 @@ func (c unknownsLabelerTask) finalize(resolver file.Resolver, s *sbom.SBOM) { } if c.IncludeUnexpandedArchives { + ctx 
:= context.Background() for coords := range s.Artifacts.FileMetadata { - unarchiver, notArchiveErr := archiver.ByExtension(coords.RealPath) - if unarchiver != nil && notArchiveErr == nil && !hasPackageReference(coords) { + format, _, notArchiveErr := archives.Identify(ctx, coords.RealPath, nil) + if format != nil && notArchiveErr == nil && !hasPackageReference(coords) { s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged") } } diff --git a/schema/json/schema-16.0.42.json b/schema/json/schema-16.0.42.json index e27d96039..247969bc4 100644 --- a/schema/json/schema-16.0.42.json +++ b/schema/json/schema-16.0.42.json @@ -130,7 +130,8 @@ "description": "Digests contains file content hashes for integrity verification" } }, - "type": "object" + "type": "object", + "description": "AlpmFileRecord represents a single file entry within an Arch Linux package with its associated metadata tracked by pacman." }, "ApkDbEntry": { "properties": { @@ -433,16 +434,19 @@ "CPE": { "properties": { "cpe": { - "type": "string" + "type": "string", + "description": "Value is the CPE string identifier." }, "source": { - "type": "string" + "type": "string", + "description": "Source is the source where this CPE was obtained or generated from." } }, "type": "object", "required": [ "cpe" - ] + ], + "description": "CPE represents a Common Platform Enumeration identifier used for matching packages to known vulnerabilities in security databases." }, "ClassifierMatch": { "properties": { @@ -747,19 +751,23 @@ "Descriptor": { "properties": { "name": { - "type": "string" + "type": "string", + "description": "Name is the name of the tool that generated this SBOM (e.g., \"syft\")." }, "version": { - "type": "string" + "type": "string", + "description": "Version is the version of the tool that generated this SBOM." }, - "configuration": true + "configuration": { + "description": "Configuration contains the tool configuration used during SBOM generation." + } }, "type": "object", "required": [ "name", "version" ], - "description": "Descriptor describes what created the document as well as surrounding metadata" + "description": "Descriptor identifies the tool that generated this SBOM document, including its name, version, and configuration used during catalog generation." }, "Digest": { "properties": { @@ -1285,58 +1293,71 @@ "File": { "properties": { "id": { - "type": "string" + "type": "string", + "description": "ID is a unique identifier for this file within the SBOM." }, "location": { - "$ref": "#/$defs/Coordinates" + "$ref": "#/$defs/Coordinates", + "description": "Location is the file path and layer information where this file was found." }, "metadata": { - "$ref": "#/$defs/FileMetadataEntry" + "$ref": "#/$defs/FileMetadataEntry", + "description": "Metadata contains filesystem metadata such as permissions, ownership, and file type." }, "contents": { - "type": "string" + "type": "string", + "description": "Contents is the file contents for small files." }, "digests": { "items": { "$ref": "#/$defs/Digest" }, - "type": "array" + "type": "array", + "description": "Digests contains cryptographic hashes of the file contents." }, "licenses": { "items": { "$ref": "#/$defs/FileLicense" }, - "type": "array" + "type": "array", + "description": "Licenses contains license information discovered within this file." }, "executable": { - "$ref": "#/$defs/Executable" + "$ref": "#/$defs/Executable", + "description": "Executable contains executable metadata if this file is a binary." 
}, "unknowns": { "items": { "type": "string" }, - "type": "array" + "type": "array", + "description": "Unknowns contains unknown fields for forward compatibility." } }, "type": "object", "required": [ "id", "location" - ] + ], + "description": "File represents a file discovered during cataloging with its metadata, content digests, licenses, and relationships to packages." }, "FileLicense": { "properties": { "value": { - "type": "string" + "type": "string", + "description": "Value is the raw license identifier or text as found in the file." }, "spdxExpression": { - "type": "string" + "type": "string", + "description": "SPDXExpression is the parsed SPDX license expression." }, "type": { - "type": "string" + "type": "string", + "description": "Type is the license type classification (e.g., declared, concluded, discovered)." }, "evidence": { - "$ref": "#/$defs/FileLicenseEvidence" + "$ref": "#/$defs/FileLicenseEvidence", + "description": "Evidence contains supporting evidence for this license detection." } }, "type": "object", @@ -1344,18 +1365,22 @@ "value", "spdxExpression", "type" - ] + ], + "description": "FileLicense represents license information discovered within a file's contents or metadata, including the matched license text and SPDX expression." }, "FileLicenseEvidence": { "properties": { "confidence": { - "type": "integer" + "type": "integer", + "description": "Confidence is the confidence score for this license detection (0-100)." }, "offset": { - "type": "integer" + "type": "integer", + "description": "Offset is the byte offset where the license text starts in the file." }, "extent": { - "type": "integer" + "type": "integer", + "description": "Extent is the length of the license text in bytes." } }, "type": "object", @@ -1363,30 +1388,38 @@ "confidence", "offset", "extent" - ] + ], + "description": "FileLicenseEvidence contains supporting evidence for a license detection in a file, including the byte offset, extent, and confidence level." }, "FileMetadataEntry": { "properties": { "mode": { - "type": "integer" + "type": "integer", + "description": "Mode is the Unix file permission mode in octal format." }, "type": { - "type": "string" + "type": "string", + "description": "Type is the file type (e.g., \"RegularFile\", \"Directory\", \"SymbolicLink\")." }, "linkDestination": { - "type": "string" + "type": "string", + "description": "LinkDestination is the target path for symbolic links." }, "userID": { - "type": "integer" + "type": "integer", + "description": "UserID is the file owner user ID." }, "groupID": { - "type": "integer" + "type": "integer", + "description": "GroupID is the file owner group ID." }, "mimeType": { - "type": "string" + "type": "string", + "description": "MIMEType is the MIME type of the file contents." }, "size": { - "type": "integer" + "type": "integer", + "description": "Size is the file size in bytes." } }, "type": "object", @@ -1397,7 +1430,8 @@ "groupID", "mimeType", "size" - ] + ], + "description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file." }, "GithubActionsUseStatement": { "properties": { @@ -1545,7 +1579,8 @@ "items": { "type": "string" }, - "type": "array" + "type": "array", + "description": "IDLikes represents a list of distribution IDs that this Linux distribution is similar to or derived from, as defined in os-release ID_LIKE field." 
}, "JavaArchive": { "properties": { @@ -1974,28 +2009,34 @@ "License": { "properties": { "value": { - "type": "string" + "type": "string", + "description": "Value is the raw license identifier or expression as found." }, "spdxExpression": { - "type": "string" + "type": "string", + "description": "SPDXExpression is the parsed SPDX license expression." }, "type": { - "type": "string" + "type": "string", + "description": "Type is the license type classification (e.g., declared, concluded, discovered)." }, "urls": { "items": { "type": "string" }, - "type": "array" + "type": "array", + "description": "URLs are URLs where license text or information can be found." }, "locations": { "items": { "$ref": "#/$defs/Location" }, - "type": "array" + "type": "array", + "description": "Locations are file locations where this license was discovered." }, "contents": { - "type": "string" + "type": "string", + "description": "Contents is the full license text content." } }, "type": "object", @@ -2005,7 +2046,8 @@ "type", "urls", "locations" - ] + ], + "description": "License represents software license information discovered for a package, including SPDX expressions and supporting evidence locations." }, "LinuxKernelArchive": { "properties": { @@ -2130,64 +2172,84 @@ "LinuxRelease": { "properties": { "prettyName": { - "type": "string" + "type": "string", + "description": "PrettyName is a human-readable operating system name with version." }, "name": { - "type": "string" + "type": "string", + "description": "Name is the operating system name without version information." }, "id": { - "type": "string" + "type": "string", + "description": "ID is the lower-case operating system identifier (e.g., \"ubuntu\", \"rhel\")." }, "idLike": { - "$ref": "#/$defs/IDLikes" + "$ref": "#/$defs/IDLikes", + "description": "IDLike is a list of operating system IDs this distribution is similar to or derived from." }, "version": { - "type": "string" + "type": "string", + "description": "Version is the operating system version including codename if available." }, "versionID": { - "type": "string" + "type": "string", + "description": "VersionID is the operating system version number or identifier." }, "versionCodename": { - "type": "string" + "type": "string", + "description": "VersionCodename is the operating system release codename (e.g., \"jammy\", \"bullseye\")." }, "buildID": { - "type": "string" + "type": "string", + "description": "BuildID is a build identifier for the operating system." }, "imageID": { - "type": "string" + "type": "string", + "description": "ImageID is an identifier for container or cloud images." }, "imageVersion": { - "type": "string" + "type": "string", + "description": "ImageVersion is the version for container or cloud images." }, "variant": { - "type": "string" + "type": "string", + "description": "Variant is the operating system variant name (e.g., \"Server\", \"Workstation\")." }, "variantID": { - "type": "string" + "type": "string", + "description": "VariantID is the lower-case operating system variant identifier." }, "homeURL": { - "type": "string" + "type": "string", + "description": "HomeURL is the homepage URL for the operating system." }, "supportURL": { - "type": "string" + "type": "string", + "description": "SupportURL is the support or help URL for the operating system." }, "bugReportURL": { - "type": "string" + "type": "string", + "description": "BugReportURL is the bug reporting URL for the operating system." 
}, "privacyPolicyURL": { - "type": "string" + "type": "string", + "description": "PrivacyPolicyURL is the privacy policy URL for the operating system." }, "cpeName": { - "type": "string" + "type": "string", + "description": "CPEName is the Common Platform Enumeration name for the operating system." }, "supportEnd": { - "type": "string" + "type": "string", + "description": "SupportEnd is the end of support date or version identifier." }, "extendedSupport": { - "type": "boolean" + "type": "boolean", + "description": "ExtendedSupport indicates whether extended security or support is available." } }, - "type": "object" + "type": "object", + "description": "LinuxRelease contains Linux distribution identification and version information extracted from /etc/os-release or similar system files." }, "Location": { "properties": { @@ -2283,7 +2345,7 @@ "product_id", "kb" ], - "description": "MicrosoftKbPatch is slightly odd in how it is expected to map onto data." + "description": "MicrosoftKbPatch represents a Windows Knowledge Base patch identifier associated with a specific Microsoft product from the MSRC (Microsoft Security Response Center)." }, "NixDerivation": { "properties": { @@ -3014,7 +3076,8 @@ "type": "object", "required": [ "integrity" - ] + ], + "description": "PnpmLockResolution contains package resolution metadata from pnpm lockfiles, including the integrity hash used for verification." }, "PortageDbEntry": { "properties": { @@ -3501,22 +3564,28 @@ "Relationship": { "properties": { "parent": { - "type": "string" + "type": "string", + "description": "Parent is the ID of the parent artifact in this relationship." }, "child": { - "type": "string" + "type": "string", + "description": "Child is the ID of the child artifact in this relationship." }, "type": { - "type": "string" + "type": "string", + "description": "Type is the relationship type (e.g., \"contains\", \"dependency-of\", \"ancestor-of\")." }, - "metadata": true + "metadata": { + "description": "Metadata contains additional relationship-specific metadata." + } }, "type": "object", "required": [ "parent", "child", "type" - ] + ], + "description": "Relationship represents a directed relationship between two artifacts in the SBOM, such as package-contains-file or package-depends-on-package." }, "RpmArchive": { "properties": { @@ -3863,17 +3932,20 @@ "Schema": { "properties": { "version": { - "type": "string" + "type": "string", + "description": "Version is the JSON schema version for this document format." }, "url": { - "type": "string" + "type": "string", + "description": "URL is the URL to the JSON schema definition document." } }, "type": "object", "required": [ "version", "url" - ] + ], + "description": "Schema specifies the JSON schema version and URL reference that defines the structure and validation rules for this document format." }, "SnapEntry": { "properties": { @@ -3911,21 +3983,28 @@ "Source": { "properties": { "id": { - "type": "string" + "type": "string", + "description": "ID is a unique identifier for the analyzed source artifact." }, "name": { - "type": "string" + "type": "string", + "description": "Name is the name of the analyzed artifact (e.g., image name, directory path)." }, "version": { - "type": "string" + "type": "string", + "description": "Version is the version of the analyzed artifact (e.g., image tag)." }, "supplier": { - "type": "string" + "type": "string", + "description": "Supplier is supplier information, which can be user-provided for NTIA minimum elements compliance." 
}, "type": { - "type": "string" + "type": "string", + "description": "Type is the source type (e.g., \"image\", \"directory\", \"file\")." }, - "metadata": true + "metadata": { + "description": "Metadata contains additional source-specific metadata." + } }, "type": "object", "required": [ @@ -3935,7 +4014,7 @@ "type", "metadata" ], - "description": "Instead, the Supplier can be determined by the user of syft and passed as a config or flag to help fulfill the NTIA minimum elements." + "description": "Source represents the artifact that was analyzed to generate this SBOM, such as a container image, directory, or file archive." }, "SwiftPackageManagerLockEntry": { "properties": { diff --git a/schema/json/schema-16.0.43.json b/schema/json/schema-16.0.43.json new file mode 100644 index 000000000..00a53ffcd --- /dev/null +++ b/schema/json/schema-16.0.43.json @@ -0,0 +1,4193 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "anchore.io/schema/syft/json/16.0.43/document", + "$ref": "#/$defs/Document", + "$defs": { + "AlpmDbEntry": { + "properties": { + "basepackage": { + "type": "string", + "description": "BasePackage is the base package name this package was built from (source package in Arch build system)" + }, + "package": { + "type": "string", + "description": "Package is the package name as found in the desc file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the desc file" + }, + "description": { + "type": "string", + "description": "Description is a human-readable package description" + }, + "architecture": { + "type": "string", + "description": "Architecture is the target CPU architecture as defined in Arch architecture spec (e.g. x86_64, aarch64, or \"any\" for arch-independent packages)" + }, + "size": { + "type": "integer", + "description": "Size is the installed size in bytes" + }, + "packager": { + "type": "string", + "description": "Packager is the name and email of the person who packaged this (RFC822 format)" + }, + "url": { + "type": "string", + "description": "URL is the upstream project URL" + }, + "validation": { + "type": "string", + "description": "Validation is the validation method used for package integrity (e.g. pgp signature, sha256 checksum)" + }, + "reason": { + "type": "integer", + "description": "Reason is the installation reason tracked by pacman (0=explicitly installed by user, 1=installed as dependency)" + }, + "files": { + "items": { + "$ref": "#/$defs/AlpmFileRecord" + }, + "type": "array", + "description": "Files are the files installed by this package" + }, + "backup": { + "items": { + "$ref": "#/$defs/AlpmFileRecord" + }, + "type": "array", + "description": "Backup is the list of configuration files that pacman backs up before upgrades" + }, + "provides": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Provides are virtual packages provided by this package (allows other packages to depend on capabilities rather than specific packages)" + }, + "depends": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Depends are the runtime dependencies required by this package" + } + }, + "type": "object", + "required": [ + "basepackage", + "package", + "version", + "description", + "architecture", + "size", + "packager", + "url", + "validation", + "reason", + "files", + "backup" + ], + "description": "AlpmDBEntry is a struct that represents the package data stored in the pacman flat-file stores for arch linux." 
+ }, + "AlpmFileRecord": { + "properties": { + "path": { + "type": "string", + "description": "Path is the file path relative to the filesystem root" + }, + "type": { + "type": "string", + "description": "Type is the file type (e.g. regular file, directory, symlink)" + }, + "uid": { + "type": "string", + "description": "UID is the file owner user ID as recorded by pacman" + }, + "gid": { + "type": "string", + "description": "GID is the file owner group ID as recorded by pacman" + }, + "time": { + "type": "string", + "format": "date-time", + "description": "Time is the file modification timestamp" + }, + "size": { + "type": "string", + "description": "Size is the file size in bytes" + }, + "link": { + "type": "string", + "description": "Link is the symlink target path if this is a symlink" + }, + "digest": { + "items": { + "$ref": "#/$defs/Digest" + }, + "type": "array", + "description": "Digests contains file content hashes for integrity verification" + } + }, + "type": "object", + "description": "AlpmFileRecord represents a single file entry within an Arch Linux package with its associated metadata tracked by pacman." + }, + "ApkDbEntry": { + "properties": { + "package": { + "type": "string", + "description": "Package is the package name as found in the installed file" + }, + "originPackage": { + "type": "string", + "description": "OriginPackage is the original source package name this binary was built from (used to track which aport/source built this)" + }, + "maintainer": { + "type": "string", + "description": "Maintainer is the package maintainer name and email" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the installed file" + }, + "architecture": { + "type": "string", + "description": "Architecture is the target CPU architecture" + }, + "url": { + "type": "string", + "description": "URL is the upstream project URL" + }, + "description": { + "type": "string", + "description": "Description is a human-readable package description" + }, + "size": { + "type": "integer", + "description": "Size is the package archive size in bytes (.apk file size)" + }, + "installedSize": { + "type": "integer", + "description": "InstalledSize is the total size of installed files in bytes" + }, + "pullDependencies": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Dependencies are the runtime dependencies required by this package" + }, + "provides": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Provides are virtual packages provided by this package (for capability-based dependencies)" + }, + "pullChecksum": { + "type": "string", + "description": "Checksum is the package content checksum for integrity verification" + }, + "gitCommitOfApkPort": { + "type": "string", + "description": "GitCommit is the git commit hash of the APK port definition in Alpine's aports repository" + }, + "files": { + "items": { + "$ref": "#/$defs/ApkFileRecord" + }, + "type": "array", + "description": "Files are the files installed by this package" + } + }, + "type": "object", + "required": [ + "package", + "originPackage", + "maintainer", + "version", + "architecture", + "url", + "description", + "size", + "installedSize", + "pullDependencies", + "provides", + "pullChecksum", + "gitCommitOfApkPort", + "files" + ], + "description": "ApkDBEntry represents all captured data for the alpine linux package manager flat-file store." 
+ },
+ "ApkFileRecord": {
+ "properties": {
+ "path": {
+ "type": "string",
+ "description": "Path is the file path relative to the filesystem root"
+ },
+ "ownerUid": {
+ "type": "string",
+ "description": "OwnerUID is the file owner user ID"
+ },
+ "ownerGid": {
+ "type": "string",
+ "description": "OwnerGID is the file owner group ID"
+ },
+ "permissions": {
+ "type": "string",
+ "description": "Permissions is the file permission mode string (e.g. \"0755\", \"0644\")"
+ },
+ "digest": {
+ "$ref": "#/$defs/Digest",
+ "description": "Digest is the file content hash for integrity verification"
+ }
+ },
+ "type": "object",
+ "required": [
+ "path"
+ ],
+ "description": "ApkFileRecord represents a single file listing and metadata from an APK DB entry (which may have many of these file records)."
+ },
+ "BinarySignature": {
+ "properties": {
+ "matches": {
+ "items": {
+ "$ref": "#/$defs/ClassifierMatch"
+ },
+ "type": "array"
+ }
+ },
+ "type": "object",
+ "required": [
+ "matches"
+ ],
+ "description": "BinarySignature represents a set of matched values within a binary file."
+ },
+ "BitnamiSbomEntry": {
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "Name is the package name as found in the Bitnami SPDX file"
+ },
+ "arch": {
+ "type": "string",
+ "description": "Architecture is the target CPU architecture (amd64 or arm64 in Bitnami images)"
+ },
+ "distro": {
+ "type": "string",
+ "description": "Distro is the distribution name this package is for (base OS like debian, ubuntu, etc.)"
+ },
+ "revision": {
+ "type": "string",
+ "description": "Revision is the Bitnami-specific package revision number (incremented for Bitnami rebuilds of same upstream version)"
+ },
+ "version": {
+ "type": "string",
+ "description": "Version is the package version as found in the Bitnami SPDX file"
+ },
+ "path": {
+ "type": "string",
+ "description": "Path is the installation path in the filesystem where the package is located"
+ },
+ "files": {
+ "items": {
+ "type": "string"
+ },
+ "type": "array",
+ "description": "Files are the file paths owned by this package (tracked via SPDX relationships)"
+ }
+ },
+ "type": "object",
+ "required": [
+ "name",
+ "arch",
+ "distro",
+ "revision",
+ "version",
+ "path",
+ "files"
+ ],
+ "description": "BitnamiSBOMEntry represents all captured data from Bitnami packages described in Bitnami's SPDX files."
+ },
+ "CConanFileEntry": {
+ "properties": {
+ "ref": {
+ "type": "string",
+ "description": "Ref is the package reference string in format name/version@user/channel"
+ }
+ },
+ "type": "object",
+ "required": [
+ "ref"
+ ],
+ "description": "ConanfileEntry represents a single \"Requires\" entry from a conanfile.txt."
+ },
+ "CConanInfoEntry": {
+ "properties": {
+ "ref": {
+ "type": "string",
+ "description": "Ref is the package reference string in format name/version@user/channel"
+ },
+ "package_id": {
+ "type": "string",
+ "description": "PackageID is a unique package variant identifier"
+ }
+ },
+ "type": "object",
+ "required": [
+ "ref"
+ ],
+ "description": "ConaninfoEntry represents a single \"full_requires\" entry from a conaninfo.txt."
+ }, + "CConanLockEntry": { + "properties": { + "ref": { + "type": "string", + "description": "Ref is the package reference string in format name/version@user/channel" + }, + "package_id": { + "type": "string", + "description": "PackageID is a unique package variant identifier computed from settings/options (static hash in Conan 1.x, can have collisions with complex dependency graphs)" + }, + "prev": { + "type": "string", + "description": "Prev is the previous lock entry reference for versioning" + }, + "requires": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Requires are the runtime package dependencies" + }, + "build_requires": { + "items": { + "type": "string" + }, + "type": "array", + "description": "BuildRequires are the build-time dependencies (e.g. cmake, compilers)" + }, + "py_requires": { + "items": { + "type": "string" + }, + "type": "array", + "description": "PythonRequires are the Python dependencies needed for Conan recipes" + }, + "options": { + "$ref": "#/$defs/KeyValues", + "description": "Options are package configuration options as key-value pairs (e.g. shared=True, fPIC=True)" + }, + "path": { + "type": "string", + "description": "Path is the filesystem path to the package in Conan cache" + }, + "context": { + "type": "string", + "description": "Context is the build context information" + } + }, + "type": "object", + "required": [ + "ref" + ], + "description": "ConanV1LockEntry represents a single \"node\" entry from a conan.lock V1 file." + }, + "CConanLockV2Entry": { + "properties": { + "ref": { + "type": "string", + "description": "Ref is the package reference string in format name/version@user/channel" + }, + "packageID": { + "type": "string", + "description": "PackageID is a unique package variant identifier (dynamic in Conan 2.0, more accurate than V1)" + }, + "username": { + "type": "string", + "description": "Username is the Conan user/organization name" + }, + "channel": { + "type": "string", + "description": "Channel is the Conan channel name indicating stability/purpose (e.g. stable, testing, experimental)" + }, + "recipeRevision": { + "type": "string", + "description": "RecipeRevision is a git-like revision hash (RREV) of the recipe" + }, + "packageRevision": { + "type": "string", + "description": "PackageRevision is a git-like revision hash of the built binary package" + }, + "timestamp": { + "type": "string", + "description": "TimeStamp is when this package was built/locked" + } + }, + "type": "object", + "required": [ + "ref" + ], + "description": "ConanV2LockEntry represents a single \"node\" entry from a conan.lock V2 file." + }, + "CPE": { + "properties": { + "cpe": { + "type": "string", + "description": "Value is the CPE string identifier." + }, + "source": { + "type": "string", + "description": "Source is the source where this CPE was obtained or generated from." + } + }, + "type": "object", + "required": [ + "cpe" + ], + "description": "CPE represents a Common Platform Enumeration identifier used for matching packages to known vulnerabilities in security databases." + }, + "ClassifierMatch": { + "properties": { + "classifier": { + "type": "string" + }, + "location": { + "$ref": "#/$defs/Location" + } + }, + "type": "object", + "required": [ + "classifier", + "location" + ], + "description": "ClassifierMatch represents a single matched value within a binary file and the \"class\" name the search pattern represents." 
+ }, + "CocoaPodfileLockEntry": { + "properties": { + "checksum": { + "type": "string", + "description": "Checksum is the SHA-1 hash of the podspec file for integrity verification (generated via `pod ipc spec ... | openssl sha1`), ensuring all team members use the same pod specification version" + } + }, + "type": "object", + "required": [ + "checksum" + ], + "description": "CocoaPodfileLockEntry represents a single entry from the \"Pods\" section of a Podfile.lock file." + }, + "CondaLink": { + "properties": { + "source": { + "type": "string", + "description": "Source is the original path where the package was extracted from cache." + }, + "type": { + "type": "integer", + "description": "Type indicates the link type (1 for hard link, 2 for soft link, 3 for copy)." + } + }, + "type": "object", + "required": [ + "source", + "type" + ], + "description": "CondaLink represents link metadata from a Conda package's link.json file describing package installation source." + }, + "CondaMetadataEntry": { + "properties": { + "arch": { + "type": "string", + "description": "Arch is the target CPU architecture for the package (e.g., \"arm64\", \"x86_64\")." + }, + "name": { + "type": "string", + "description": "Name is the package name as found in the conda-meta JSON file." + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the conda-meta JSON file." + }, + "build": { + "type": "string", + "description": "Build is the build string identifier (e.g., \"h90dfc92_1014\")." + }, + "build_number": { + "type": "integer", + "description": "BuildNumber is the sequential build number for this version." + }, + "channel": { + "type": "string", + "description": "Channel is the Conda channel URL where the package was retrieved from." + }, + "subdir": { + "type": "string", + "description": "Subdir is the subdirectory within the channel (e.g., \"osx-arm64\", \"linux-64\")." + }, + "noarch": { + "type": "string", + "description": "Noarch indicates if the package is platform-independent (e.g., \"python\", \"generic\")." + }, + "license": { + "type": "string", + "description": "License is the package license identifier." + }, + "license_family": { + "type": "string", + "description": "LicenseFamily is the general license category (e.g., \"MIT\", \"Apache\", \"GPL\")." + }, + "md5": { + "type": "string", + "description": "MD5 is the MD5 hash of the package archive." + }, + "sha256": { + "type": "string", + "description": "SHA256 is the SHA-256 hash of the package archive." + }, + "size": { + "type": "integer", + "description": "Size is the package archive size in bytes." + }, + "timestamp": { + "type": "integer", + "description": "Timestamp is the Unix timestamp when the package was built." + }, + "fn": { + "type": "string", + "description": "Filename is the original package archive filename (e.g., \"zlib-1.2.11-h90dfc92_1014.tar.bz2\")." + }, + "url": { + "type": "string", + "description": "URL is the full download URL for the package archive." + }, + "extracted_package_dir": { + "type": "string", + "description": "ExtractedPackageDir is the local cache directory where the package was extracted." + }, + "depends": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Depends is the list of runtime dependencies with version constraints." + }, + "files": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Files is the list of files installed by this package." 
+ },
+ "paths_data": {
+ "$ref": "#/$defs/CondaPathsData",
+ "description": "PathsData contains detailed file metadata from the paths.json file."
+ },
+ "link": {
+ "$ref": "#/$defs/CondaLink",
+ "description": "Link contains installation source metadata from the link.json file."
+ }
+ },
+ "type": "object",
+ "required": [
+ "name",
+ "version",
+ "build",
+ "build_number"
+ ],
+ "description": "CondaMetaPackage represents metadata for a Conda package extracted from the conda-meta/*.json files."
+ },
+ "CondaPathData": {
+ "properties": {
+ "_path": {
+ "type": "string",
+ "description": "Path is the file path relative to the Conda environment root."
+ },
+ "path_type": {
+ "type": "string",
+ "description": "PathType indicates the link type for the file (e.g., \"hardlink\", \"softlink\", \"directory\")."
+ },
+ "sha256": {
+ "type": "string",
+ "description": "SHA256 is the SHA-256 hash of the file contents."
+ },
+ "sha256_in_prefix": {
+ "type": "string",
+ "description": "SHA256InPrefix is the SHA-256 hash of the file after prefix replacement during installation."
+ },
+ "size_in_bytes": {
+ "type": "integer",
+ "description": "SizeInBytes is the file size in bytes."
+ }
+ },
+ "type": "object",
+ "required": [
+ "_path",
+ "path_type",
+ "sha256",
+ "sha256_in_prefix",
+ "size_in_bytes"
+ ],
+ "description": "CondaPathData represents metadata for a single file within a Conda package from the paths.json file."
+ },
+ "CondaPathsData": {
+ "properties": {
+ "paths_version": {
+ "type": "integer",
+ "description": "PathsVersion is the schema version of the paths data format."
+ },
+ "paths": {
+ "items": {
+ "$ref": "#/$defs/CondaPathData"
+ },
+ "type": "array",
+ "description": "Paths is the list of file metadata entries for all files in the package."
+ }
+ },
+ "type": "object",
+ "required": [
+ "paths_version",
+ "paths"
+ ],
+ "description": "CondaPathsData represents the paths.json file structure from a Conda package containing file metadata."
+ },
+ "Coordinates": {
+ "properties": {
+ "path": {
+ "type": "string",
+ "description": "RealPath is the canonical absolute form of the path accessed (all symbolic links have been followed and relative path components like '.' and '..' have been removed)."
+ },
+ "layerID": {
+ "type": "string",
+ "description": "FileSystemID is an ID representing an entire filesystem. For container images, this is a layer digest. For directories or a root filesystem, this is blank."
+ }
+ },
+ "type": "object",
+ "required": [
+ "path"
+ ],
+ "description": "Coordinates contains the minimal information needed to describe how to find a file within any possible source object (e.g."
+ },
+ "DartPubspec": {
+ "properties": {
+ "homepage": {
+ "type": "string",
+ "description": "Homepage is the package homepage URL"
+ },
+ "repository": {
+ "type": "string",
+ "description": "Repository is the source code repository URL"
+ },
+ "documentation": {
+ "type": "string",
+ "description": "Documentation is the documentation site URL"
+ },
+ "publish_to": {
+ "type": "string",
+ "description": "PublishTo is the package repository to publish to, or \"none\" to prevent accidental publishing"
+ },
+ "environment": {
+ "$ref": "#/$defs/DartPubspecEnvironment",
+ "description": "Environment is SDK version constraints for Dart and Flutter"
+ },
+ "platforms": {
+ "items": {
+ "type": "string"
+ },
+ "type": "array",
+ "description": "Platforms are the supported platforms (Android, iOS, web, etc.)"
+ },
+ "ignored_advisories": {
+ "items": {
+ "type": "string"
+ },
+ "type": "array",
+ "description": "IgnoredAdvisories are the security advisories to explicitly ignore for this package"
+ }
+ },
+ "type": "object",
+ "description": "DartPubspec is a struct that represents a package described in a pubspec.yaml file"
+ },
+ "DartPubspecEnvironment": {
+ "properties": {
+ "sdk": {
+ "type": "string",
+ "description": "SDK is the Dart SDK version constraint (e.g. \"\u003e=2.12.0 \u003c3.0.0\")"
+ },
+ "flutter": {
+ "type": "string",
+ "description": "Flutter is the Flutter SDK version constraint if this is a Flutter package"
+ }
+ },
+ "type": "object",
+ "description": "DartPubspecEnvironment represents SDK version constraints from the environment section of pubspec.yaml."
+ },
+ "DartPubspecLockEntry": {
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "Name is the package name as found in the pubspec.lock file"
+ },
+ "version": {
+ "type": "string",
+ "description": "Version is the package version as found in the pubspec.lock file"
+ },
+ "hosted_url": {
+ "type": "string",
+ "description": "HostedURL is the URL of the package repository for hosted packages (typically pub.dev, but can be a custom repository identified by hosted-url). When the PUB_HOSTED_URL environment variable changes, the lockfile tracks the source."
+ },
+ "vcs_url": {
+ "type": "string",
+ "description": "VcsURL is the URL of the VCS repository for git/path dependencies (for packages fetched from version control systems like Git)"
+ }
+ },
+ "type": "object",
+ "required": [
+ "name",
+ "version"
+ ],
+ "description": "DartPubspecLockEntry is a struct that represents a single entry found in the \"packages\" section in a Dart pubspec.lock file."
+ },
+ "Descriptor": {
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "Name is the name of the tool that generated this SBOM (e.g., \"syft\")."
+ },
+ "version": {
+ "type": "string",
+ "description": "Version is the version of the tool that generated this SBOM."
+ },
+ "configuration": {
+ "description": "Configuration contains the tool configuration used during SBOM generation."
+ }
+ },
+ "type": "object",
+ "required": [
+ "name",
+ "version"
+ ],
+ "description": "Descriptor identifies the tool that generated this SBOM document, including its name, version, and configuration used during catalog generation."
+ },
+ "Digest": {
+ "properties": {
+ "algorithm": {
+ "type": "string",
+ "description": "Algorithm specifies the hash algorithm used (e.g., \"sha256\", \"md5\")."
+ },
+ "value": {
+ "type": "string",
+ "description": "Value is the hexadecimal string representation of the hash."
+ } + }, + "type": "object", + "required": [ + "algorithm", + "value" + ], + "description": "Digest represents a cryptographic hash of file contents." + }, + "Document": { + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/Package" + }, + "type": "array" + }, + "artifactRelationships": { + "items": { + "$ref": "#/$defs/Relationship" + }, + "type": "array" + }, + "files": { + "items": { + "$ref": "#/$defs/File" + }, + "type": "array" + }, + "source": { + "$ref": "#/$defs/Source" + }, + "distro": { + "$ref": "#/$defs/LinuxRelease" + }, + "descriptor": { + "$ref": "#/$defs/Descriptor" + }, + "schema": { + "$ref": "#/$defs/Schema" + } + }, + "type": "object", + "required": [ + "artifacts", + "artifactRelationships", + "source", + "distro", + "descriptor", + "schema" + ], + "description": "Document represents the syft cataloging findings as a JSON document" + }, + "DotnetDepsEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the deps.json file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the deps.json file" + }, + "path": { + "type": "string", + "description": "Path is the relative path to the package within the deps structure (e.g. \"app.metrics/3.0.0\")" + }, + "sha512": { + "type": "string", + "description": "Sha512 is the SHA-512 hash of the NuGet package content WITHOUT the signed content for verification (won't match hash from NuGet API or manual calculation of .nupkg file)" + }, + "hashPath": { + "type": "string", + "description": "HashPath is the relative path to the .nupkg.sha512 hash file (e.g. \"app.metrics.3.0.0.nupkg.sha512\")" + }, + "executables": { + "patternProperties": { + ".*": { + "$ref": "#/$defs/DotnetPortableExecutableEntry" + } + }, + "type": "object", + "description": "Executables are the map of .NET Portable Executable files within this package with their version resources" + } + }, + "type": "object", + "required": [ + "name", + "version", + "path", + "sha512", + "hashPath" + ], + "description": "DotnetDepsEntry is a struct that represents a single entry found in the \"libraries\" section in a .NET [*.]deps.json file." + }, + "DotnetPackagesLockEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the packages.lock.json file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the packages.lock.json file" + }, + "contentHash": { + "type": "string", + "description": "ContentHash is the hash of the package content for verification" + }, + "type": { + "type": "string", + "description": "Type is the dependency type indicating how this dependency was added (Direct=explicit in project file, Transitive=pulled in by another package, Project=project reference)" + } + }, + "type": "object", + "required": [ + "name", + "version", + "contentHash", + "type" + ], + "description": "DotnetPackagesLockEntry is a struct that represents a single entry found in the \"dependencies\" section in a .NET packages.lock.json file." 
+ }, + "DotnetPortableExecutableEntry": { + "properties": { + "assemblyVersion": { + "type": "string", + "description": "AssemblyVersion is the .NET assembly version number (strong-named version)" + }, + "legalCopyright": { + "type": "string", + "description": "LegalCopyright is the copyright notice string" + }, + "comments": { + "type": "string", + "description": "Comments are additional comments or description embedded in PE resources" + }, + "internalName": { + "type": "string", + "description": "InternalName is the internal name of the file" + }, + "companyName": { + "type": "string", + "description": "CompanyName is the company that produced the file" + }, + "productName": { + "type": "string", + "description": "ProductName is the name of the product this file is part of" + }, + "productVersion": { + "type": "string", + "description": "ProductVersion is the version of the product (may differ from AssemblyVersion)" + } + }, + "type": "object", + "required": [ + "assemblyVersion", + "legalCopyright", + "companyName", + "productName", + "productVersion" + ], + "description": "DotnetPortableExecutableEntry is a struct that represents a single entry found within \"VersionResources\" section of a .NET Portable Executable binary file." + }, + "DpkgArchiveEntry": { + "properties": { + "package": { + "type": "string", + "description": "Package is the package name as found in the status file" + }, + "source": { + "type": "string", + "description": "Source is the source package name this binary was built from (one source can produce multiple binary packages)" + }, + "version": { + "type": "string", + "description": "Version is the binary package version as found in the status file" + }, + "sourceVersion": { + "type": "string", + "description": "SourceVersion is the source package version (may differ from binary version when binNMU rebuilds occur)" + }, + "architecture": { + "type": "string", + "description": "Architecture is the target architecture per Debian spec (specific arch like amd64/arm64, wildcard like any, architecture-independent \"all\", or \"source\" for source packages)" + }, + "maintainer": { + "type": "string", + "description": "Maintainer is the package maintainer's name and email in RFC822 format (name must come first, then email in angle brackets)" + }, + "installedSize": { + "type": "integer", + "description": "InstalledSize is the total size of installed files in kilobytes" + }, + "provides": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Provides are the virtual packages provided by this package (allows other packages to depend on capabilities. 
Can include versioned provides like \"libdigest-md5-perl (= 2.55.01)\")" + }, + "depends": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Depends are the packages required for this package to function (will not be installed unless these requirements are met, creates strict ordering constraint)" + }, + "preDepends": { + "items": { + "type": "string" + }, + "type": "array", + "description": "PreDepends are the packages that must be installed and configured BEFORE even starting installation of this package (stronger than Depends, discouraged unless absolutely necessary as it adds strict constraints for apt)" + }, + "files": { + "items": { + "$ref": "#/$defs/DpkgFileRecord" + }, + "type": "array", + "description": "Files are the files installed by this package" + } + }, + "type": "object", + "required": [ + "package", + "source", + "version", + "sourceVersion", + "architecture", + "maintainer", + "installedSize", + "files" + ], + "description": "DpkgArchiveEntry represents package metadata extracted from a .deb archive file." + }, + "DpkgDbEntry": { + "properties": { + "package": { + "type": "string", + "description": "Package is the package name as found in the status file" + }, + "source": { + "type": "string", + "description": "Source is the source package name this binary was built from (one source can produce multiple binary packages)" + }, + "version": { + "type": "string", + "description": "Version is the binary package version as found in the status file" + }, + "sourceVersion": { + "type": "string", + "description": "SourceVersion is the source package version (may differ from binary version when binNMU rebuilds occur)" + }, + "architecture": { + "type": "string", + "description": "Architecture is the target architecture per Debian spec (specific arch like amd64/arm64, wildcard like any, architecture-independent \"all\", or \"source\" for source packages)" + }, + "maintainer": { + "type": "string", + "description": "Maintainer is the package maintainer's name and email in RFC822 format (name must come first, then email in angle brackets)" + }, + "installedSize": { + "type": "integer", + "description": "InstalledSize is the total size of installed files in kilobytes" + }, + "provides": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Provides are the virtual packages provided by this package (allows other packages to depend on capabilities. 
Can include versioned provides like \"libdigest-md5-perl (= 2.55.01)\")" + }, + "depends": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Depends are the packages required for this package to function (will not be installed unless these requirements are met, creates strict ordering constraint)" + }, + "preDepends": { + "items": { + "type": "string" + }, + "type": "array", + "description": "PreDepends are the packages that must be installed and configured BEFORE even starting installation of this package (stronger than Depends, discouraged unless absolutely necessary as it adds strict constraints for apt)" + }, + "files": { + "items": { + "$ref": "#/$defs/DpkgFileRecord" + }, + "type": "array", + "description": "Files are the files installed by this package" + } + }, + "type": "object", + "required": [ + "package", + "source", + "version", + "sourceVersion", + "architecture", + "maintainer", + "installedSize", + "files" + ], + "description": "DpkgDBEntry represents all captured data for a Debian package DB entry; available fields are described at http://manpages.ubuntu.com/manpages/xenial/man1/dpkg-query.1.html in the --showformat section." + }, + "DpkgFileRecord": { + "properties": { + "path": { + "type": "string", + "description": "Path is the file path relative to the filesystem root" + }, + "digest": { + "$ref": "#/$defs/Digest", + "description": "Digest is the file content hash (typically MD5 for dpkg compatibility with legacy systems)" + }, + "isConfigFile": { + "type": "boolean", + "description": "IsConfigFile is whether this file is marked as a configuration file (dpkg will preserve user modifications during upgrades)" + } + }, + "type": "object", + "required": [ + "path", + "isConfigFile" + ], + "description": "DpkgFileRecord represents a single file attributed to a debian package." + }, + "ELFSecurityFeatures": { + "properties": { + "symbolTableStripped": { + "type": "boolean", + "description": "SymbolTableStripped indicates whether debugging symbols have been removed." + }, + "stackCanary": { + "type": "boolean", + "description": "StackCanary indicates whether stack smashing protection is enabled." + }, + "nx": { + "type": "boolean", + "description": "NoExecutable indicates whether NX (no-execute) protection is enabled for the stack." + }, + "relRO": { + "type": "string", + "description": "RelocationReadOnly indicates the RELRO protection level." + }, + "pie": { + "type": "boolean", + "description": "PositionIndependentExecutable indicates whether the binary is compiled as PIE." + }, + "dso": { + "type": "boolean", + "description": "DynamicSharedObject indicates whether the binary is a shared library." 
+ }, + "safeStack": { + "type": "boolean", + "description": "LlvmSafeStack represents a compiler-based security mechanism that separates the stack into a safe stack for storing return addresses and other critical data, and an unsafe stack for everything else, to mitigate stack-based memory corruption errors\nsee https://clang.llvm.org/docs/SafeStack.html" + }, + "cfi": { + "type": "boolean", + "description": "ControlFlowIntegrity represents runtime checks to ensure a program's control flow adheres to the legal paths determined at compile time, thus protecting against various types of control-flow hijacking attacks\nsee https://clang.llvm.org/docs/ControlFlowIntegrity.html" + }, + "fortify": { + "type": "boolean", + "description": "ClangFortifySource is a broad suite of extensions to libc aimed at catching misuses of common library functions\nsee https://android.googlesource.com/platform//bionic/+/d192dbecf0b2a371eb127c0871f77a9caf81c4d2/docs/clang_fortify_anatomy.md" + } + }, + "type": "object", + "required": [ + "symbolTableStripped", + "nx", + "relRO", + "pie", + "dso" + ], + "description": "ELFSecurityFeatures captures security hardening and protection mechanisms in ELF binaries." + }, + "ElfBinaryPackageNoteJsonPayload": { + "properties": { + "type": { + "type": "string", + "description": "Type is the type of the package (e.g. \"rpm\", \"deb\", \"apk\", etc.)" + }, + "architecture": { + "type": "string", + "description": "Architecture of the binary package (e.g. \"amd64\", \"arm\", etc.)" + }, + "osCPE": { + "type": "string", + "description": "OSCPE is a CPE name for the OS, typically corresponding to CPE_NAME in os-release (e.g. cpe:/o:fedoraproject:fedora:33)" + }, + "os": { + "type": "string", + "description": "OS is the OS name, typically corresponding to ID in os-release (e.g. \"fedora\")" + }, + "osVersion": { + "type": "string", + "description": "osVersion is the version of the OS, typically corresponding to VERSION_ID in os-release (e.g. 
\"33\")" + }, + "system": { + "type": "string", + "description": "System is a context-specific name for the system that the binary package is intended to run on or a part of" + }, + "vendor": { + "type": "string", + "description": "Vendor is the individual or organization that produced the source code for the binary" + }, + "sourceRepo": { + "type": "string", + "description": "SourceRepo is the URL to the source repository for which the binary was built from" + }, + "commit": { + "type": "string", + "description": "Commit is the commit hash of the source repository for which the binary was built from" + } + }, + "type": "object", + "description": "ELFBinaryPackageNoteJSONPayload Represents metadata captured from the .note.package section of an ELF-formatted binary" + }, + "ElixirMixLockEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the mix.lock file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the mix.lock file" + }, + "pkgHash": { + "type": "string", + "description": "PkgHash is the outer checksum (SHA-256) of the entire Hex package tarball for integrity verification (preferred method, replaces deprecated inner checksum)" + }, + "pkgHashExt": { + "type": "string", + "description": "PkgHashExt is the extended package hash format (inner checksum is deprecated - SHA-256 of concatenated file contents excluding CHECKSUM file, now replaced by outer checksum)" + } + }, + "type": "object", + "required": [ + "name", + "version", + "pkgHash", + "pkgHashExt" + ], + "description": "ElixirMixLockEntry is a struct that represents a single entry in a mix.lock file" + }, + "ErlangRebarLockEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the rebar.lock file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the rebar.lock file" + }, + "pkgHash": { + "type": "string", + "description": "PkgHash is the outer checksum (SHA-256) of the entire Hex package tarball for integrity verification (preferred method over deprecated inner checksum)" + }, + "pkgHashExt": { + "type": "string", + "description": "PkgHashExt is the extended package hash format (inner checksum deprecated - was SHA-256 of concatenated file contents)" + } + }, + "type": "object", + "required": [ + "name", + "version", + "pkgHash", + "pkgHashExt" + ], + "description": "ErlangRebarLockEntry represents a single package entry from the \"deps\" section within an Erlang rebar.lock file." + }, + "Executable": { + "properties": { + "format": { + "type": "string", + "description": "Format denotes either ELF, Mach-O, or PE" + }, + "hasExports": { + "type": "boolean", + "description": "HasExports indicates whether the binary exports symbols." + }, + "hasEntrypoint": { + "type": "boolean", + "description": "HasEntrypoint indicates whether the binary has an entry point function." + }, + "importedLibraries": { + "items": { + "type": "string" + }, + "type": "array", + "description": "ImportedLibraries lists the shared libraries required by this executable." + }, + "elfSecurityFeatures": { + "$ref": "#/$defs/ELFSecurityFeatures", + "description": "ELFSecurityFeatures contains ELF-specific security hardening information when Format is ELF." 
+ } + }, + "type": "object", + "required": [ + "format", + "hasExports", + "hasEntrypoint", + "importedLibraries" + ], + "description": "Executable contains metadata about binary files and their security features." + }, + "File": { + "properties": { + "id": { + "type": "string", + "description": "ID is a unique identifier for this file within the SBOM." + }, + "location": { + "$ref": "#/$defs/Coordinates", + "description": "Location is the file path and layer information where this file was found." + }, + "metadata": { + "$ref": "#/$defs/FileMetadataEntry", + "description": "Metadata contains filesystem metadata such as permissions, ownership, and file type." + }, + "contents": { + "type": "string", + "description": "Contents is the file contents for small files." + }, + "digests": { + "items": { + "$ref": "#/$defs/Digest" + }, + "type": "array", + "description": "Digests contains cryptographic hashes of the file contents." + }, + "licenses": { + "items": { + "$ref": "#/$defs/FileLicense" + }, + "type": "array", + "description": "Licenses contains license information discovered within this file." + }, + "executable": { + "$ref": "#/$defs/Executable", + "description": "Executable contains executable metadata if this file is a binary." + }, + "unknowns": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Unknowns contains unknown fields for forward compatibility." + } + }, + "type": "object", + "required": [ + "id", + "location" + ], + "description": "File represents a file discovered during cataloging with its metadata, content digests, licenses, and relationships to packages." + }, + "FileLicense": { + "properties": { + "value": { + "type": "string", + "description": "Value is the raw license identifier or text as found in the file." + }, + "spdxExpression": { + "type": "string", + "description": "SPDXExpression is the parsed SPDX license expression." + }, + "type": { + "type": "string", + "description": "Type is the license type classification (e.g., declared, concluded, discovered)." + }, + "evidence": { + "$ref": "#/$defs/FileLicenseEvidence", + "description": "Evidence contains supporting evidence for this license detection." + } + }, + "type": "object", + "required": [ + "value", + "spdxExpression", + "type" + ], + "description": "FileLicense represents license information discovered within a file's contents or metadata, including the matched license text and SPDX expression." + }, + "FileLicenseEvidence": { + "properties": { + "confidence": { + "type": "integer", + "description": "Confidence is the confidence score for this license detection (0-100)." + }, + "offset": { + "type": "integer", + "description": "Offset is the byte offset where the license text starts in the file." + }, + "extent": { + "type": "integer", + "description": "Extent is the length of the license text in bytes." + } + }, + "type": "object", + "required": [ + "confidence", + "offset", + "extent" + ], + "description": "FileLicenseEvidence contains supporting evidence for a license detection in a file, including the byte offset, extent, and confidence level." + }, + "FileMetadataEntry": { + "properties": { + "mode": { + "type": "integer", + "description": "Mode is the Unix file permission mode in octal format." + }, + "type": { + "type": "string", + "description": "Type is the file type (e.g., \"RegularFile\", \"Directory\", \"SymbolicLink\")." + }, + "linkDestination": { + "type": "string", + "description": "LinkDestination is the target path for symbolic links." 
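As a sketch (values invented; the id, digest, and lowercase "elf" format string are assumptions, not confirmed by this diff), a File entry combining the File, Digest, and Executable definitions above could look like:

    {
      "id": "f1a2b3c4d5e6f708",
      "location": {
        "path": "/usr/bin/example",
        "accessPath": "/usr/bin/example"
      },
      "digests": [
        { "algorithm": "sha256", "value": "<hex digest>" }
      ],
      "executable": {
        "format": "elf",
        "hasExports": false,
        "hasEntrypoint": true,
        "importedLibraries": ["libc.so.6"]
      }
    }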
+ }, + "userID": { + "type": "integer", + "description": "UserID is the file owner user ID." + }, + "groupID": { + "type": "integer", + "description": "GroupID is the file owner group ID." + }, + "mimeType": { + "type": "string", + "description": "MIMEType is the MIME type of the file contents." + }, + "size": { + "type": "integer", + "description": "Size is the file size in bytes." + } + }, + "type": "object", + "required": [ + "mode", + "type", + "userID", + "groupID", + "mimeType", + "size" + ], + "description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file." + }, + "GgufFileHeader": { + "properties": { + "ggufVersion": { + "type": "integer", + "description": "GGUFVersion is the GGUF format version (e.g., 3)" + }, + "fileSize": { + "type": "integer", + "description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)" + }, + "architecture": { + "type": "string", + "description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")" + }, + "quantization": { + "type": "string", + "description": "Quantization is the quantization type (e.g., \"IQ4_NL\", \"Q4_K_M\")" + }, + "parameters": { + "type": "integer", + "description": "Parameters is the number of model parameters (if present in header)" + }, + "tensorCount": { + "type": "integer", + "description": "TensorCount is the number of tensors in the model" + }, + "header": { + "type": "object", + "description": "RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication." + }, + "metadataHash": { + "type": "string", + "description": "MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames." + } + }, + "type": "object", + "required": [ + "ggufVersion", + "tensorCount" + ], + "description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file." + }, + "GithubActionsUseStatement": { + "properties": { + "value": { + "type": "string", + "description": "Value is the action reference (e.g. \"actions/checkout@v3\")" + }, + "comment": { + "type": "string", + "description": "Comment is the inline comment associated with this uses statement" + } + }, + "type": "object", + "required": [ + "value" + ], + "description": "GitHubActionsUseStatement represents a single 'uses' statement in a GitHub Actions workflow file referencing an action or reusable workflow." + }, + "GoModuleBuildinfoEntry": { + "properties": { + "goBuildSettings": { + "$ref": "#/$defs/KeyValues", + "description": "BuildSettings contains the Go build settings and flags used to compile the binary (e.g., GOARCH, GOOS, CGO_ENABLED)." + }, + "goCompiledVersion": { + "type": "string", + "description": "GoCompiledVersion is the version of Go used to compile the binary." 
+ }, + "architecture": { + "type": "string", + "description": "Architecture is the target CPU architecture for the binary (extracted from GOARCH build setting)." + }, + "h1Digest": { + "type": "string", + "description": "H1Digest is the Go module hash in h1: format for the main module from go.sum." + }, + "mainModule": { + "type": "string", + "description": "MainModule is the main module path for the binary (e.g., \"github.com/anchore/syft\")." + }, + "goCryptoSettings": { + "items": { + "type": "string" + }, + "type": "array", + "description": "GoCryptoSettings contains FIPS and cryptographic configuration settings if present." + }, + "goExperiments": { + "items": { + "type": "string" + }, + "type": "array", + "description": "GoExperiments lists experimental Go features enabled during compilation (e.g., \"arenas\", \"cgocheck2\")." + } + }, + "type": "object", + "required": [ + "goCompiledVersion", + "architecture" + ], + "description": "GolangBinaryBuildinfoEntry represents all captured data for a Golang binary" + }, + "GoModuleEntry": { + "properties": { + "h1Digest": { + "type": "string", + "description": "H1Digest is the Go module hash in h1: format from go.sum for verifying module contents." + } + }, + "type": "object", + "description": "GolangModuleEntry represents all captured data for a Golang source scan with go.mod/go.sum" + }, + "GoSourceEntry": { + "properties": { + "h1Digest": { + "type": "string", + "description": "H1Digest is the Go module hash in h1: format from go.sum for verifying module contents." + }, + "os": { + "type": "string", + "description": "OperatingSystem is the target OS for build constraints (e.g., \"linux\", \"darwin\", \"windows\")." + }, + "architecture": { + "type": "string", + "description": "Architecture is the target CPU architecture for build constraints (e.g., \"amd64\", \"arm64\")." + }, + "buildTags": { + "type": "string", + "description": "BuildTags are the build tags used to conditionally compile code (e.g., \"integration,debug\")." + }, + "cgoEnabled": { + "type": "boolean", + "description": "CgoEnabled indicates whether CGO was enabled for this package." + } + }, + "type": "object", + "required": [ + "cgoEnabled" + ], + "description": "GolangSourceEntry represents all captured data for a Golang package found through source analysis" + }, + "HaskellHackageStackEntry": { + "properties": { + "pkgHash": { + "type": "string", + "description": "PkgHash is the package content hash for verification" + } + }, + "type": "object", + "description": "HackageStackYamlEntry represents a single entry from the \"extra-deps\" section of a stack.yaml file." + }, + "HaskellHackageStackLockEntry": { + "properties": { + "pkgHash": { + "type": "string", + "description": "PkgHash is the package content hash for verification" + }, + "snapshotURL": { + "type": "string", + "description": "SnapshotURL is the URL to the Stack snapshot this package came from" + } + }, + "type": "object", + "description": "HackageStackYamlLockEntry represents a single entry from the \"packages\" section of a stack.yaml.lock file." + }, + "HomebrewFormula": { + "properties": { + "tap": { + "type": "string", + "description": "Tap is Homebrew tap this formula belongs to (e.g. 
\"homebrew/core\")" + }, + "homepage": { + "type": "string", + "description": "Homepage is the upstream project homepage URL" + }, + "description": { + "type": "string", + "description": "Description is a human-readable formula description" + } + }, + "type": "object", + "description": "HomebrewFormula represents metadata about a Homebrew formula package extracted from formula JSON files." + }, + "IDLikes": { + "items": { + "type": "string" + }, + "type": "array", + "description": "IDLikes represents a list of distribution IDs that this Linux distribution is similar to or derived from, as defined in os-release ID_LIKE field." + }, + "JavaArchive": { + "properties": { + "virtualPath": { + "type": "string", + "description": "VirtualPath is path within the archive hierarchy, where nested entries are delimited with ':' (for nested JARs)" + }, + "manifest": { + "$ref": "#/$defs/JavaManifest", + "description": "Manifest is parsed META-INF/MANIFEST.MF contents" + }, + "pomProperties": { + "$ref": "#/$defs/JavaPomProperties", + "description": "PomProperties is parsed pom.properties file contents" + }, + "pomProject": { + "$ref": "#/$defs/JavaPomProject", + "description": "PomProject is parsed pom.xml file contents" + }, + "digest": { + "items": { + "$ref": "#/$defs/Digest" + }, + "type": "array", + "description": "ArchiveDigests is cryptographic hashes of the archive file" + } + }, + "type": "object", + "required": [ + "virtualPath" + ], + "description": "JavaArchive encapsulates all Java ecosystem metadata for a package as well as an (optional) parent relationship." + }, + "JavaJvmInstallation": { + "properties": { + "release": { + "$ref": "#/$defs/JavaVMRelease", + "description": "Release is JVM release information and version details" + }, + "files": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Files are the list of files that are part of this JVM installation" + } + }, + "type": "object", + "required": [ + "release", + "files" + ], + "description": "JavaVMInstallation represents a Java Virtual Machine installation discovered on the system with its release information and file list." + }, + "JavaManifest": { + "properties": { + "main": { + "$ref": "#/$defs/KeyValues", + "description": "Main is main manifest attributes as key-value pairs" + }, + "sections": { + "items": { + "$ref": "#/$defs/KeyValues" + }, + "type": "array", + "description": "Sections are the named sections from the manifest (e.g. per-entry attributes)" + } + }, + "type": "object", + "description": "JavaManifest represents the fields of interest extracted from a Java archive's META-INF/MANIFEST.MF file." 
+ }, + "JavaPomParent": { + "properties": { + "groupId": { + "type": "string", + "description": "GroupID is the parent Maven group identifier" + }, + "artifactId": { + "type": "string", + "description": "ArtifactID is the parent Maven artifact identifier" + }, + "version": { + "type": "string", + "description": "Version is the parent version (child inherits configuration from this specific version of parent POM)" + } + }, + "type": "object", + "required": [ + "groupId", + "artifactId", + "version" + ], + "description": "JavaPomParent contains the fields within the \u003cparent\u003e tag in a pom.xml file" + }, + "JavaPomProject": { + "properties": { + "path": { + "type": "string", + "description": "Path is path to the pom.xml file within the archive" + }, + "parent": { + "$ref": "#/$defs/JavaPomParent", + "description": "Parent is the parent POM reference for inheritance (child POMs inherit configuration from parent)" + }, + "groupId": { + "type": "string", + "description": "GroupID is Maven group identifier (reversed domain name like org.apache.maven)" + }, + "artifactId": { + "type": "string", + "description": "ArtifactID is Maven artifact identifier (project name)" + }, + "version": { + "type": "string", + "description": "Version is project version (together with groupId and artifactId forms Maven coordinates groupId:artifactId:version)" + }, + "name": { + "type": "string", + "description": "Name is a human-readable project name (displayed in Maven-generated documentation)" + }, + "description": { + "type": "string", + "description": "Description is detailed project description" + }, + "url": { + "type": "string", + "description": "URL is the project URL (typically project website or repository)" + } + }, + "type": "object", + "required": [ + "path", + "groupId", + "artifactId", + "version", + "name" + ], + "description": "JavaPomProject represents fields of interest extracted from a Java archive's pom.xml file." + }, + "JavaPomProperties": { + "properties": { + "path": { + "type": "string", + "description": "Path is path to the pom.properties file within the archive" + }, + "name": { + "type": "string", + "description": "Name is the project name" + }, + "groupId": { + "type": "string", + "description": "GroupID is Maven group identifier uniquely identifying the project across all projects (follows reversed domain name convention like com.company.project)" + }, + "artifactId": { + "type": "string", + "description": "ArtifactID is Maven artifact identifier, the name of the jar/artifact (unique within the groupId scope)" + }, + "version": { + "type": "string", + "description": "Version is artifact version" + }, + "scope": { + "type": "string", + "description": "Scope is dependency scope determining when dependency is available (compile=default all phases, test=test compilation/execution only, runtime=runtime and test not compile, provided=expected from JDK or container)" + }, + "extraFields": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object", + "description": "Extra is additional custom properties not in standard Maven coordinates" + } + }, + "type": "object", + "required": [ + "path", + "name", + "groupId", + "artifactId", + "version" + ], + "description": "JavaPomProperties represents the fields of interest extracted from a Java archive's pom.properties file." 
+ }, + "JavaVMRelease": { + "properties": { + "implementor": { + "type": "string", + "description": "Implementor is extracted with the `java.vendor` JVM property" + }, + "implementorVersion": { + "type": "string", + "description": "ImplementorVersion is extracted with the `java.vendor.version` JVM property" + }, + "javaRuntimeVersion": { + "type": "string", + "description": "JavaRuntimeVersion is extracted from the 'java.runtime.version' JVM property" + }, + "javaVersion": { + "type": "string", + "description": "JavaVersion matches that from `java -version` command output" + }, + "javaVersionDate": { + "type": "string", + "description": "JavaVersionDate is extracted from the 'java.version.date' JVM property" + }, + "libc": { + "type": "string", + "description": "Libc can either be 'glibc' or 'musl'" + }, + "modules": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Modules is a list of JVM modules that are packaged" + }, + "osArch": { + "type": "string", + "description": "OsArch is the target CPU architecture" + }, + "osName": { + "type": "string", + "description": "OsName is the name of the target runtime operating system environment" + }, + "osVersion": { + "type": "string", + "description": "OsVersion is the version of the target runtime operating system environment" + }, + "source": { + "type": "string", + "description": "Source refers to the origin repository of OpenJDK source" + }, + "buildSource": { + "type": "string", + "description": "BuildSource Git SHA of the build repository" + }, + "buildSourceRepo": { + "type": "string", + "description": "BuildSourceRepo refers to rhe repository URL for the build source" + }, + "sourceRepo": { + "type": "string", + "description": "SourceRepo refers to the OpenJDK repository URL" + }, + "fullVersion": { + "type": "string", + "description": "FullVersion is extracted from the 'java.runtime.version' JVM property" + }, + "semanticVersion": { + "type": "string", + "description": "SemanticVersion is derived from the OpenJDK version" + }, + "buildInfo": { + "type": "string", + "description": "BuildInfo contains additional build information" + }, + "jvmVariant": { + "type": "string", + "description": "JvmVariant specifies the JVM variant (e.g., Hotspot or OpenJ9)" + }, + "jvmVersion": { + "type": "string", + "description": "JvmVersion is extracted from the 'java.vm.version' JVM property" + }, + "imageType": { + "type": "string", + "description": "ImageType can be 'JDK' or 'JRE'" + }, + "buildType": { + "type": "string", + "description": "BuildType can be 'commercial' (used in some older oracle JDK distributions)" + } + }, + "type": "object", + "description": "JavaVMRelease represents JVM version and build information extracted from the release file in a Java installation." 
+ }, + "JavascriptNpmPackage": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in package.json" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in package.json" + }, + "author": { + "type": "string", + "description": "Author is package author name" + }, + "homepage": { + "type": "string", + "description": "Homepage is project homepage URL" + }, + "description": { + "type": "string", + "description": "Description is a human-readable package description" + }, + "url": { + "type": "string", + "description": "URL is repository or project URL" + }, + "private": { + "type": "boolean", + "description": "Private is whether this is a private package" + } + }, + "type": "object", + "required": [ + "name", + "version", + "author", + "homepage", + "description", + "url", + "private" + ], + "description": "NpmPackage represents the contents of a javascript package.json file." + }, + "JavascriptNpmPackageLockEntry": { + "properties": { + "resolved": { + "type": "string", + "description": "Resolved is URL where this package was downloaded from (registry source)" + }, + "integrity": { + "type": "string", + "description": "Integrity is Subresource Integrity hash for verification using standard SRI format (sha512-... or sha1-...). npm changed from SHA-1 to SHA-512 in newer versions. For registry sources this is the integrity from registry, for remote tarballs it's SHA-512 of the file. npm verifies tarball matches this hash before unpacking, throwing EINTEGRITY error if mismatch detected." + }, + "dependencies": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object", + "description": "Dependencies is a map of dependencies and their version markers, i.e. \"lodash\": \"^1.0.0\"" + } + }, + "type": "object", + "required": [ + "resolved", + "integrity", + "dependencies" + ], + "description": "NpmPackageLockEntry represents a single entry within the \"packages\" section of a package-lock.json file." + }, + "JavascriptPnpmLockEntry": { + "properties": { + "resolution": { + "$ref": "#/$defs/PnpmLockResolution", + "description": "Resolution is the resolution information for the package" + }, + "dependencies": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object", + "description": "Dependencies is a map of dependencies and their versions" + } + }, + "type": "object", + "required": [ + "resolution", + "dependencies" + ], + "description": "PnpmLockEntry represents a single entry in the \"packages\" section of a pnpm-lock.yaml file." + }, + "JavascriptYarnLockEntry": { + "properties": { + "resolved": { + "type": "string", + "description": "Resolved is URL where this package was downloaded from" + }, + "integrity": { + "type": "string", + "description": "Integrity is Subresource Integrity hash for verification (SRI format)" + }, + "dependencies": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object", + "description": "Dependencies is a map of dependencies and their versions" + } + }, + "type": "object", + "required": [ + "resolved", + "integrity", + "dependencies" + ], + "description": "YarnLockEntry represents a single entry section of a yarn.lock file." 
+ }, + "KeyValue": { + "properties": { + "key": { + "type": "string", + "description": "Key is the key name" + }, + "value": { + "type": "string", + "description": "Value is the value associated with the key" + } + }, + "type": "object", + "required": [ + "key", + "value" + ], + "description": "KeyValue represents a single key-value pair." + }, + "KeyValues": { + "items": { + "$ref": "#/$defs/KeyValue" + }, + "type": "array", + "description": "KeyValues represents an ordered collection of key-value pairs that preserves insertion order." + }, + "License": { + "properties": { + "value": { + "type": "string", + "description": "Value is the raw license identifier or expression as found." + }, + "spdxExpression": { + "type": "string", + "description": "SPDXExpression is the parsed SPDX license expression." + }, + "type": { + "type": "string", + "description": "Type is the license type classification (e.g., declared, concluded, discovered)." + }, + "urls": { + "items": { + "type": "string" + }, + "type": "array", + "description": "URLs are URLs where license text or information can be found." + }, + "locations": { + "items": { + "$ref": "#/$defs/Location" + }, + "type": "array", + "description": "Locations are file locations where this license was discovered." + }, + "contents": { + "type": "string", + "description": "Contents is the full license text content." + } + }, + "type": "object", + "required": [ + "value", + "spdxExpression", + "type", + "urls", + "locations" + ], + "description": "License represents software license information discovered for a package, including SPDX expressions and supporting evidence locations." + }, + "LinuxKernelArchive": { + "properties": { + "name": { + "type": "string", + "description": "Name is kernel name (typically \"Linux\")" + }, + "architecture": { + "type": "string", + "description": "Architecture is the target CPU architecture" + }, + "version": { + "type": "string", + "description": "Version is kernel version string" + }, + "extendedVersion": { + "type": "string", + "description": "ExtendedVersion is additional version information" + }, + "buildTime": { + "type": "string", + "description": "BuildTime is when the kernel was built" + }, + "author": { + "type": "string", + "description": "Author is who built the kernel" + }, + "format": { + "type": "string", + "description": "Format is kernel image format (e.g. 
bzImage, zImage)" + }, + "rwRootFS": { + "type": "boolean", + "description": "RWRootFS is whether root filesystem is mounted read-write" + }, + "swapDevice": { + "type": "integer", + "description": "SwapDevice is swap device number" + }, + "rootDevice": { + "type": "integer", + "description": "RootDevice is root device number" + }, + "videoMode": { + "type": "string", + "description": "VideoMode is default video mode setting" + } + }, + "type": "object", + "required": [ + "name", + "architecture", + "version" + ], + "description": "LinuxKernel represents all captured data for a Linux kernel" + }, + "LinuxKernelModule": { + "properties": { + "name": { + "type": "string", + "description": "Name is module name" + }, + "version": { + "type": "string", + "description": "Version is module version string" + }, + "sourceVersion": { + "type": "string", + "description": "SourceVersion is the source code version identifier" + }, + "path": { + "type": "string", + "description": "Path is the filesystem path to the .ko kernel object file (absolute path)" + }, + "description": { + "type": "string", + "description": "Description is a human-readable module description" + }, + "author": { + "type": "string", + "description": "Author is module author name and email" + }, + "license": { + "type": "string", + "description": "License is module license (e.g. GPL, BSD) which must be compatible with kernel" + }, + "kernelVersion": { + "type": "string", + "description": "KernelVersion is kernel version this module was built for" + }, + "versionMagic": { + "type": "string", + "description": "VersionMagic is version magic string for compatibility checking (includes kernel version, SMP status, module loading capabilities like \"3.17.4-302.fc21.x86_64 SMP mod_unload modversions\"). Module will NOT load if vermagic doesn't match running kernel." + }, + "parameters": { + "patternProperties": { + ".*": { + "$ref": "#/$defs/LinuxKernelModuleParameter" + } + }, + "type": "object", + "description": "Parameters are the module parameters that can be configured at load time (user-settable values like module options)" + } + }, + "type": "object", + "description": "LinuxKernelModule represents a loadable kernel module (.ko file) with its metadata, parameters, and dependencies." + }, + "LinuxKernelModuleParameter": { + "properties": { + "type": { + "type": "string", + "description": "Type is parameter data type (e.g. int, string, bool, array types)" + }, + "description": { + "type": "string", + "description": "Description is a human-readable parameter description explaining what the parameter controls" + } + }, + "type": "object", + "description": "LinuxKernelModuleParameter represents a configurable parameter for a kernel module with its type and description." + }, + "LinuxRelease": { + "properties": { + "prettyName": { + "type": "string", + "description": "PrettyName is a human-readable operating system name with version." + }, + "name": { + "type": "string", + "description": "Name is the operating system name without version information." + }, + "id": { + "type": "string", + "description": "ID is the lower-case operating system identifier (e.g., \"ubuntu\", \"rhel\")." + }, + "idLike": { + "$ref": "#/$defs/IDLikes", + "description": "IDLike is a list of operating system IDs this distribution is similar to or derived from." + }, + "version": { + "type": "string", + "description": "Version is the operating system version including codename if available." 
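A hand-written LinuxKernelModule sketch (module name and values invented) showing how the parameters map nests LinuxKernelModuleParameter objects:

    {
      "name": "example_net",
      "version": "1.0",
      "license": "GPL",
      "kernelVersion": "5.15.0",
      "versionMagic": "5.15.0 SMP mod_unload modversions",
      "parameters": {
        "debug": { "type": "int", "description": "debug level" }
      }
    }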
+ }, + "versionID": { + "type": "string", + "description": "VersionID is the operating system version number or identifier." + }, + "versionCodename": { + "type": "string", + "description": "VersionCodename is the operating system release codename (e.g., \"jammy\", \"bullseye\")." + }, + "buildID": { + "type": "string", + "description": "BuildID is a build identifier for the operating system." + }, + "imageID": { + "type": "string", + "description": "ImageID is an identifier for container or cloud images." + }, + "imageVersion": { + "type": "string", + "description": "ImageVersion is the version for container or cloud images." + }, + "variant": { + "type": "string", + "description": "Variant is the operating system variant name (e.g., \"Server\", \"Workstation\")." + }, + "variantID": { + "type": "string", + "description": "VariantID is the lower-case operating system variant identifier." + }, + "homeURL": { + "type": "string", + "description": "HomeURL is the homepage URL for the operating system." + }, + "supportURL": { + "type": "string", + "description": "SupportURL is the support or help URL for the operating system." + }, + "bugReportURL": { + "type": "string", + "description": "BugReportURL is the bug reporting URL for the operating system." + }, + "privacyPolicyURL": { + "type": "string", + "description": "PrivacyPolicyURL is the privacy policy URL for the operating system." + }, + "cpeName": { + "type": "string", + "description": "CPEName is the Common Platform Enumeration name for the operating system." + }, + "supportEnd": { + "type": "string", + "description": "SupportEnd is the end of support date or version identifier." + }, + "extendedSupport": { + "type": "boolean", + "description": "ExtendedSupport indicates whether extended security or support is available." + } + }, + "type": "object", + "description": "LinuxRelease contains Linux distribution identification and version information extracted from /etc/os-release or similar system files." + }, + "Location": { + "properties": { + "path": { + "type": "string", + "description": "RealPath is the canonical absolute form of the path accessed (all symbolic links have been followed and relative path components like '.' and '..' have been removed)." + }, + "layerID": { + "type": "string", + "description": "FileSystemID is an ID representing and entire filesystem. For container images, this is a layer digest. For directories or a root filesystem, this is blank." + }, + "accessPath": { + "type": "string", + "description": "AccessPath is the path used to retrieve file contents (which may or may not have hardlinks / symlinks in the path)" + }, + "annotations": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object", + "required": [ + "path", + "accessPath" + ], + "description": "Location represents a path relative to a particular filesystem resolved to a specific file.Reference." 
+ }, + "LuarocksPackage": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the .rockspec file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the .rockspec file" + }, + "license": { + "type": "string", + "description": "License is license identifier" + }, + "homepage": { + "type": "string", + "description": "Homepage is project homepage URL" + }, + "description": { + "type": "string", + "description": "Description is a human-readable package description" + }, + "url": { + "type": "string", + "description": "URL is the source download URL" + }, + "dependencies": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object", + "description": "Dependencies are the map of dependency names to version constraints" + } + }, + "type": "object", + "required": [ + "name", + "version", + "license", + "homepage", + "description", + "url", + "dependencies" + ], + "description": "LuaRocksPackage represents a Lua package managed by the LuaRocks package manager with metadata from .rockspec files." + }, + "MicrosoftKbPatch": { + "properties": { + "product_id": { + "type": "string", + "description": "ProductID is MSRC Product ID (e.g. \"Windows 10 Version 1703 for 32-bit Systems\")" + }, + "kb": { + "type": "string", + "description": "Kb is Knowledge Base article number (e.g. \"5001028\")" + } + }, + "type": "object", + "required": [ + "product_id", + "kb" + ], + "description": "MicrosoftKbPatch represents a Windows Knowledge Base patch identifier associated with a specific Microsoft product from the MSRC (Microsoft Security Response Center)." + }, + "NixDerivation": { + "properties": { + "path": { + "type": "string", + "description": "Path is path to the .drv file in Nix store" + }, + "system": { + "type": "string", + "description": "System is target system string indicating where derivation can be built (e.g. \"x86_64-linux\", \"aarch64-darwin\"). Must match current system for local builds." + }, + "inputDerivations": { + "items": { + "$ref": "#/$defs/NixDerivationReference" + }, + "type": "array", + "description": "InputDerivations are the list of other derivations that were inputs to this build (dependencies)" + }, + "inputSources": { + "items": { + "type": "string" + }, + "type": "array", + "description": "InputSources are the list of source file paths that were inputs to this build" + } + }, + "type": "object", + "description": "NixDerivation represents a Nix .drv file that describes how to build a package including inputs, outputs, and build instructions." + }, + "NixDerivationReference": { + "properties": { + "path": { + "type": "string", + "description": "Path is path to the referenced .drv file" + }, + "outputs": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Outputs are which outputs of the referenced derivation were used (e.g. [\"out\"], [\"bin\", \"dev\"])" + } + }, + "type": "object", + "description": "NixDerivationReference represents a reference to another derivation used as a build input or runtime dependency." + }, + "NixStoreEntry": { + "properties": { + "path": { + "type": "string", + "description": "Path is full store path for this output (e.g. 
/nix/store/abc123...-package-1.0)" + }, + "output": { + "type": "string", + "description": "Output is the specific output name for multi-output packages (empty string for default \"out\" output, can be \"bin\", \"dev\", \"doc\", etc.)" + }, + "outputHash": { + "type": "string", + "description": "OutputHash is hash prefix of the store path basename (first part before the dash)" + }, + "derivation": { + "$ref": "#/$defs/NixDerivation", + "description": "Derivation is information about the .drv file that describes how this package was built" + }, + "files": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Files are the list of files under the nix/store path for this package" + } + }, + "type": "object", + "required": [ + "outputHash" + ], + "description": "NixStoreEntry represents a package in the Nix store (/nix/store) with its derivation information and metadata." + }, + "OpamPackage": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the .opam file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the .opam file" + }, + "licenses": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Licenses are the list of applicable licenses" + }, + "url": { + "type": "string", + "description": "URL is download URL for the package source" + }, + "checksum": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Checksums are the list of checksums for verification" + }, + "homepage": { + "type": "string", + "description": "Homepage is project homepage URL" + }, + "dependencies": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Dependencies are the list of required dependencies" + } + }, + "type": "object", + "required": [ + "name", + "version", + "licenses", + "url", + "checksum", + "homepage", + "dependencies" + ], + "description": "OpamPackage represents an OCaml package managed by the OPAM package manager with metadata from .opam files." 
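A hand-written NixStoreEntry sketch (store hashes are placeholders; only outputHash is required, and output is empty for the default "out" output):

    {
      "path": "/nix/store/<32-char hash>-hello-2.12",
      "output": "",
      "outputHash": "<32-char hash>",
      "files": ["/nix/store/<32-char hash>-hello-2.12/bin/hello"]
    }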
+ }, + "Package": { + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "type": { + "type": "string" + }, + "foundBy": { + "type": "string" + }, + "locations": { + "items": { + "$ref": "#/$defs/Location" + }, + "type": "array" + }, + "licenses": { + "$ref": "#/$defs/licenses" + }, + "language": { + "type": "string" + }, + "cpes": { + "$ref": "#/$defs/cpes" + }, + "purl": { + "type": "string" + }, + "metadataType": { + "type": "string" + }, + "metadata": { + "anyOf": [ + { + "type": "null" + }, + { + "$ref": "#/$defs/AlpmDbEntry" + }, + { + "$ref": "#/$defs/ApkDbEntry" + }, + { + "$ref": "#/$defs/BinarySignature" + }, + { + "$ref": "#/$defs/BitnamiSbomEntry" + }, + { + "$ref": "#/$defs/CConanFileEntry" + }, + { + "$ref": "#/$defs/CConanInfoEntry" + }, + { + "$ref": "#/$defs/CConanLockEntry" + }, + { + "$ref": "#/$defs/CConanLockV2Entry" + }, + { + "$ref": "#/$defs/CocoaPodfileLockEntry" + }, + { + "$ref": "#/$defs/CondaMetadataEntry" + }, + { + "$ref": "#/$defs/DartPubspec" + }, + { + "$ref": "#/$defs/DartPubspecLockEntry" + }, + { + "$ref": "#/$defs/DotnetDepsEntry" + }, + { + "$ref": "#/$defs/DotnetPackagesLockEntry" + }, + { + "$ref": "#/$defs/DotnetPortableExecutableEntry" + }, + { + "$ref": "#/$defs/DpkgArchiveEntry" + }, + { + "$ref": "#/$defs/DpkgDbEntry" + }, + { + "$ref": "#/$defs/ElfBinaryPackageNoteJsonPayload" + }, + { + "$ref": "#/$defs/ElixirMixLockEntry" + }, + { + "$ref": "#/$defs/ErlangRebarLockEntry" + }, + { + "$ref": "#/$defs/GgufFileHeader" + }, + { + "$ref": "#/$defs/GithubActionsUseStatement" + }, + { + "$ref": "#/$defs/GoModuleBuildinfoEntry" + }, + { + "$ref": "#/$defs/GoModuleEntry" + }, + { + "$ref": "#/$defs/GoSourceEntry" + }, + { + "$ref": "#/$defs/HaskellHackageStackEntry" + }, + { + "$ref": "#/$defs/HaskellHackageStackLockEntry" + }, + { + "$ref": "#/$defs/HomebrewFormula" + }, + { + "$ref": "#/$defs/JavaArchive" + }, + { + "$ref": "#/$defs/JavaJvmInstallation" + }, + { + "$ref": "#/$defs/JavascriptNpmPackage" + }, + { + "$ref": "#/$defs/JavascriptNpmPackageLockEntry" + }, + { + "$ref": "#/$defs/JavascriptPnpmLockEntry" + }, + { + "$ref": "#/$defs/JavascriptYarnLockEntry" + }, + { + "$ref": "#/$defs/LinuxKernelArchive" + }, + { + "$ref": "#/$defs/LinuxKernelModule" + }, + { + "$ref": "#/$defs/LuarocksPackage" + }, + { + "$ref": "#/$defs/MicrosoftKbPatch" + }, + { + "$ref": "#/$defs/NixStoreEntry" + }, + { + "$ref": "#/$defs/OpamPackage" + }, + { + "$ref": "#/$defs/PeBinary" + }, + { + "$ref": "#/$defs/PhpComposerInstalledEntry" + }, + { + "$ref": "#/$defs/PhpComposerLockEntry" + }, + { + "$ref": "#/$defs/PhpPearEntry" + }, + { + "$ref": "#/$defs/PhpPeclEntry" + }, + { + "$ref": "#/$defs/PortageDbEntry" + }, + { + "$ref": "#/$defs/PythonPackage" + }, + { + "$ref": "#/$defs/PythonPdmLockEntry" + }, + { + "$ref": "#/$defs/PythonPipRequirementsEntry" + }, + { + "$ref": "#/$defs/PythonPipfileLockEntry" + }, + { + "$ref": "#/$defs/PythonPoetryLockEntry" + }, + { + "$ref": "#/$defs/PythonUvLockEntry" + }, + { + "$ref": "#/$defs/RDescription" + }, + { + "$ref": "#/$defs/RpmArchive" + }, + { + "$ref": "#/$defs/RpmDbEntry" + }, + { + "$ref": "#/$defs/RubyGemspec" + }, + { + "$ref": "#/$defs/RustCargoAuditEntry" + }, + { + "$ref": "#/$defs/RustCargoLockEntry" + }, + { + "$ref": "#/$defs/SnapEntry" + }, + { + "$ref": "#/$defs/SwiftPackageManagerLockEntry" + }, + { + "$ref": "#/$defs/SwiplpackPackage" + }, + { + "$ref": "#/$defs/TerraformLockProviderEntry" + }, + { + "$ref": 
"#/$defs/WordpressPluginEntry" + } + ] + } + }, + "type": "object", + "required": [ + "id", + "name", + "version", + "type", + "foundBy", + "locations", + "licenses", + "language", + "cpes", + "purl" + ], + "description": "Package represents a pkg.Package object specialized for JSON marshaling and unmarshalling." + }, + "PeBinary": { + "properties": { + "VersionResources": { + "$ref": "#/$defs/KeyValues", + "description": "VersionResources contains key-value pairs extracted from the PE file's version resource section (e.g., FileVersion, ProductName, CompanyName)." + } + }, + "type": "object", + "required": [ + "VersionResources" + ], + "description": "PEBinary represents metadata captured from a Portable Executable formatted binary (dll, exe, etc.)" + }, + "PhpComposerAuthors": { + "properties": { + "name": { + "type": "string", + "description": "Name is author's full name" + }, + "email": { + "type": "string", + "description": "Email is author's email address" + }, + "homepage": { + "type": "string", + "description": "Homepage is author's personal or company website" + } + }, + "type": "object", + "required": [ + "name" + ], + "description": "PhpComposerAuthors represents author information for a PHP Composer package from the authors field in composer.json." + }, + "PhpComposerExternalReference": { + "properties": { + "type": { + "type": "string", + "description": "Type is reference type (git for source VCS, zip/tar for dist archives)" + }, + "url": { + "type": "string", + "description": "URL is the URL to the resource (git repository URL or archive download URL)" + }, + "reference": { + "type": "string", + "description": "Reference is git commit hash or version tag for source, or archive version for dist" + }, + "shasum": { + "type": "string", + "description": "Shasum is SHA hash of the archive file for integrity verification (dist only)" + } + }, + "type": "object", + "required": [ + "type", + "url", + "reference" + ], + "description": "PhpComposerExternalReference represents source or distribution information for a PHP package, indicating where the package code is retrieved from." + }, + "PhpComposerInstalledEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is package name in vendor/package format (e.g. symfony/console)" + }, + "version": { + "type": "string", + "description": "Version is the package version" + }, + "source": { + "$ref": "#/$defs/PhpComposerExternalReference", + "description": "Source is the source repository information for development (typically git repo, used when passing --prefer-source). Originates from source code repository." + }, + "dist": { + "$ref": "#/$defs/PhpComposerExternalReference", + "description": "Dist is distribution archive information for production (typically zip/tar, default install method). Packaged version of released code." 
+ }, + "require": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object", + "description": "Require is runtime dependencies with version constraints (package will not install unless these requirements can be met)" + }, + "provide": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object", + "description": "Provide is virtual packages/functionality provided by this package (allows other packages to depend on capabilities)" + }, + "require-dev": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object", + "description": "RequireDev is development-only dependencies (not installed in production, only when developing this package or running tests)" + }, + "suggest": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object", + "description": "Suggest is optional but recommended dependencies (suggestions for packages that would extend functionality)" + }, + "license": { + "items": { + "type": "string" + }, + "type": "array", + "description": "License is the list of license identifiers (SPDX format)" + }, + "type": { + "type": "string", + "description": "Type is package type indicating purpose (library=reusable code, project=application, metapackage=aggregates dependencies, etc.)" + }, + "notification-url": { + "type": "string", + "description": "NotificationURL is the URL to notify when package is installed (for tracking/statistics)" + }, + "bin": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Bin is the list of binary/executable files that should be added to PATH" + }, + "authors": { + "items": { + "$ref": "#/$defs/PhpComposerAuthors" + }, + "type": "array", + "description": "Authors are the list of package authors with name/email/homepage" + }, + "description": { + "type": "string", + "description": "Description is a human-readable package description" + }, + "homepage": { + "type": "string", + "description": "Homepage is project homepage URL" + }, + "keywords": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Keywords are the list of keywords for package discovery/search" + }, + "time": { + "type": "string", + "description": "Time is timestamp when this package version was released" + } + }, + "type": "object", + "required": [ + "name", + "version", + "source", + "dist" + ], + "description": "PhpComposerInstalledEntry represents a single package entry from a composer v1/v2 \"installed.json\" files (very similar to composer.lock files)." + }, + "PhpComposerLockEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is package name in vendor/package format (e.g. symfony/console)" + }, + "version": { + "type": "string", + "description": "Version is the package version" + }, + "source": { + "$ref": "#/$defs/PhpComposerExternalReference", + "description": "Source is the source repository information for development (typically git repo, used when passing --prefer-source). Originates from source code repository." + }, + "dist": { + "$ref": "#/$defs/PhpComposerExternalReference", + "description": "Dist is distribution archive information for production (typically zip/tar, default install method). Packaged version of released code." 
+ }, + "require": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object", + "description": "Require is runtime dependencies with version constraints (package will not install unless these requirements can be met)" + }, + "provide": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object", + "description": "Provide is virtual packages/functionality provided by this package (allows other packages to depend on capabilities)" + }, + "require-dev": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object", + "description": "RequireDev is development-only dependencies (not installed in production, only when developing this package or running tests)" + }, + "suggest": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object", + "description": "Suggest is optional but recommended dependencies (suggestions for packages that would extend functionality)" + }, + "license": { + "items": { + "type": "string" + }, + "type": "array", + "description": "License is the list of license identifiers (SPDX format)" + }, + "type": { + "type": "string", + "description": "Type is package type indicating purpose (library=reusable code, project=application, metapackage=aggregates dependencies, etc.)" + }, + "notification-url": { + "type": "string", + "description": "NotificationURL is the URL to notify when package is installed (for tracking/statistics)" + }, + "bin": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Bin is the list of binary/executable files that should be added to PATH" + }, + "authors": { + "items": { + "$ref": "#/$defs/PhpComposerAuthors" + }, + "type": "array", + "description": "Authors are the list of package authors with name/email/homepage" + }, + "description": { + "type": "string", + "description": "Description is a human-readable package description" + }, + "homepage": { + "type": "string", + "description": "Homepage is project homepage URL" + }, + "keywords": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Keywords are the list of keywords for package discovery/search" + }, + "time": { + "type": "string", + "description": "Time is timestamp when this package version was released" + } + }, + "type": "object", + "required": [ + "name", + "version", + "source", + "dist" + ], + "description": "PhpComposerLockEntry represents a single package entry found from a composer.lock file." + }, + "PhpPearEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name" + }, + "channel": { + "type": "string", + "description": "Channel is PEAR channel this package is from" + }, + "version": { + "type": "string", + "description": "Version is the package version" + }, + "license": { + "items": { + "type": "string" + }, + "type": "array", + "description": "License is the list of applicable licenses" + } + }, + "type": "object", + "required": [ + "name", + "version" + ], + "description": "PhpPearEntry represents a single package entry found within php pear metadata files." 
+ }, + "PhpPeclEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name" + }, + "channel": { + "type": "string", + "description": "Channel is PEAR channel this package is from" + }, + "version": { + "type": "string", + "description": "Version is the package version" + }, + "license": { + "items": { + "type": "string" + }, + "type": "array", + "description": "License is the list of applicable licenses" + } + }, + "type": "object", + "required": [ + "name", + "version" + ], + "description": "PhpPeclEntry represents a single package entry found within php pecl metadata files." + }, + "PnpmLockResolution": { + "properties": { + "integrity": { + "type": "string", + "description": "Integrity is Subresource Integrity hash for verification (SRI format)" + } + }, + "type": "object", + "required": [ + "integrity" + ], + "description": "PnpmLockResolution contains package resolution metadata from pnpm lockfiles, including the integrity hash used for verification." + }, + "PortageDbEntry": { + "properties": { + "installedSize": { + "type": "integer", + "description": "InstalledSize is total size of installed files in bytes" + }, + "licenses": { + "type": "string", + "description": "Licenses is license string which may be an expression (e.g. \"GPL-2 OR Apache-2.0\")" + }, + "files": { + "items": { + "$ref": "#/$defs/PortageFileRecord" + }, + "type": "array", + "description": "Files are the files installed by this package (tracked in CONTENTS file)" + } + }, + "type": "object", + "required": [ + "installedSize", + "files" + ], + "description": "PortageEntry represents a single package entry in the portage DB flat-file store." + }, + "PortageFileRecord": { + "properties": { + "path": { + "type": "string", + "description": "Path is the file path relative to the filesystem root" + }, + "digest": { + "$ref": "#/$defs/Digest", + "description": "Digest is file content hash (MD5 for regular files in CONTENTS format: \"obj filename md5hash mtime\")" + } + }, + "type": "object", + "required": [ + "path" + ], + "description": "PortageFileRecord represents a single file attributed to a portage package." + }, + "PythonDirectURLOriginInfo": { + "properties": { + "url": { + "type": "string", + "description": "URL is the source URL from which the package was installed." + }, + "commitId": { + "type": "string", + "description": "CommitID is the VCS commit hash if installed from version control." + }, + "vcs": { + "type": "string", + "description": "VCS is the version control system type (e.g., \"git\", \"hg\")." + } + }, + "type": "object", + "required": [ + "url" + ], + "description": "PythonDirectURLOriginInfo represents installation source metadata from direct_url.json for packages installed from VCS or direct URLs." + }, + "PythonFileDigest": { + "properties": { + "algorithm": { + "type": "string", + "description": "Algorithm is the hash algorithm used (e.g., \"sha256\")." + }, + "value": { + "type": "string", + "description": "Value is the hex-encoded hash digest value." + } + }, + "type": "object", + "required": [ + "algorithm", + "value" + ], + "description": "PythonFileDigest represents the file metadata for a single file attributed to a python package." + }, + "PythonFileRecord": { + "properties": { + "path": { + "type": "string", + "description": "Path is the installed file path from the RECORD file." + }, + "digest": { + "$ref": "#/$defs/PythonFileDigest", + "description": "Digest contains the hash algorithm and value for file integrity verification." 
+ }, + "size": { + "type": "string", + "description": "Size is the file size in bytes as a string." + } + }, + "type": "object", + "required": [ + "path" + ], + "description": "PythonFileRecord represents a single entry within a RECORD file for a python wheel or egg package" + }, + "PythonPackage": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name from the Name field in PKG-INFO or METADATA." + }, + "version": { + "type": "string", + "description": "Version is the package version from the Version field in PKG-INFO or METADATA." + }, + "author": { + "type": "string", + "description": "Author is the package author name from the Author field." + }, + "authorEmail": { + "type": "string", + "description": "AuthorEmail is the package author's email address from the Author-Email field." + }, + "platform": { + "type": "string", + "description": "Platform indicates the target platform for the package (e.g., \"any\", \"linux\", \"win32\")." + }, + "files": { + "items": { + "$ref": "#/$defs/PythonFileRecord" + }, + "type": "array", + "description": "Files are the installed files listed in the RECORD file for wheels or installed-files.txt for eggs." + }, + "sitePackagesRootPath": { + "type": "string", + "description": "SitePackagesRootPath is the root directory path containing the package (e.g., \"/usr/lib/python3.9/site-packages\")." + }, + "topLevelPackages": { + "items": { + "type": "string" + }, + "type": "array", + "description": "TopLevelPackages are the top-level Python module names from top_level.txt file." + }, + "directUrlOrigin": { + "$ref": "#/$defs/PythonDirectURLOriginInfo", + "description": "DirectURLOrigin contains VCS or direct URL installation information from direct_url.json." + }, + "requiresPython": { + "type": "string", + "description": "RequiresPython specifies the Python version requirement (e.g., \"\u003e=3.6\")." + }, + "requiresDist": { + "items": { + "type": "string" + }, + "type": "array", + "description": "RequiresDist lists the package dependencies with version specifiers from Requires-Dist fields." + }, + "providesExtra": { + "items": { + "type": "string" + }, + "type": "array", + "description": "ProvidesExtra lists optional feature names that can be installed via extras (e.g., \"dev\", \"test\")." + } + }, + "type": "object", + "required": [ + "name", + "version", + "author", + "authorEmail", + "platform", + "sitePackagesRootPath" + ], + "description": "PythonPackage represents all captured data for a python egg or wheel package (specifically as outlined in the PyPA core metadata specification https://packaging.python.org/en/latest/specifications/core-metadata/)." + }, + "PythonPdmLockEntry": { + "properties": { + "summary": { + "type": "string", + "description": "Summary provides a description of the package" + }, + "files": { + "items": { + "$ref": "#/$defs/PythonFileRecord" + }, + "type": "array", + "description": "Files are the package files with their paths and hash digests" + }, + "dependencies": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Dependencies are the dependency specifications, without environment qualifiers" + } + }, + "type": "object", + "required": [ + "summary", + "files", + "dependencies" + ], + "description": "PythonPdmLockEntry represents a single package entry within a pdm.lock file." + }, + "PythonPipRequirementsEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name from the requirements file." 
+ }, + "extras": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Extras are the optional features to install from the package (e.g., package[dev,test])." + }, + "versionConstraint": { + "type": "string", + "description": "VersionConstraint specifies version requirements (e.g., \"\u003e=1.0,\u003c2.0\")." + }, + "url": { + "type": "string", + "description": "URL is the direct download URL or VCS URL if specified instead of a PyPI package." + }, + "markers": { + "type": "string", + "description": "Markers are environment marker expressions for conditional installation (e.g., \"python_version \u003e= '3.8'\")." + } + }, + "type": "object", + "required": [ + "name", + "versionConstraint" + ], + "description": "PythonRequirementsEntry represents a single entry within a [*-]requirements.txt file." + }, + "PythonPipfileLockEntry": { + "properties": { + "hashes": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Hashes are the package file hash values in the format \"algorithm:digest\" for integrity verification." + }, + "index": { + "type": "string", + "description": "Index is the PyPI index name where the package should be fetched from." + } + }, + "type": "object", + "required": [ + "hashes", + "index" + ], + "description": "PythonPipfileLockEntry represents a single package entry within a Pipfile.lock file." + }, + "PythonPoetryLockDependencyEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the dependency package name." + }, + "version": { + "type": "string", + "description": "Version is the locked version or version constraint for the dependency." + }, + "optional": { + "type": "boolean", + "description": "Optional indicates whether this dependency is optional (only needed for certain extras)." + }, + "markers": { + "type": "string", + "description": "Markers are environment marker expressions that conditionally enable the dependency (e.g., \"python_version \u003e= '3.8'\")." + }, + "extras": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Extras are the optional feature names from the dependency that should be installed." + } + }, + "type": "object", + "required": [ + "name", + "version", + "optional" + ], + "description": "PythonPoetryLockDependencyEntry represents a single dependency entry within a Poetry lock file." + }, + "PythonPoetryLockEntry": { + "properties": { + "index": { + "type": "string", + "description": "Index is the package repository name where the package should be fetched from." + }, + "dependencies": { + "items": { + "$ref": "#/$defs/PythonPoetryLockDependencyEntry" + }, + "type": "array", + "description": "Dependencies are the package's runtime dependencies with version constraints." + }, + "extras": { + "items": { + "$ref": "#/$defs/PythonPoetryLockExtraEntry" + }, + "type": "array", + "description": "Extras are optional feature groups that include additional dependencies." + } + }, + "type": "object", + "required": [ + "index", + "dependencies" + ], + "description": "PythonPoetryLockEntry represents a single package entry within a Pipfile.lock file." + }, + "PythonPoetryLockExtraEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the optional feature name (e.g., \"dev\", \"test\")." + }, + "dependencies": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Dependencies are the package names required when this extra is installed." 
+ } + }, + "type": "object", + "required": [ + "name", + "dependencies" + ], + "description": "PythonPoetryLockExtraEntry represents an optional feature group in a Poetry lock file." + }, + "PythonUvLockDependencyEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the dependency package name." + }, + "optional": { + "type": "boolean", + "description": "Optional indicates whether this dependency is optional (only needed for certain extras)." + }, + "markers": { + "type": "string", + "description": "Markers are environment marker expressions that conditionally enable the dependency (e.g., \"python_version \u003e= '3.8'\")." + }, + "extras": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Extras are the optional feature names from the dependency that should be installed." + } + }, + "type": "object", + "required": [ + "name", + "optional" + ], + "description": "PythonUvLockDependencyEntry represents a single dependency entry within a uv lock file." + }, + "PythonUvLockEntry": { + "properties": { + "index": { + "type": "string", + "description": "Index is the package repository name where the package should be fetched from." + }, + "dependencies": { + "items": { + "$ref": "#/$defs/PythonUvLockDependencyEntry" + }, + "type": "array", + "description": "Dependencies are the package's runtime dependencies with version constraints." + }, + "extras": { + "items": { + "$ref": "#/$defs/PythonUvLockExtraEntry" + }, + "type": "array", + "description": "Extras are optional feature groups that include additional dependencies." + } + }, + "type": "object", + "required": [ + "index", + "dependencies" + ], + "description": "PythonUvLockEntry represents a single package entry within a uv.lock file." + }, + "PythonUvLockExtraEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the optional feature name (e.g., \"dev\", \"test\")." + }, + "dependencies": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Dependencies are the package names required when this extra is installed." + } + }, + "type": "object", + "required": [ + "name", + "dependencies" + ], + "description": "PythonUvLockExtraEntry represents an optional feature group in a uv lock file." 
+ }, + "RDescription": { + "properties": { + "title": { + "type": "string", + "description": "Title is short one-line package title" + }, + "description": { + "type": "string", + "description": "Description is detailed package description" + }, + "author": { + "type": "string", + "description": "Author is package author(s)" + }, + "maintainer": { + "type": "string", + "description": "Maintainer is current package maintainer" + }, + "url": { + "items": { + "type": "string" + }, + "type": "array", + "description": "URL is the list of related URLs" + }, + "repository": { + "type": "string", + "description": "Repository is CRAN or other repository name" + }, + "built": { + "type": "string", + "description": "Built is R version and platform this was built with" + }, + "needsCompilation": { + "type": "boolean", + "description": "NeedsCompilation is whether this package requires compilation" + }, + "imports": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Imports are the packages imported in the NAMESPACE" + }, + "depends": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Depends are the packages this package depends on" + }, + "suggests": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Suggests are the optional packages that extend functionality" + } + }, + "type": "object", + "description": "RDescription represents metadata from an R package DESCRIPTION file containing package information, dependencies, and author details." + }, + "Relationship": { + "properties": { + "parent": { + "type": "string", + "description": "Parent is the ID of the parent artifact in this relationship." + }, + "child": { + "type": "string", + "description": "Child is the ID of the child artifact in this relationship." + }, + "type": { + "type": "string", + "description": "Type is the relationship type (e.g., \"contains\", \"dependency-of\", \"ancestor-of\")." + }, + "metadata": { + "description": "Metadata contains additional relationship-specific metadata." + } + }, + "type": "object", + "required": [ + "parent", + "child", + "type" + ], + "description": "Relationship represents a directed relationship between two artifacts in the SBOM, such as package-contains-file or package-depends-on-package." + }, + "RpmArchive": { + "properties": { + "name": { + "type": "string", + "description": "Name is the RPM package name as found in the RPM database." + }, + "version": { + "type": "string", + "description": "Version is the upstream version of the package." + }, + "epoch": { + "oneOf": [ + { + "type": "integer", + "description": "Epoch is the version epoch used to force upgrade ordering (null if not set)." + }, + { + "type": "null" + } + ] + }, + "architecture": { + "type": "string", + "description": "Arch is the target CPU architecture (e.g., \"x86_64\", \"aarch64\", \"noarch\")." + }, + "release": { + "type": "string", + "description": "Release is the package release number or distribution-specific version suffix." + }, + "sourceRpm": { + "type": "string", + "description": "SourceRpm is the source RPM filename that was used to build this package." + }, + "signatures": { + "items": { + "$ref": "#/$defs/RpmSignature" + }, + "type": "array", + "description": "Signatures contains GPG signature metadata for package verification." + }, + "size": { + "type": "integer", + "description": "Size is the total installed size of the package in bytes." 
+ }, + "vendor": { + "type": "string", + "description": "Vendor is the organization that packaged the software." + }, + "modularityLabel": { + "type": "string", + "description": "ModularityLabel identifies the module stream for modular RPM packages (e.g., \"nodejs:12:20200101\")." + }, + "provides": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Provides lists the virtual packages and capabilities this package provides." + }, + "requires": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Requires lists the dependencies required by this package." + }, + "files": { + "items": { + "$ref": "#/$defs/RpmFileRecord" + }, + "type": "array", + "description": "Files are the file records for all files owned by this package." + } + }, + "type": "object", + "required": [ + "name", + "version", + "epoch", + "architecture", + "release", + "sourceRpm", + "size", + "vendor", + "files" + ], + "description": "RpmArchive represents package metadata extracted directly from a .rpm archive file, containing the same information as an RPM database entry." + }, + "RpmDbEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the RPM package name as found in the RPM database." + }, + "version": { + "type": "string", + "description": "Version is the upstream version of the package." + }, + "epoch": { + "oneOf": [ + { + "type": "integer", + "description": "Epoch is the version epoch used to force upgrade ordering (null if not set)." + }, + { + "type": "null" + } + ] + }, + "architecture": { + "type": "string", + "description": "Arch is the target CPU architecture (e.g., \"x86_64\", \"aarch64\", \"noarch\")." + }, + "release": { + "type": "string", + "description": "Release is the package release number or distribution-specific version suffix." + }, + "sourceRpm": { + "type": "string", + "description": "SourceRpm is the source RPM filename that was used to build this package." + }, + "signatures": { + "items": { + "$ref": "#/$defs/RpmSignature" + }, + "type": "array", + "description": "Signatures contains GPG signature metadata for package verification." + }, + "size": { + "type": "integer", + "description": "Size is the total installed size of the package in bytes." + }, + "vendor": { + "type": "string", + "description": "Vendor is the organization that packaged the software." + }, + "modularityLabel": { + "type": "string", + "description": "ModularityLabel identifies the module stream for modular RPM packages (e.g., \"nodejs:12:20200101\")." + }, + "provides": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Provides lists the virtual packages and capabilities this package provides." + }, + "requires": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Requires lists the dependencies required by this package." + }, + "files": { + "items": { + "$ref": "#/$defs/RpmFileRecord" + }, + "type": "array", + "description": "Files are the file records for all files owned by this package." + } + }, + "type": "object", + "required": [ + "name", + "version", + "epoch", + "architecture", + "release", + "sourceRpm", + "size", + "vendor", + "files" + ], + "description": "RpmDBEntry represents all captured data from a RPM DB package entry." + }, + "RpmFileRecord": { + "properties": { + "path": { + "type": "string", + "description": "Path is the absolute file path where the file is installed." 
+ }, + "mode": { + "type": "integer", + "description": "Mode is the file permission mode bits following Unix stat.h conventions." + }, + "size": { + "type": "integer", + "description": "Size is the file size in bytes." + }, + "digest": { + "$ref": "#/$defs/Digest", + "description": "Digest contains the hash algorithm and value for file integrity verification." + }, + "userName": { + "type": "string", + "description": "UserName is the owner username for the file." + }, + "groupName": { + "type": "string", + "description": "GroupName is the group name for the file." + }, + "flags": { + "type": "string", + "description": "Flags indicates the file type (e.g., \"%config\", \"%doc\", \"%ghost\")." + } + }, + "type": "object", + "required": [ + "path", + "mode", + "size", + "digest", + "userName", + "groupName", + "flags" + ], + "description": "RpmFileRecord represents the file metadata for a single file attributed to a RPM package." + }, + "RpmSignature": { + "properties": { + "algo": { + "type": "string", + "description": "PublicKeyAlgorithm is the public key algorithm used for signing (e.g., \"RSA\")." + }, + "hash": { + "type": "string", + "description": "HashAlgorithm is the hash algorithm used for the signature (e.g., \"SHA256\")." + }, + "created": { + "type": "string", + "description": "Created is the timestamp when the signature was created." + }, + "issuer": { + "type": "string", + "description": "IssuerKeyID is the GPG key ID that created the signature." + } + }, + "type": "object", + "required": [ + "algo", + "hash", + "created", + "issuer" + ], + "description": "RpmSignature represents a GPG signature for an RPM package used for authenticity verification." + }, + "RubyGemspec": { + "properties": { + "name": { + "type": "string", + "description": "Name is gem name as specified in the gemspec" + }, + "version": { + "type": "string", + "description": "Version is gem version as specified in the gemspec" + }, + "files": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Files is logical list of files in the gem (NOT directly usable as filesystem paths. Example: bundler gem lists \"lib/bundler/vendor/uri/lib/uri/ldap.rb\" but actual path is \"/usr/local/lib/ruby/3.2.0/bundler/vendor/uri/lib/uri/ldap.rb\". Would need gem installation path, ruby version, and env vars like GEM_HOME to resolve actual paths.)" + }, + "authors": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Authors are the list of gem authors (stored as array regardless of using `author` or `authors` method in gemspec)" + }, + "homepage": { + "type": "string", + "description": "Homepage is project homepage URL" + } + }, + "type": "object", + "required": [ + "name", + "version" + ], + "description": "RubyGemspec represents all metadata parsed from the *.gemspec file" + }, + "RustCargoAuditEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is crate name as specified in audit section of the build binary" + }, + "version": { + "type": "string", + "description": "Version is crate version as specified in audit section of the build binary" + }, + "source": { + "type": "string", + "description": "Source is the source registry or repository where this crate came from" + } + }, + "type": "object", + "required": [ + "name", + "version", + "source" + ], + "description": "RustBinaryAuditEntry represents Rust crate metadata extracted from a compiled binary using cargo-auditable format." 
+ }, + "RustCargoLockEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is crate name as specified in Cargo.toml" + }, + "version": { + "type": "string", + "description": "Version is crate version as specified in Cargo.toml" + }, + "source": { + "type": "string", + "description": "Source is the source registry or repository URL in format \"registry+https://github.com/rust-lang/crates.io-index\" for registry packages" + }, + "checksum": { + "type": "string", + "description": "Checksum is content checksum for registry packages only (hexadecimal string). Cargo doesn't require or include checksums for git dependencies. Used to detect MITM attacks by verifying downloaded crate matches lockfile checksum." + }, + "dependencies": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Dependencies are the list of dependencies with version constraints" + } + }, + "type": "object", + "required": [ + "name", + "version", + "source", + "checksum", + "dependencies" + ], + "description": "RustCargoLockEntry represents a locked dependency from a Cargo.lock file with precise version and checksum information." + }, + "Schema": { + "properties": { + "version": { + "type": "string", + "description": "Version is the JSON schema version for this document format." + }, + "url": { + "type": "string", + "description": "URL is the URL to the JSON schema definition document." + } + }, + "type": "object", + "required": [ + "version", + "url" + ], + "description": "Schema specifies the JSON schema version and URL reference that defines the structure and validation rules for this document format." + }, + "SnapEntry": { + "properties": { + "snapType": { + "type": "string", + "description": "SnapType indicates the snap type (base, kernel, app, gadget, or snapd)." + }, + "base": { + "type": "string", + "description": "Base is the base snap name that this snap depends on (e.g., \"core20\", \"core22\")." + }, + "snapName": { + "type": "string", + "description": "SnapName is the snap package name." + }, + "snapVersion": { + "type": "string", + "description": "SnapVersion is the snap package version." + }, + "architecture": { + "type": "string", + "description": "Architecture is the target CPU architecture (e.g., \"amd64\", \"arm64\")." + } + }, + "type": "object", + "required": [ + "snapType", + "base", + "snapName", + "snapVersion", + "architecture" + ], + "description": "SnapEntry represents metadata for a Snap package extracted from snap.yaml or snapcraft.yaml files." + }, + "Source": { + "properties": { + "id": { + "type": "string", + "description": "ID is a unique identifier for the analyzed source artifact." + }, + "name": { + "type": "string", + "description": "Name is the name of the analyzed artifact (e.g., image name, directory path)." + }, + "version": { + "type": "string", + "description": "Version is the version of the analyzed artifact (e.g., image tag)." + }, + "supplier": { + "type": "string", + "description": "Supplier is supplier information, which can be user-provided for NTIA minimum elements compliance." + }, + "type": { + "type": "string", + "description": "Type is the source type (e.g., \"image\", \"directory\", \"file\")." + }, + "metadata": { + "description": "Metadata contains additional source-specific metadata." 
+ } + }, + "type": "object", + "required": [ + "id", + "name", + "version", + "type", + "metadata" + ], + "description": "Source represents the artifact that was analyzed to generate this SBOM, such as a container image, directory, or file archive." + }, + "SwiftPackageManagerLockEntry": { + "properties": { + "revision": { + "type": "string", + "description": "Revision is git commit hash of the resolved package" + } + }, + "type": "object", + "required": [ + "revision" + ], + "description": "SwiftPackageManagerResolvedEntry represents a resolved dependency from a Package.resolved file with its locked version and source location." + }, + "SwiplpackPackage": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the .toml file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the .toml file" + }, + "author": { + "type": "string", + "description": "Author is author name" + }, + "authorEmail": { + "type": "string", + "description": "AuthorEmail is author email address" + }, + "packager": { + "type": "string", + "description": "Packager is packager name (if different from author)" + }, + "packagerEmail": { + "type": "string", + "description": "PackagerEmail is packager email address" + }, + "homepage": { + "type": "string", + "description": "Homepage is project homepage URL" + }, + "dependencies": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Dependencies are the list of required dependencies" + } + }, + "type": "object", + "required": [ + "name", + "version", + "author", + "authorEmail", + "packager", + "packagerEmail", + "homepage", + "dependencies" + ], + "description": "SwiplPackEntry represents a SWI-Prolog package from the pack system with metadata about the package and its dependencies." + }, + "TerraformLockProviderEntry": { + "properties": { + "url": { + "type": "string", + "description": "URL is the provider source address (e.g., \"registry.terraform.io/hashicorp/aws\")." + }, + "constraints": { + "type": "string", + "description": "Constraints specifies the version constraints for the provider (e.g., \"~\u003e 4.0\")." + }, + "version": { + "type": "string", + "description": "Version is the locked provider version selected during terraform init." + }, + "hashes": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Hashes are cryptographic checksums for the provider plugin archives across different platforms." + } + }, + "type": "object", + "required": [ + "url", + "constraints", + "version", + "hashes" + ], + "description": "TerraformLockProviderEntry represents a single provider entry in a Terraform dependency lock file (.terraform.lock.hcl)." 
+ }, + "WordpressPluginEntry": { + "properties": { + "pluginInstallDirectory": { + "type": "string", + "description": "PluginInstallDirectory is directory name where the plugin is installed" + }, + "author": { + "type": "string", + "description": "Author is plugin author name" + }, + "authorUri": { + "type": "string", + "description": "AuthorURI is author's website URL" + } + }, + "type": "object", + "required": [ + "pluginInstallDirectory" + ], + "description": "WordpressPluginEntry represents all metadata parsed from the wordpress plugin file" + }, + "cpes": { + "items": { + "$ref": "#/$defs/CPE" + }, + "type": "array" + }, + "licenses": { + "items": { + "$ref": "#/$defs/License" + }, + "type": "array" + } + } +} diff --git a/schema/json/schema-latest.json b/schema/json/schema-latest.json index e27d96039..00a53ffcd 100644 --- a/schema/json/schema-latest.json +++ b/schema/json/schema-latest.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "anchore.io/schema/syft/json/16.0.42/document", + "$id": "anchore.io/schema/syft/json/16.0.43/document", "$ref": "#/$defs/Document", "$defs": { "AlpmDbEntry": { @@ -130,7 +130,8 @@ "description": "Digests contains file content hashes for integrity verification" } }, - "type": "object" + "type": "object", + "description": "AlpmFileRecord represents a single file entry within an Arch Linux package with its associated metadata tracked by pacman." }, "ApkDbEntry": { "properties": { @@ -433,16 +434,19 @@ "CPE": { "properties": { "cpe": { - "type": "string" + "type": "string", + "description": "Value is the CPE string identifier." }, "source": { - "type": "string" + "type": "string", + "description": "Source is the source where this CPE was obtained or generated from." } }, "type": "object", "required": [ "cpe" - ] + ], + "description": "CPE represents a Common Platform Enumeration identifier used for matching packages to known vulnerabilities in security databases." }, "ClassifierMatch": { "properties": { @@ -747,19 +751,23 @@ "Descriptor": { "properties": { "name": { - "type": "string" + "type": "string", + "description": "Name is the name of the tool that generated this SBOM (e.g., \"syft\")." }, "version": { - "type": "string" + "type": "string", + "description": "Version is the version of the tool that generated this SBOM." }, - "configuration": true + "configuration": { + "description": "Configuration contains the tool configuration used during SBOM generation." + } }, "type": "object", "required": [ "name", "version" ], - "description": "Descriptor describes what created the document as well as surrounding metadata" + "description": "Descriptor identifies the tool that generated this SBOM document, including its name, version, and configuration used during catalog generation." }, "Digest": { "properties": { @@ -1285,58 +1293,71 @@ "File": { "properties": { "id": { - "type": "string" + "type": "string", + "description": "ID is a unique identifier for this file within the SBOM." }, "location": { - "$ref": "#/$defs/Coordinates" + "$ref": "#/$defs/Coordinates", + "description": "Location is the file path and layer information where this file was found." }, "metadata": { - "$ref": "#/$defs/FileMetadataEntry" + "$ref": "#/$defs/FileMetadataEntry", + "description": "Metadata contains filesystem metadata such as permissions, ownership, and file type." }, "contents": { - "type": "string" + "type": "string", + "description": "Contents is the file contents for small files." 
}, "digests": { "items": { "$ref": "#/$defs/Digest" }, - "type": "array" + "type": "array", + "description": "Digests contains cryptographic hashes of the file contents." }, "licenses": { "items": { "$ref": "#/$defs/FileLicense" }, - "type": "array" + "type": "array", + "description": "Licenses contains license information discovered within this file." }, "executable": { - "$ref": "#/$defs/Executable" + "$ref": "#/$defs/Executable", + "description": "Executable contains executable metadata if this file is a binary." }, "unknowns": { "items": { "type": "string" }, - "type": "array" + "type": "array", + "description": "Unknowns contains unknown fields for forward compatibility." } }, "type": "object", "required": [ "id", "location" - ] + ], + "description": "File represents a file discovered during cataloging with its metadata, content digests, licenses, and relationships to packages." }, "FileLicense": { "properties": { "value": { - "type": "string" + "type": "string", + "description": "Value is the raw license identifier or text as found in the file." }, "spdxExpression": { - "type": "string" + "type": "string", + "description": "SPDXExpression is the parsed SPDX license expression." }, "type": { - "type": "string" + "type": "string", + "description": "Type is the license type classification (e.g., declared, concluded, discovered)." }, "evidence": { - "$ref": "#/$defs/FileLicenseEvidence" + "$ref": "#/$defs/FileLicenseEvidence", + "description": "Evidence contains supporting evidence for this license detection." } }, "type": "object", @@ -1344,18 +1365,22 @@ "value", "spdxExpression", "type" - ] + ], + "description": "FileLicense represents license information discovered within a file's contents or metadata, including the matched license text and SPDX expression." }, "FileLicenseEvidence": { "properties": { "confidence": { - "type": "integer" + "type": "integer", + "description": "Confidence is the confidence score for this license detection (0-100)." }, "offset": { - "type": "integer" + "type": "integer", + "description": "Offset is the byte offset where the license text starts in the file." }, "extent": { - "type": "integer" + "type": "integer", + "description": "Extent is the length of the license text in bytes." } }, "type": "object", @@ -1363,30 +1388,38 @@ "confidence", "offset", "extent" - ] + ], + "description": "FileLicenseEvidence contains supporting evidence for a license detection in a file, including the byte offset, extent, and confidence level." }, "FileMetadataEntry": { "properties": { "mode": { - "type": "integer" + "type": "integer", + "description": "Mode is the Unix file permission mode in octal format." }, "type": { - "type": "string" + "type": "string", + "description": "Type is the file type (e.g., \"RegularFile\", \"Directory\", \"SymbolicLink\")." }, "linkDestination": { - "type": "string" + "type": "string", + "description": "LinkDestination is the target path for symbolic links." }, "userID": { - "type": "integer" + "type": "integer", + "description": "UserID is the file owner user ID." }, "groupID": { - "type": "integer" + "type": "integer", + "description": "GroupID is the file owner group ID." }, "mimeType": { - "type": "string" + "type": "string", + "description": "MIMEType is the MIME type of the file contents." }, "size": { - "type": "integer" + "type": "integer", + "description": "Size is the file size in bytes." 
} }, "type": "object", @@ -1397,7 +1430,50 @@ "groupID", "mimeType", "size" - ] + ], + "description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file." + }, + "GgufFileHeader": { + "properties": { + "ggufVersion": { + "type": "integer", + "description": "GGUFVersion is the GGUF format version (e.g., 3)" + }, + "fileSize": { + "type": "integer", + "description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)" + }, + "architecture": { + "type": "string", + "description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")" + }, + "quantization": { + "type": "string", + "description": "Quantization is the quantization type (e.g., \"IQ4_NL\", \"Q4_K_M\")" + }, + "parameters": { + "type": "integer", + "description": "Parameters is the number of model parameters (if present in header)" + }, + "tensorCount": { + "type": "integer", + "description": "TensorCount is the number of tensors in the model" + }, + "header": { + "type": "object", + "description": "RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication." + }, + "metadataHash": { + "type": "string", + "description": "MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames." + } + }, + "type": "object", + "required": [ + "ggufVersion", + "tensorCount" + ], + "description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file." }, "GithubActionsUseStatement": { "properties": { @@ -1545,7 +1621,8 @@ "items": { "type": "string" }, - "type": "array" + "type": "array", + "description": "IDLikes represents a list of distribution IDs that this Linux distribution is similar to or derived from, as defined in os-release ID_LIKE field." }, "JavaArchive": { "properties": { @@ -1974,28 +2051,34 @@ "License": { "properties": { "value": { - "type": "string" + "type": "string", + "description": "Value is the raw license identifier or expression as found." }, "spdxExpression": { - "type": "string" + "type": "string", + "description": "SPDXExpression is the parsed SPDX license expression." }, "type": { - "type": "string" + "type": "string", + "description": "Type is the license type classification (e.g., declared, concluded, discovered)." }, "urls": { "items": { "type": "string" }, - "type": "array" + "type": "array", + "description": "URLs are URLs where license text or information can be found." }, "locations": { "items": { "$ref": "#/$defs/Location" }, - "type": "array" + "type": "array", + "description": "Locations are file locations where this license was discovered." }, "contents": { - "type": "string" + "type": "string", + "description": "Contents is the full license text content." 
} }, "type": "object", @@ -2005,7 +2088,8 @@ "type", "urls", "locations" - ] + ], + "description": "License represents software license information discovered for a package, including SPDX expressions and supporting evidence locations." }, "LinuxKernelArchive": { "properties": { @@ -2130,64 +2214,84 @@ "LinuxRelease": { "properties": { "prettyName": { - "type": "string" + "type": "string", + "description": "PrettyName is a human-readable operating system name with version." }, "name": { - "type": "string" + "type": "string", + "description": "Name is the operating system name without version information." }, "id": { - "type": "string" + "type": "string", + "description": "ID is the lower-case operating system identifier (e.g., \"ubuntu\", \"rhel\")." }, "idLike": { - "$ref": "#/$defs/IDLikes" + "$ref": "#/$defs/IDLikes", + "description": "IDLike is a list of operating system IDs this distribution is similar to or derived from." }, "version": { - "type": "string" + "type": "string", + "description": "Version is the operating system version including codename if available." }, "versionID": { - "type": "string" + "type": "string", + "description": "VersionID is the operating system version number or identifier." }, "versionCodename": { - "type": "string" + "type": "string", + "description": "VersionCodename is the operating system release codename (e.g., \"jammy\", \"bullseye\")." }, "buildID": { - "type": "string" + "type": "string", + "description": "BuildID is a build identifier for the operating system." }, "imageID": { - "type": "string" + "type": "string", + "description": "ImageID is an identifier for container or cloud images." }, "imageVersion": { - "type": "string" + "type": "string", + "description": "ImageVersion is the version for container or cloud images." }, "variant": { - "type": "string" + "type": "string", + "description": "Variant is the operating system variant name (e.g., \"Server\", \"Workstation\")." }, "variantID": { - "type": "string" + "type": "string", + "description": "VariantID is the lower-case operating system variant identifier." }, "homeURL": { - "type": "string" + "type": "string", + "description": "HomeURL is the homepage URL for the operating system." }, "supportURL": { - "type": "string" + "type": "string", + "description": "SupportURL is the support or help URL for the operating system." }, "bugReportURL": { - "type": "string" + "type": "string", + "description": "BugReportURL is the bug reporting URL for the operating system." }, "privacyPolicyURL": { - "type": "string" + "type": "string", + "description": "PrivacyPolicyURL is the privacy policy URL for the operating system." }, "cpeName": { - "type": "string" + "type": "string", + "description": "CPEName is the Common Platform Enumeration name for the operating system." }, "supportEnd": { - "type": "string" + "type": "string", + "description": "SupportEnd is the end of support date or version identifier." }, "extendedSupport": { - "type": "boolean" + "type": "boolean", + "description": "ExtendedSupport indicates whether extended security or support is available." } }, - "type": "object" + "type": "object", + "description": "LinuxRelease contains Linux distribution identification and version information extracted from /etc/os-release or similar system files." }, "Location": { "properties": { @@ -2283,7 +2387,7 @@ "product_id", "kb" ], - "description": "MicrosoftKbPatch is slightly odd in how it is expected to map onto data." 
+ "description": "MicrosoftKbPatch represents a Windows Knowledge Base patch identifier associated with a specific Microsoft product from the MSRC (Microsoft Security Response Center)." }, "NixDerivation": { "properties": { @@ -2517,6 +2621,9 @@ { "$ref": "#/$defs/ErlangRebarLockEntry" }, + { + "$ref": "#/$defs/GgufFileHeader" + }, { "$ref": "#/$defs/GithubActionsUseStatement" }, @@ -3014,7 +3121,8 @@ "type": "object", "required": [ "integrity" - ] + ], + "description": "PnpmLockResolution contains package resolution metadata from pnpm lockfiles, including the integrity hash used for verification." }, "PortageDbEntry": { "properties": { @@ -3501,22 +3609,28 @@ "Relationship": { "properties": { "parent": { - "type": "string" + "type": "string", + "description": "Parent is the ID of the parent artifact in this relationship." }, "child": { - "type": "string" + "type": "string", + "description": "Child is the ID of the child artifact in this relationship." }, "type": { - "type": "string" + "type": "string", + "description": "Type is the relationship type (e.g., \"contains\", \"dependency-of\", \"ancestor-of\")." }, - "metadata": true + "metadata": { + "description": "Metadata contains additional relationship-specific metadata." + } }, "type": "object", "required": [ "parent", "child", "type" - ] + ], + "description": "Relationship represents a directed relationship between two artifacts in the SBOM, such as package-contains-file or package-depends-on-package." }, "RpmArchive": { "properties": { @@ -3863,17 +3977,20 @@ "Schema": { "properties": { "version": { - "type": "string" + "type": "string", + "description": "Version is the JSON schema version for this document format." }, "url": { - "type": "string" + "type": "string", + "description": "URL is the URL to the JSON schema definition document." } }, "type": "object", "required": [ "version", "url" - ] + ], + "description": "Schema specifies the JSON schema version and URL reference that defines the structure and validation rules for this document format." }, "SnapEntry": { "properties": { @@ -3911,21 +4028,28 @@ "Source": { "properties": { "id": { - "type": "string" + "type": "string", + "description": "ID is a unique identifier for the analyzed source artifact." }, "name": { - "type": "string" + "type": "string", + "description": "Name is the name of the analyzed artifact (e.g., image name, directory path)." }, "version": { - "type": "string" + "type": "string", + "description": "Version is the version of the analyzed artifact (e.g., image tag)." }, "supplier": { - "type": "string" + "type": "string", + "description": "Supplier is supplier information, which can be user-provided for NTIA minimum elements compliance." }, "type": { - "type": "string" + "type": "string", + "description": "Type is the source type (e.g., \"image\", \"directory\", \"file\")." }, - "metadata": true + "metadata": { + "description": "Metadata contains additional source-specific metadata." + } }, "type": "object", "required": [ @@ -3935,7 +4059,7 @@ "type", "metadata" ], - "description": "Instead, the Supplier can be determined by the user of syft and passed as a config or flag to help fulfill the NTIA minimum elements." + "description": "Source represents the artifact that was analyzed to generate this SBOM, such as a container image, directory, or file archive." 
}, "SwiftPackageManagerLockEntry": { "properties": { diff --git a/syft/format/cpes/decoder.go b/syft/format/cpes/decoder.go new file mode 100644 index 000000000..b5d3ab62e --- /dev/null +++ b/syft/format/cpes/decoder.go @@ -0,0 +1,95 @@ +package cpes + +import ( + "bufio" + "errors" + "fmt" + "io" + "strings" + + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/cpe" + "github.com/anchore/syft/syft/format/internal" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" +) + +const ID sbom.FormatID = "cpes" +const version = "1" + +var _ sbom.FormatDecoder = (*decoder)(nil) + +type decoder struct{} + +func NewFormatDecoder() sbom.FormatDecoder { + return decoder{} +} + +func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) { + if r == nil { + return nil, "", "", fmt.Errorf("no reader provided") + } + s, err := toSyftModel(r) + return s, ID, version, err +} + +func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) { + if r == nil { + return "", "" + } + + scanner := bufio.NewScanner(r) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" { + // skip whitespace only lines + continue + } + + err := cpe.ValidateString(line) + if err != nil { + return "", "" + } + + return ID, version + } + + return "", "" +} + +func toSyftModel(r io.Reader) (*sbom.SBOM, error) { + var errs []error + pkgs := pkg.NewCollection() + + scanner := bufio.NewScanner(r) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + + if line == "" { + continue + } + + // skip invalid CPEs + c, err := cpe.New(line, "") + if err != nil { + log.WithFields("error", err, "line", line).Debug("unable to parse cpe") + continue + } + + p := pkg.Package{ + Name: c.Attributes.Product, + Version: c.Attributes.Version, + CPEs: []cpe.CPE{c}, + } + + internal.Backfill(&p) + p.SetID() + pkgs.Add(p) + } + + return &sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: pkgs, + }, + }, errors.Join(errs...) 
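+	// note: nothing above ever appends to errs, so errors.Join(errs...) is
+	// currently always nil; invalid CPE lines are logged and skipped rather
+	// than collected as errors.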
+} diff --git a/syft/format/cpes/decoder_test.go b/syft/format/cpes/decoder_test.go new file mode 100644 index 000000000..6ad14b966 --- /dev/null +++ b/syft/format/cpes/decoder_test.go @@ -0,0 +1,171 @@ +package cpes + +import ( + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/syft/cpe" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" +) + +func Test_CPEProvider(t *testing.T) { + tests := []struct { + name string + userInput string + sbom *sbom.SBOM + }{ + { + name: "takes a single cpe", + userInput: "cpe:/a:apache:log4j:2.14.1", + sbom: &sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: pkg.NewCollection(pkg.Package{ + Name: "log4j", + Version: "2.14.1", + CPEs: []cpe.CPE{ + cpe.Must("cpe:/a:apache:log4j:2.14.1", ""), + }, + }), + }, + }, + }, + { + name: "takes multiple cpes", + userInput: `cpe:/a:apache:log4j:2.14.1 + cpe:2.3:a:f5:nginx:*:*:*:*:*:*:*:*; + cpe:2.3:a:f5:nginx:0.5.2:*:*:*:*:*:*:*; + cpe:2.3:a:f5:nginx:0.5.3:*:*:*:*:*:*:*;`, + sbom: &sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: pkg.NewCollection( + pkg.Package{ + Name: "log4j", + Version: "2.14.1", + CPEs: []cpe.CPE{ + cpe.Must("cpe:/a:apache:log4j:2.14.1", ""), + }, + }, + pkg.Package{ + Name: "nginx", + Version: "", + CPEs: []cpe.CPE{ + cpe.Must("cpe:2.3:a:f5:nginx:*:*:*:*:*:*:*:*;", ""), + }, + }, + pkg.Package{ + Name: "nginx", + Version: "0.5.2", + CPEs: []cpe.CPE{ + cpe.Must("cpe:2.3:a:f5:nginx:0.5.2:*:*:*:*:*:*:*;", ""), + }, + }, + pkg.Package{ + Name: "nginx", + Version: "0.5.3", + CPEs: []cpe.CPE{ + cpe.Must("cpe:2.3:a:f5:nginx:0.5.3:*:*:*:*:*:*:*;", ""), + }, + }, + ), + }, + }, + }, + { + name: "takes cpe with no version", + userInput: "cpe:/a:apache:log4j", + sbom: &sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: pkg.NewCollection(pkg.Package{ + Name: "log4j", + CPEs: []cpe.CPE{ + cpe.Must("cpe:/a:apache:log4j", ""), + }, + }), + }, + }, + }, + { + name: "takes CPE 2.3 format", + userInput: "cpe:2.3:a:apache:log4j:2.14.1:*:*:*:*:*:*:*", + sbom: &sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: pkg.NewCollection(pkg.Package{ + Name: "log4j", + Version: "2.14.1", + CPEs: []cpe.CPE{ + cpe.Must("cpe:2.3:a:apache:log4j:2.14.1:*:*:*:*:*:*:*", ""), + }, + }), + }, + }, + }, + { + name: "deduces target SW from CPE - known target_sw", + userInput: "cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", + sbom: &sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: pkg.NewCollection(pkg.Package{ + Name: "opensearch", + Type: pkg.GemPkg, + CPEs: []cpe.CPE{ + cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", ""), + }, + }), + }, + }, + }, + { + name: "handles unknown target_sw CPE field", + userInput: "cpe:2.3:a:amazon:opensearch:*:*:*:*:*:loremipsum:*:*", + sbom: &sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: pkg.NewCollection(pkg.Package{ + Name: "opensearch", + Type: "", + CPEs: []cpe.CPE{ + cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:loremipsum:*:*", ""), + }, + }), + }, + }, + }, + { + name: "invalid prefix", + userInput: "dir:test-fixtures/cpe", + sbom: &sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: pkg.NewCollection(), + }, + }, + }, + } + + syftPkgOpts := []cmp.Option{ + cmpopts.IgnoreFields(pkg.Package{}, "id", "Language"), + cmpopts.IgnoreUnexported(pkg.Package{}, file.LocationSet{}, pkg.LicenseSet{}), + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + dec := 
NewFormatDecoder() + + decodedSBOM, _, _, err := dec.Decode(strings.NewReader(tc.userInput)) + require.NoError(t, err) + + gotSyftPkgs := decodedSBOM.Artifacts.Packages.Sorted() + wantSyftPkgs := tc.sbom.Artifacts.Packages.Sorted() + require.Equal(t, len(gotSyftPkgs), len(wantSyftPkgs)) + for idx, wantPkg := range wantSyftPkgs { + if d := cmp.Diff(wantPkg, gotSyftPkgs[idx], syftPkgOpts...); d != "" { + t.Errorf("unexpected Syft Package (-want +got):\n%s", d) + } + } + }) + } +} diff --git a/syft/format/decoders.go b/syft/format/decoders.go index 6ca1f94a2..48dba03e9 100644 --- a/syft/format/decoders.go +++ b/syft/format/decoders.go @@ -3,6 +3,7 @@ package format import ( "io" + "github.com/anchore/syft/syft/format/cpes" "github.com/anchore/syft/syft/format/cyclonedxjson" "github.com/anchore/syft/syft/format/cyclonedxxml" "github.com/anchore/syft/syft/format/purls" @@ -26,6 +27,7 @@ func Decoders() []sbom.FormatDecoder { spdxtagvalue.NewFormatDecoder(), spdxjson.NewFormatDecoder(), purls.NewFormatDecoder(), + cpes.NewFormatDecoder(), } } diff --git a/syft/format/github/internal/model/model.go b/syft/format/github/internal/model/model.go index 69d2b9876..b2aa0d23a 100644 --- a/syft/format/github/internal/model/model.go +++ b/syft/format/github/internal/model/model.go @@ -1,11 +1,13 @@ package model import ( + "context" "fmt" "strings" "time" - "github.com/anchore/archiver/v3" + "github.com/mholt/archives" + "github.com/anchore/packageurl-go" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/pkg" @@ -153,8 +155,8 @@ func trimRelative(s string) string { // isArchive returns true if the path appears to be an archive func isArchive(path string) bool { - _, err := archiver.ByExtension(path) - return err == nil + format, _, err := archives.Identify(context.Background(), path, nil) + return err == nil && format != nil } func toDependencies(s *sbom.SBOM, p pkg.Package) (out []string) { diff --git a/syft/format/internal/backfill.go b/syft/format/internal/backfill.go index 6e5544b95..230aac18a 100644 --- a/syft/format/internal/backfill.go +++ b/syft/format/internal/backfill.go @@ -10,13 +10,31 @@ import ( "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/cpe" "github.com/anchore/syft/syft/pkg" + cataloger "github.com/anchore/syft/syft/pkg/cataloger/common/cpe" ) // Backfill takes all information present in the package and attempts to fill in any missing information -// from any available sources, such as the Metadata and PURL. +// from any available sources, such as the Metadata, PURL, or CPEs. 
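+// As a result, a package decoded from nothing but a CPE string (as in the cpes
+// decoder) can have its missing Type filled in from the CPE's target_sw attribute.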
// // Backfill does not call p.SetID(), but this needs to be called later to ensure it's up to date func Backfill(p *pkg.Package) { + backfillFromPurl(p) + backfillFromCPE(p) +} + +func backfillFromCPE(p *pkg.Package) { + if len(p.CPEs) == 0 { + return + } + + c := p.CPEs[0] + + if p.Type == "" { + p.Type = cataloger.TargetSoftwareToPackageType(c.Attributes.TargetSW) + } +} + +func backfillFromPurl(p *pkg.Package) { if p.PURL == "" { return } diff --git a/syft/format/internal/backfill_test.go b/syft/format/internal/backfill_test.go index 7e396e2b5..79918d6e9 100644 --- a/syft/format/internal/backfill_test.go +++ b/syft/format/internal/backfill_test.go @@ -121,6 +121,20 @@ func Test_Backfill(t *testing.T) { Metadata: pkg.JavaArchive{}, }, }, + { + name: "target-sw from CPE", + in: pkg.Package{ + CPEs: []cpe.CPE{ + cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", ""), + }, + }, + expected: pkg.Package{ + CPEs: []cpe.CPE{ + cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", ""), + }, + Type: pkg.GemPkg, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/syft/format/internal/cyclonedxutil/helpers/component.go b/syft/format/internal/cyclonedxutil/helpers/component.go index a8ce96686..4a681db3e 100644 --- a/syft/format/internal/cyclonedxutil/helpers/component.go +++ b/syft/format/internal/cyclonedxutil/helpers/component.go @@ -40,8 +40,11 @@ func EncodeComponent(p pkg.Package, supplier string, locationSorter func(a, b fi } componentType := cyclonedx.ComponentTypeLibrary - if p.Type == pkg.BinaryPkg { + switch p.Type { + case pkg.BinaryPkg: componentType = cyclonedx.ComponentTypeApplication + case pkg.ModelPkg: + componentType = cyclonedx.ComponentTypeMachineLearningModel } return cyclonedx.Component{ diff --git a/syft/format/internal/cyclonedxutil/helpers/decoder.go b/syft/format/internal/cyclonedxutil/helpers/decoder.go index 3dca5d5fc..43470494b 100644 --- a/syft/format/internal/cyclonedxutil/helpers/decoder.go +++ b/syft/format/internal/cyclonedxutil/helpers/decoder.go @@ -62,7 +62,7 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str switch component.Type { case cyclonedx.ComponentTypeOS: case cyclonedx.ComponentTypeContainer: - case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary: + case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary, cyclonedx.ComponentTypeMachineLearningModel: p := decodeComponent(component) idMap[component.BOMRef] = p if component.BOMRef != "" { diff --git a/syft/format/internal/spdxutil/helpers/originator_supplier_test.go b/syft/format/internal/spdxutil/helpers/originator_supplier_test.go index d8950f4ac..ed32427a0 100644 --- a/syft/format/internal/spdxutil/helpers/originator_supplier_test.go +++ b/syft/format/internal/spdxutil/helpers/originator_supplier_test.go @@ -55,6 +55,7 @@ func Test_OriginatorSupplier(t *testing.T) { pkg.OpamPackage{}, pkg.YarnLockEntry{}, pkg.TerraformLockProviderEntry{}, + pkg.GGUFFileHeader{}, ) tests := []struct { name string diff --git a/syft/format/internal/spdxutil/helpers/source_info.go b/syft/format/internal/spdxutil/helpers/source_info.go index 5d36a600d..6729e6d49 100644 --- a/syft/format/internal/spdxutil/helpers/source_info.go +++ b/syft/format/internal/spdxutil/helpers/source_info.go @@ -82,6 +82,8 @@ func SourceInfo(p pkg.Package) string { answer = "acquired package info from Homebrew formula" case pkg.TerraformPkg: answer = "acquired 
package info from Terraform dependency lock file"
+	case pkg.ModelPkg:
+		answer = "acquired package info from AI artifact (e.g. GGUF file)"
 	default:
 		answer = "acquired package info from the following paths"
 	}
diff --git a/syft/format/internal/spdxutil/helpers/source_info_test.go b/syft/format/internal/spdxutil/helpers/source_info_test.go
index 2502dfe8c..13338f8c4 100644
--- a/syft/format/internal/spdxutil/helpers/source_info_test.go
+++ b/syft/format/internal/spdxutil/helpers/source_info_test.go
@@ -351,6 +351,14 @@ func Test_SourceInfo(t *testing.T) {
 				"acquired package info from Terraform dependency lock file",
 			},
 		},
+		{
+			input: pkg.Package{
+				Type: pkg.ModelPkg,
+			},
+			expected: []string{
+				"",
+			},
+		},
 	}
 	var pkgTypes []pkg.Type
 	for _, test := range tests {
diff --git a/syft/format/syftjson/model/document.go b/syft/format/syftjson/model/document.go
index c1cb0e381..5bfbcdf06 100644
--- a/syft/format/syftjson/model/document.go
+++ b/syft/format/syftjson/model/document.go
@@ -35,14 +35,23 @@ func (d *Document) UnmarshalJSON(data []byte) error {
 	return nil
 }
-// Descriptor describes what created the document as well as surrounding metadata
+// Descriptor identifies the tool that generated this SBOM document, including its name, version, and configuration used during catalog generation.
 type Descriptor struct {
-	Name          string      `json:"name"`
-	Version       string      `json:"version"`
+	// Name is the name of the tool that generated this SBOM (e.g., "syft").
+	Name string `json:"name"`
+
+	// Version is the version of the tool that generated this SBOM.
+	Version string `json:"version"`
+
+	// Configuration contains the tool configuration used during SBOM generation.
 	Configuration interface{} `json:"configuration,omitempty"`
 }
+// Schema specifies the JSON schema version and URL reference that defines the structure and validation rules for this document format.
 type Schema struct {
+	// Version is the JSON schema version for this document format.
 	Version string `json:"version"`
-	URL     string `json:"url"`
+
+	// URL is the URL to the JSON schema definition document.
+	URL string `json:"url"`
 }
diff --git a/syft/format/syftjson/model/file.go b/syft/format/syftjson/model/file.go
index 87b4754e4..cfe89e71e 100644
--- a/syft/format/syftjson/model/file.go
+++ b/syft/format/syftjson/model/file.go
@@ -10,25 +10,55 @@ import (
 	"github.com/anchore/syft/syft/license"
 )
+// File represents a file discovered during cataloging with its metadata, content digests, licenses, and relationships to packages.
 type File struct {
-	ID         string             `json:"id"`
-	Location   file.Coordinates   `json:"location"`
-	Metadata   *FileMetadataEntry `json:"metadata,omitempty"`
-	Contents   string             `json:"contents,omitempty"`
-	Digests    []file.Digest      `json:"digests,omitempty"`
-	Licenses   []FileLicense      `json:"licenses,omitempty"`
-	Executable *file.Executable   `json:"executable,omitempty"`
-	Unknowns   []string           `json:"unknowns,omitempty"`
+	// ID is a unique identifier for this file within the SBOM.
+	ID string `json:"id"`
+
+	// Location is the file path and layer information where this file was found.
+	Location file.Coordinates `json:"location"`
+
+	// Metadata contains filesystem metadata such as permissions, ownership, and file type.
+	Metadata *FileMetadataEntry `json:"metadata,omitempty"`
+
+	// Contents is the file contents for small files.
+	Contents string `json:"contents,omitempty"`
+
+	// Digests contains cryptographic hashes of the file contents.
+ Digests []file.Digest `json:"digests,omitempty"` + + // Licenses contains license information discovered within this file. + Licenses []FileLicense `json:"licenses,omitempty"` + + // Executable contains executable metadata if this file is a binary. + Executable *file.Executable `json:"executable,omitempty"` + + // Unknowns contains unknown fields for forward compatibility. + Unknowns []string `json:"unknowns,omitempty"` } +// FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file. type FileMetadataEntry struct { - Mode int `json:"mode"` - Type string `json:"type"` + // Mode is the Unix file permission mode in octal format. + Mode int `json:"mode"` + + // Type is the file type (e.g., "RegularFile", "Directory", "SymbolicLink"). + Type string `json:"type"` + + // LinkDestination is the target path for symbolic links. LinkDestination string `json:"linkDestination,omitempty"` - UserID int `json:"userID"` - GroupID int `json:"groupID"` - MIMEType string `json:"mimeType"` - Size int64 `json:"size"` + + // UserID is the file owner user ID. + UserID int `json:"userID"` + + // GroupID is the file owner group ID. + GroupID int `json:"groupID"` + + // MIMEType is the MIME type of the file contents. + MIMEType string `json:"mimeType"` + + // Size is the file size in bytes. + Size int64 `json:"size"` } type auxFileMetadataEntry FileMetadataEntry @@ -82,17 +112,31 @@ type sbomImportLegacyFileMetadataEntry struct { Size int64 `json:"Size"` } +// FileLicense represents license information discovered within a file's contents or metadata, including the matched license text and SPDX expression. type FileLicense struct { - Value string `json:"value"` - SPDXExpression string `json:"spdxExpression"` - Type license.Type `json:"type"` - Evidence *FileLicenseEvidence `json:"evidence,omitempty"` + // Value is the raw license identifier or text as found in the file. + Value string `json:"value"` + + // SPDXExpression is the parsed SPDX license expression. + SPDXExpression string `json:"spdxExpression"` + + // Type is the license type classification (e.g., declared, concluded, discovered). + Type license.Type `json:"type"` + + // Evidence contains supporting evidence for this license detection. + Evidence *FileLicenseEvidence `json:"evidence,omitempty"` } +// FileLicenseEvidence contains supporting evidence for a license detection in a file, including the byte offset, extent, and confidence level. type FileLicenseEvidence struct { + // Confidence is the confidence score for this license detection (0-100). Confidence int `json:"confidence"` - Offset int `json:"offset"` - Extent int `json:"extent"` + + // Offset is the byte offset where the license text starts in the file. + Offset int `json:"offset"` + + // Extent is the length of the license text in bytes. + Extent int `json:"extent"` } type intOrStringFileType struct { diff --git a/syft/format/syftjson/model/linux_release.go b/syft/format/syftjson/model/linux_release.go index a544c250c..6482890fb 100644 --- a/syft/format/syftjson/model/linux_release.go +++ b/syft/format/syftjson/model/linux_release.go @@ -4,28 +4,67 @@ import ( "encoding/json" ) +// IDLikes represents a list of distribution IDs that this Linux distribution is similar to or derived from, as defined in os-release ID_LIKE field. type IDLikes []string +// LinuxRelease contains Linux distribution identification and version information extracted from /etc/os-release or similar system files. 
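+// For example, an Ubuntu 22.04 image typically yields ID "ubuntu",
+// VersionID "22.04", and VersionCodename "jammy".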
type LinuxRelease struct { - PrettyName string `json:"prettyName,omitempty"` - Name string `json:"name,omitempty"` - ID string `json:"id,omitempty"` - IDLike IDLikes `json:"idLike,omitempty"` - Version string `json:"version,omitempty"` - VersionID string `json:"versionID,omitempty"` - VersionCodename string `json:"versionCodename,omitempty"` - BuildID string `json:"buildID,omitempty"` - ImageID string `json:"imageID,omitempty"` - ImageVersion string `json:"imageVersion,omitempty"` - Variant string `json:"variant,omitempty"` - VariantID string `json:"variantID,omitempty"` - HomeURL string `json:"homeURL,omitempty"` - SupportURL string `json:"supportURL,omitempty"` - BugReportURL string `json:"bugReportURL,omitempty"` - PrivacyPolicyURL string `json:"privacyPolicyURL,omitempty"` - CPEName string `json:"cpeName,omitempty"` - SupportEnd string `json:"supportEnd,omitempty"` - ExtendedSupport bool `json:"extendedSupport,omitempty"` + // PrettyName is a human-readable operating system name with version. + PrettyName string `json:"prettyName,omitempty"` + + // Name is the operating system name without version information. + Name string `json:"name,omitempty"` + + // ID is the lower-case operating system identifier (e.g., "ubuntu", "rhel"). + ID string `json:"id,omitempty"` + + // IDLike is a list of operating system IDs this distribution is similar to or derived from. + IDLike IDLikes `json:"idLike,omitempty"` + + // Version is the operating system version including codename if available. + Version string `json:"version,omitempty"` + + // VersionID is the operating system version number or identifier. + VersionID string `json:"versionID,omitempty"` + + // VersionCodename is the operating system release codename (e.g., "jammy", "bullseye"). + VersionCodename string `json:"versionCodename,omitempty"` + + // BuildID is a build identifier for the operating system. + BuildID string `json:"buildID,omitempty"` + + // ImageID is an identifier for container or cloud images. + ImageID string `json:"imageID,omitempty"` + + // ImageVersion is the version for container or cloud images. + ImageVersion string `json:"imageVersion,omitempty"` + + // Variant is the operating system variant name (e.g., "Server", "Workstation"). + Variant string `json:"variant,omitempty"` + + // VariantID is the lower-case operating system variant identifier. + VariantID string `json:"variantID,omitempty"` + + // HomeURL is the homepage URL for the operating system. + HomeURL string `json:"homeURL,omitempty"` + + // SupportURL is the support or help URL for the operating system. + SupportURL string `json:"supportURL,omitempty"` + + // BugReportURL is the bug reporting URL for the operating system. + BugReportURL string `json:"bugReportURL,omitempty"` + + // PrivacyPolicyURL is the privacy policy URL for the operating system. + PrivacyPolicyURL string `json:"privacyPolicyURL,omitempty"` + + // CPEName is the Common Platform Enumeration name for the operating system. + CPEName string `json:"cpeName,omitempty"` + + // SupportEnd is the end of support date or version identifier. + SupportEnd string `json:"supportEnd,omitempty"` + + // ExtendedSupport indicates whether extended security or support is available. 
+ ExtendedSupport bool `json:"extendedSupport,omitempty"` } func (s *IDLikes) UnmarshalJSON(data []byte) error { diff --git a/syft/format/syftjson/model/package.go b/syft/format/syftjson/model/package.go index 420376e29..eb59e78a7 100644 --- a/syft/format/syftjson/model/package.go +++ b/syft/format/syftjson/model/package.go @@ -36,22 +36,40 @@ type PackageBasicData struct { PURL string `json:"purl"` } +// cpes is a collection of Common Platform Enumeration identifiers for a package. type cpes []CPE +// CPE represents a Common Platform Enumeration identifier used for matching packages to known vulnerabilities in security databases. type CPE struct { - Value string `json:"cpe"` + // Value is the CPE string identifier. + Value string `json:"cpe"` + + // Source is the source where this CPE was obtained or generated from. Source string `json:"source,omitempty"` } +// licenses is a collection of license findings associated with a package. type licenses []License +// License represents software license information discovered for a package, including SPDX expressions and supporting evidence locations. type License struct { - Value string `json:"value"` - SPDXExpression string `json:"spdxExpression"` - Type license.Type `json:"type"` - URLs []string `json:"urls"` - Locations []file.Location `json:"locations"` - Contents string `json:"contents,omitempty"` + // Value is the raw license identifier or expression as found. + Value string `json:"value"` + + // SPDXExpression is the parsed SPDX license expression. + SPDXExpression string `json:"spdxExpression"` + + // Type is the license type classification (e.g., declared, concluded, discovered). + Type license.Type `json:"type"` + + // URLs are the URLs where the license text or related license information can be found. + URLs []string `json:"urls"` + + // Locations are the file locations where this license was discovered. + Locations []file.Location `json:"locations"` + + // Contents is the full license text. + Contents string `json:"contents,omitempty"` } func newModelLicensesFromValues(licenses []string) (ml []License) { diff --git a/syft/format/syftjson/model/relationship.go b/syft/format/syftjson/model/relationship.go index 46f6da22d..e17a5bd97 100644 --- a/syft/format/syftjson/model/relationship.go +++ b/syft/format/syftjson/model/relationship.go @@ -1,8 +1,16 @@ package model +// Relationship represents a directed relationship between two artifacts in the SBOM, such as package-contains-file or package-depends-on-package. type Relationship struct { - Parent string `json:"parent"` - Child string `json:"child"` - Type string `json:"type"` + // Parent is the ID of the parent artifact in this relationship. + Parent string `json:"parent"` + + // Child is the ID of the child artifact in this relationship. + Child string `json:"child"` + + // Type is the relationship type (e.g., "contains", "dependency-of", "ancestor-of"). + Type string `json:"type"` + + // Metadata contains additional relationship-specific metadata.
Metadata interface{} `json:"metadata,omitempty"` } diff --git a/syft/format/syftjson/model/source.go b/syft/format/syftjson/model/source.go index 750e31956..83a2d87f0 100644 --- a/syft/format/syftjson/model/source.go +++ b/syft/format/syftjson/model/source.go @@ -11,18 +11,25 @@ import ( "github.com/anchore/syft/syft/source" ) -// Source object represents the thing that was cataloged -// Note: syft currently makes no claims or runs any logic to determine the Supplier field below - -// Instead, the Supplier can be determined by the user of syft and passed as a config or flag to help fulfill -// the NTIA minimum elements. For mor information see the NTIA framing document below -// https://www.ntia.gov/files/ntia/publications/framingsbom_20191112.pdf +// Source represents the artifact that was analyzed to generate this SBOM, such as a container image, directory, or file archive. +// The Supplier field can be provided by users to fulfill NTIA minimum elements requirements. type Source struct { - ID string `json:"id"` - Name string `json:"name"` - Version string `json:"version"` - Supplier string `json:"supplier,omitempty"` - Type string `json:"type"` + // ID is a unique identifier for the analyzed source artifact. + ID string `json:"id"` + + // Name is the name of the analyzed artifact (e.g., image name, directory path). + Name string `json:"name"` + + // Version is the version of the analyzed artifact (e.g., image tag). + Version string `json:"version"` + + // Supplier is the supplier of the analyzed artifact; it can be user-provided to satisfy the NTIA minimum elements. + Supplier string `json:"supplier,omitempty"` + + // Type is the source type (e.g., "image", "directory", "file"). + Type string `json:"type"` + + // Metadata contains additional source-specific metadata. Metadata interface{} `json:"metadata"` } diff --git a/syft/pkg/alpm.go b/syft/pkg/alpm.go index e716c26c2..dce03a281 100644 --- a/syft/pkg/alpm.go +++ b/syft/pkg/alpm.go @@ -58,6 +58,7 @@ type AlpmDBEntry struct { Depends []string `mapstructure:"depends" json:"depends,omitempty"` } +// AlpmFileRecord represents a single file entry within an Arch Linux package with its associated metadata tracked by pacman. type AlpmFileRecord struct { // Path is the file path relative to the filesystem root Path string `mapstructure:"path" json:"path,omitempty"` diff --git a/syft/pkg/cataloger/ai/cataloger.go b/syft/pkg/cataloger/ai/cataloger.go new file mode 100644 index 000000000..cca60ac87 --- /dev/null +++ b/syft/pkg/cataloger/ai/cataloger.go @@ -0,0 +1,16 @@ +/* +Package ai provides concrete Cataloger implementations for AI artifacts and machine learning models, +including support for GGUF (GPT-Generated Unified Format) model files. +*/ +package ai + +import ( + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" +) + +// NewGGUFCataloger returns a new cataloger instance for GGUF model files. +func NewGGUFCataloger() pkg.Cataloger { + return generic.NewCataloger("gguf-cataloger").
+ WithParserByGlobs(parseGGUFModel, "**/*.gguf") +} diff --git a/syft/pkg/cataloger/ai/cataloger_test.go b/syft/pkg/cataloger/ai/cataloger_test.go new file mode 100644 index 000000000..cb4b7573f --- /dev/null +++ b/syft/pkg/cataloger/ai/cataloger_test.go @@ -0,0 +1,140 @@ +package ai + +import ( + "os" + "path/filepath" + "testing" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" +) + +func TestGGUFCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain gguf files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "models/model.gguf", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewGGUFCataloger()) + }) + } +} + +func TestGGUFCataloger(t *testing.T) { + tests := []struct { + name string + setup func(t *testing.T) string + expectedPackages []pkg.Package + expectedRelationships []artifact.Relationship + }{ + { + name: "catalog single GGUF file", + setup: func(t *testing.T) string { + dir := t.TempDir() + data := newTestGGUFBuilder(). + withVersion(3). + withStringKV("general.architecture", "llama"). + withStringKV("general.name", "llama3-8b"). + withStringKV("general.version", "3.0"). + withStringKV("general.license", "Apache-2.0"). + withStringKV("general.quantization", "Q4_K_M"). + withUint64KV("general.parameter_count", 8030000000). + withStringKV("general.some_random_kv", "foobar"). + build() + + path := filepath.Join(dir, "llama3-8b.gguf") + os.WriteFile(path, data, 0644) + return dir + }, + expectedPackages: []pkg.Package{ + { + Name: "llama3-8b", + Version: "3.0", + Type: pkg.ModelPkg, + Licenses: pkg.NewLicenseSet( + pkg.NewLicenseFromFields("Apache-2.0", "", nil), + ), + Metadata: pkg.GGUFFileHeader{ + Architecture: "llama", + Quantization: "Unknown", + Parameters: 0, + GGUFVersion: 3, + TensorCount: 0, + MetadataKeyValuesHash: "6e3d368066455ce4", + RemainingKeyValues: map[string]interface{}{ + "general.some_random_kv": "foobar", + }, + }, + }, + }, + expectedRelationships: nil, + }, + { + name: "catalog GGUF file with minimal metadata", + setup: func(t *testing.T) string { + dir := t.TempDir() + data := newTestGGUFBuilder(). + withVersion(3). + withStringKV("general.architecture", "gpt2"). + withStringKV("general.name", "gpt2-small"). + withStringKV("gpt2.context_length", "1024"). + withUint32KV("gpt2.embedding_length", 768). + build() + + path := filepath.Join(dir, "gpt2-small.gguf") + os.WriteFile(path, data, 0644) + return dir + }, + expectedPackages: []pkg.Package{ + { + Name: "gpt2-small", + Version: "", + Type: pkg.ModelPkg, + Licenses: pkg.NewLicenseSet(), + Metadata: pkg.GGUFFileHeader{ + Architecture: "gpt2", + Quantization: "Unknown", + Parameters: 0, + GGUFVersion: 3, + TensorCount: 0, + MetadataKeyValuesHash: "9dc6f23591062a27", + RemainingKeyValues: map[string]interface{}{ + "gpt2.context_length": "1024", + "gpt2.embedding_length": uint32(768), + }, + }, + }, + }, + expectedRelationships: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fixtureDir := tt.setup(t) + + // Use pkgtest to catalog and compare + pkgtest.NewCatalogTester(). + FromDirectory(t, fixtureDir). + Expects(tt.expectedPackages, tt.expectedRelationships). + IgnoreLocationLayer(). 
+ IgnorePackageFields("FoundBy", "Locations"). + TestCataloger(t, NewGGUFCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/ai/package.go b/syft/pkg/cataloger/ai/package.go new file mode 100644 index 000000000..67c6570ae --- /dev/null +++ b/syft/pkg/cataloger/ai/package.go @@ -0,0 +1,22 @@ +package ai + +import ( + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" +) + +func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license string, locations ...file.Location) pkg.Package { + p := pkg.Package{ + Name: modelName, + Version: version, + Locations: file.NewLocationSet(locations...), + Type: pkg.ModelPkg, + Licenses: pkg.NewLicenseSet(pkg.NewLicensesFromValues(license)...), + Metadata: *metadata, + // NOTE: PURL is intentionally not set as the package-url spec + // has not yet finalized support for ML model packages + } + p.SetID() + + return p +} diff --git a/syft/pkg/cataloger/ai/package_test.go b/syft/pkg/cataloger/ai/package_test.go new file mode 100644 index 000000000..ea58304d6 --- /dev/null +++ b/syft/pkg/cataloger/ai/package_test.go @@ -0,0 +1,121 @@ +package ai + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" +) + +func TestNewGGUFPackage(t *testing.T) { + tests := []struct { + name string + metadata *pkg.GGUFFileHeader + input struct { + modelName string + version string + license string + locations []file.Location + } + expected pkg.Package + }{ + { + name: "complete GGUF package with all fields", + input: struct { + modelName string + version string + license string + locations []file.Location + }{ + modelName: "llama3-8b", + version: "3.0", + license: "Apache-2.0", + locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")}, + }, + metadata: &pkg.GGUFFileHeader{ + Architecture: "llama", + Quantization: "Q4_K_M", + Parameters: 8030000000, + GGUFVersion: 3, + TensorCount: 291, + RemainingKeyValues: map[string]any{ + "general.random_kv": "foobar", + }, + }, + expected: pkg.Package{ + Name: "llama3-8b", + Version: "3.0", + Type: pkg.ModelPkg, + Licenses: pkg.NewLicenseSet( + pkg.NewLicenseFromFields("Apache-2.0", "", nil), + ), + Metadata: pkg.GGUFFileHeader{ + Architecture: "llama", + Quantization: "Q4_K_M", + Parameters: 8030000000, + GGUFVersion: 3, + TensorCount: 291, + RemainingKeyValues: map[string]any{ + "general.random_kv": "foobar", + }, + }, + Locations: file.NewLocationSet(file.NewLocation("/models/llama3-8b.gguf")), + }, + }, + { + name: "minimal GGUF package", + input: struct { + modelName string + version string + license string + locations []file.Location + }{ + modelName: "gpt2-small", + version: "1.0", + license: "MIT", + locations: []file.Location{file.NewLocation("/models/simple.gguf")}, + }, + metadata: &pkg.GGUFFileHeader{ + Architecture: "gpt2", + GGUFVersion: 3, + TensorCount: 50, + }, + expected: pkg.Package{ + Name: "gpt2-small", + Version: "1.0", + Type: pkg.ModelPkg, + Licenses: pkg.NewLicenseSet( + pkg.NewLicenseFromFields("MIT", "", nil), + ), + Metadata: pkg.GGUFFileHeader{ + Architecture: "gpt2", + GGUFVersion: 3, + TensorCount: 50, + }, + Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")), + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual := newGGUFPackage( + tt.metadata, + tt.input.modelName, + tt.input.version, + tt.input.license, + tt.input.locations..., + ) + + // Verify 
metadata type + _, ok := actual.Metadata.(pkg.GGUFFileHeader) + require.True(t, ok, "metadata should be GGUFFileHeader") + + // Use AssertPackagesEqual for comprehensive comparison + pkgtest.AssertPackagesEqual(t, tt.expected, actual) + }) + } +} diff --git a/syft/pkg/cataloger/ai/parse_gguf.go b/syft/pkg/cataloger/ai/parse_gguf.go new file mode 100644 index 000000000..3a1eb473f --- /dev/null +++ b/syft/pkg/cataloger/ai/parse_gguf.go @@ -0,0 +1,63 @@ +package ai + +import ( + "encoding/binary" + "fmt" + "io" + + gguf_parser "github.com/gpustack/gguf-parser-go" +) + +// GGUF file format constants +const ( + ggufMagicNumber = 0x46554747 // "GGUF" in little-endian + maxHeaderSize = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies +) + +// copyHeader copies the GGUF header from the reader to the writer. +// It validates the magic number first, then copies the rest of the data. +// The reader should be wrapped with io.LimitedReader to prevent OOM issues. +func copyHeader(w io.Writer, r io.Reader) error { + // Read initial chunk to validate magic number + // GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info + initialBuf := make([]byte, 24) // Enough for magic, version, tensor count, and kv count + if _, err := io.ReadFull(r, initialBuf); err != nil { + return fmt.Errorf("failed to read GGUF header prefix: %w", err) + } + + // Verify magic number + magic := binary.LittleEndian.Uint32(initialBuf[0:4]) + if magic != ggufMagicNumber { + return fmt.Errorf("invalid GGUF magic number: 0x%08X", magic) + } + + // Write the initial buffer to the writer + if _, err := w.Write(initialBuf); err != nil { + return fmt.Errorf("failed to write GGUF header prefix: %w", err) + } + + // Copy the rest of the header from reader to writer + // The LimitedReader will return EOF once maxHeaderSize is reached + if _, err := io.Copy(w, r); err != nil { + return fmt.Errorf("failed to copy GGUF header: %w", err) + } + + return nil +} + +// convertGGUFMetadataKVs filters out the key-value pairs that are lifted into typed fields elsewhere and returns the remainder as a generic map. +func convertGGUFMetadataKVs(kvs gguf_parser.GGUFMetadataKVs) map[string]interface{} { + result := make(map[string]interface{}) + + for _, kv := range kvs { + // Skip standard fields that are extracted separately + switch kv.Key { + case "general.architecture", "general.name", "general.license", + "general.version", "general.parameter_count", "general.quantization": + continue + } + result[kv.Key] = kv.Value + } + + return result +} diff --git a/syft/pkg/cataloger/ai/parse_gguf_model.go b/syft/pkg/cataloger/ai/parse_gguf_model.go new file mode 100644 index 000000000..74deb4199 --- /dev/null +++ b/syft/pkg/cataloger/ai/parse_gguf_model.go @@ -0,0 +1,135 @@ +package ai + +import ( + "context" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strings" + + "github.com/cespare/xxhash/v2" + gguf_parser "github.com/gpustack/gguf-parser-go" + + "github.com/anchore/syft/internal" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/internal/unknown" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" +) + +// parseGGUFModel parses a GGUF model file and returns the discovered package. +// This implementation only reads the header portion of the file, not the entire model.
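+// +// For reference, the fixed-size prefix that copyHeader validates is (per the GGUF layout documented there): magic uint32 ("GGUF"), version uint32, tensor_count uint64, metadata_kv_count uint64, followed by the metadata key/value pairs and tensor info.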
+func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + defer internal.CloseAndLogError(reader, reader.Path()) + + // Create a temporary file for the library to parse + // The library requires a file path, so we create a temp file + tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf") + if err != nil { + return nil, nil, fmt.Errorf("failed to create temp file: %w", err) + } + tempPath := tempFile.Name() + defer os.Remove(tempPath) + + // Copy and validate the GGUF file header using LimitedReader to prevent OOM + // We use LimitedReader to cap reads at maxHeaderSize (50MB) + limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize} + if err := copyHeader(tempFile, limitedReader); err != nil { + tempFile.Close() + return nil, nil, fmt.Errorf("failed to copy GGUF header: %w", err) + } + tempFile.Close() + + // Parse using gguf-parser-go with options to skip unnecessary data + ggufFile, err := gguf_parser.ParseGGUFFile(tempPath, + gguf_parser.SkipLargeMetadata(), + ) + if err != nil { + return nil, nil, fmt.Errorf("failed to parse GGUF file: %w", err) + } + + // Extract metadata + metadata := ggufFile.Metadata() + + // Extract version separately (will be set on Package.Version) + modelVersion := extractVersion(ggufFile.Header.MetadataKV) + + // Convert to syft metadata structure + syftMetadata := &pkg.GGUFFileHeader{ + Architecture: metadata.Architecture, + Quantization: metadata.FileTypeDescriptor, + Parameters: uint64(metadata.Parameters), + GGUFVersion: uint32(ggufFile.Header.Version), + TensorCount: ggufFile.Header.TensorCount, + RemainingKeyValues: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV), + MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV), + } + + // If model name is not in metadata, use filename + if metadata.Name == "" { + metadata.Name = extractModelNameFromPath(reader.Path()) + } + + // Create package from metadata + p := newGGUFPackage( + syftMetadata, + metadata.Name, + modelVersion, + metadata.License, + reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), + ) + + return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse GGUF file") +} + +// computeKVMetadataHash computes a stable hash of the KV metadata for use as a global identifier +func computeKVMetadataHash(metadata gguf_parser.GGUFMetadataKVs) string { + // Sort the KV pairs by key for stable hashing + sortedKVs := make([]gguf_parser.GGUFMetadataKV, len(metadata)) + copy(sortedKVs, metadata) + sort.Slice(sortedKVs, func(i, j int) bool { + return sortedKVs[i].Key < sortedKVs[j].Key + }) + + // Marshal sorted KVs to JSON for stable hashing + jsonBytes, err := json.Marshal(sortedKVs) + if err != nil { + log.Debugf("failed to marshal metadata for hashing: %v", err) + return "" + } + + // Compute xxhash + hash := xxhash.Sum64(jsonBytes) + return fmt.Sprintf("%016x", hash) // 16 hex chars (64 bits) +} + +// extractVersion attempts to extract version from metadata KV pairs +func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string { + for _, kv := range kvs { + if kv.Key == "general.version" { + if v, ok := kv.Value.(string); ok && v != "" { + return v + } + } + } + return "" +} + +// extractModelNameFromPath extracts the model name from the file path +func extractModelNameFromPath(path string) string { + // Get the base filename + base := filepath.Base(path) + + // Remove .gguf extension + name := strings.TrimSuffix(base, 
".gguf") + + return name +} + +// integrity check +var _ generic.Parser = parseGGUFModel diff --git a/syft/pkg/cataloger/ai/test-fixtures/glob-paths/models/model.gguf b/syft/pkg/cataloger/ai/test-fixtures/glob-paths/models/model.gguf new file mode 100644 index 000000000..e69de29bb diff --git a/syft/pkg/cataloger/ai/test_helpers_test.go b/syft/pkg/cataloger/ai/test_helpers_test.go new file mode 100644 index 000000000..aeca0dc63 --- /dev/null +++ b/syft/pkg/cataloger/ai/test_helpers_test.go @@ -0,0 +1,128 @@ +package ai + +import ( + "bytes" + "encoding/binary" +) + +// GGUF type constants for test builder +// https://github.com/ggml-org/ggml/blob/master/docs/gguf.md +const ( + ggufMagic = 0x46554747 // "GGUF" in little-endian + ggufTypeUint8 = 0 + ggufTypeInt8 = 1 + ggufTypeUint16 = 2 + ggufTypeInt16 = 3 + ggufTypeUint32 = 4 + ggufTypeInt32 = 5 + ggufTypeFloat32 = 6 + ggufTypeBool = 7 + ggufTypeString = 8 + ggufTypeArray = 9 + ggufTypeUint64 = 10 + ggufTypeInt64 = 11 + ggufTypeFloat64 = 12 +) + +// testGGUFBuilder helps build GGUF files for testing +type testGGUFBuilder struct { + buf *bytes.Buffer + version uint32 + tensorCount uint64 + kvPairs []testKVPair +} + +type testKVPair struct { + key string + valueType uint32 + value interface{} +} + +func newTestGGUFBuilder() *testGGUFBuilder { + return &testGGUFBuilder{ + buf: new(bytes.Buffer), + version: 3, + tensorCount: 0, + kvPairs: []testKVPair{}, + } +} + +func (b *testGGUFBuilder) withVersion(v uint32) *testGGUFBuilder { + b.version = v + return b +} + +func (b *testGGUFBuilder) withTensorCount(count uint64) *testGGUFBuilder { + b.tensorCount = count + return b +} + +func (b *testGGUFBuilder) withStringKV(key, value string) *testGGUFBuilder { + b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeString, value: value}) + return b +} + +func (b *testGGUFBuilder) withUint64KV(key string, value uint64) *testGGUFBuilder { + b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint64, value: value}) + return b +} + +func (b *testGGUFBuilder) withUint32KV(key string, value uint32) *testGGUFBuilder { + b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint32, value: value}) + return b +} + +func (b *testGGUFBuilder) writeString(s string) { + binary.Write(b.buf, binary.LittleEndian, uint64(len(s))) + b.buf.WriteString(s) +} + +func (b *testGGUFBuilder) build() []byte { + // Write magic number "GGUF" + binary.Write(b.buf, binary.LittleEndian, uint32(ggufMagic)) + + // Write version + binary.Write(b.buf, binary.LittleEndian, b.version) + + // Write tensor count + binary.Write(b.buf, binary.LittleEndian, b.tensorCount) + + // Write KV count + binary.Write(b.buf, binary.LittleEndian, uint64(len(b.kvPairs))) + + // Write KV pairs + for _, kv := range b.kvPairs { + // Write key + b.writeString(kv.key) + // Write value type + binary.Write(b.buf, binary.LittleEndian, kv.valueType) + // Write value based on type + switch kv.valueType { + case ggufTypeString: + b.writeString(kv.value.(string)) + case ggufTypeUint32: + binary.Write(b.buf, binary.LittleEndian, kv.value.(uint32)) + case ggufTypeUint64: + binary.Write(b.buf, binary.LittleEndian, kv.value.(uint64)) + case ggufTypeUint8: + binary.Write(b.buf, binary.LittleEndian, kv.value.(uint8)) + case ggufTypeInt32: + binary.Write(b.buf, binary.LittleEndian, kv.value.(int32)) + case ggufTypeBool: + var v uint8 + if kv.value.(bool) { + v = 1 + } + binary.Write(b.buf, binary.LittleEndian, v) + } + } + + return b.buf.Bytes() +} + +// buildInvalidMagic 
creates a file with invalid magic number +func (b *testGGUFBuilder) buildInvalidMagic() []byte { + buf := new(bytes.Buffer) + binary.Write(buf, binary.LittleEndian, uint32(0x12345678)) + return buf.Bytes() +} diff --git a/syft/pkg/cataloger/common/cpe/target_software_to_pkg_type.go b/syft/pkg/cataloger/common/cpe/target_software_to_pkg_type.go new file mode 100644 index 000000000..d3846f47b --- /dev/null +++ b/syft/pkg/cataloger/common/cpe/target_software_to_pkg_type.go @@ -0,0 +1,58 @@ +package cpe + +import ( + "strings" + + "github.com/anchore/syft/syft/pkg" +) + +// TargetSoftwareToPackageType is derived from looking at target_software attributes in the NVD dataset +// TODO: ideally this would be driven from the store, where we can resolve ecosystem aliases directly +func TargetSoftwareToPackageType(tsw string) pkg.Type { + tsw = strings.NewReplacer("-", "_", " ", "_").Replace(strings.ToLower(tsw)) + switch tsw { + case "alpine", "apk": + return pkg.ApkPkg + case "debian", "dpkg": + return pkg.DebPkg + case "java", "maven", "ant", "gradle", "jenkins", "jenkins_ci", "kafka", "logstash", "mule", "nifi", "solr", "spark", "storm", "struts", "tomcat", "zookeeper", "log4j": + return pkg.JavaPkg + case "javascript", "node", "nodejs", "node.js", "npm", "yarn", "apache", "jquery", "next.js", "prismjs": + return pkg.NpmPkg + case "c", "c++", "c/c++", "conan", "gnu_c++", "qt": + return pkg.ConanPkg + case "dart": + return pkg.DartPubPkg + case "redhat", "rpm", "redhat_enterprise_linux", "rhel", "suse", "suse_linux", "opensuse", "opensuse_linux", "fedora", "centos", "oracle_linux", "ol": + return pkg.RpmPkg + case "elixir", "hex": + return pkg.HexPkg + case "erlang": + return pkg.ErlangOTPPkg + case ".net", ".net_framework", "asp", "asp.net", "dotnet", "dotnet_framework", "c#", "csharp", "nuget": + return pkg.DotnetPkg + case "ruby", "gem", "nokogiri", "ruby_on_rails": + return pkg.GemPkg + case "rust", "cargo", "crates": + return pkg.RustPkg + case "python", "pip", "pypi", "flask": + return pkg.PythonPkg + case "kb", "knowledgebase", "msrc", "mskb", "microsoft": + return pkg.KbPkg + case "portage", "gentoo": + return pkg.PortagePkg + case "go", "golang", "gomodule": + return pkg.GoModulePkg + case "linux_kernel", "linux", "z/linux": + return pkg.LinuxKernelPkg + case "php": + return pkg.PhpComposerPkg + case "swift": + return pkg.SwiftPkg + case "wordpress", "wordpress_plugin", "wordpress_": + return pkg.WordpressPluginPkg + case "lua", "luarocks": + return pkg.LuaRocksPkg + } + return "" +} diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index 27d4f164f..d44ec5d3a 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -80,7 +80,7 @@ func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, _ f // processJavaArchive processes an archive for java contents, returning all Java libraries and nested archives func (gap genericArchiveParserAdapter) processJavaArchive(ctx context.Context, reader file.LocationReadCloser, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { - parser, cleanupFn, err := newJavaArchiveParser(reader, true, gap.cfg) + parser, cleanupFn, err := newJavaArchiveParser(ctx, reader, true, gap.cfg) // note: even on error, we should always run cleanup functions defer cleanupFn() if err != nil { @@ -99,7 +99,7 @@ func uniquePkgKey(groupID string, p *pkg.Package) string { // newJavaArchiveParser returns a new java archive parser object for the 
given archive. Can be configured to discover // and parse nested archives or ignore them. -func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) { +func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) { // fetch the last element of the virtual path virtualElements := strings.Split(reader.Path(), ":") currentFilepath := virtualElements[len(virtualElements)-1] @@ -109,7 +109,7 @@ func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err) } - fileManifest, err := intFile.NewZipFileManifest(archivePath) + fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath) if err != nil { return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err) } @@ -226,7 +226,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package, } // fetch the manifest file - contents, err := intFile.ContentsFromZip(j.archivePath, manifestMatches...) + contents, err := intFile.ContentsFromZip(ctx, j.archivePath, manifestMatches...) if err != nil { return nil, fmt.Errorf("unable to extract java manifests (%s): %w", j.location, err) } @@ -387,8 +387,9 @@ type parsedPomProject struct { // discoverMainPackageFromPomInfo attempts to resolve maven groupId, artifactId, version and other info from found pom information func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (group, name, version string, parsedPom *parsedPomProject) { - properties, _ := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob)) - projects, _ := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob)) + // Find the pom.properties/pom.xml if the names seem like a plausible match + properties, _ := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob)) + projects, _ := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob)) artifactsMap := j.buildArtifactsMap(properties) pomProperties, parsedPom := j.findBestPomMatch(properties, projects, artifactsMap) @@ -519,13 +520,13 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(ctx context.Context, paren var pkgs []pkg.Package // pom.properties - properties, err := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob)) + properties, err := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob)) if err != nil { return nil, err } // pom.xml - projects, err := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob)) + projects, err := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob)) if err != nil { return nil, err } @@ -575,7 +576,7 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg. } if len(licenseMatches) > 0 { - contents, err := intFile.ContentsFromZip(j.archivePath, licenseMatches...) + contents, err := intFile.ContentsFromZip(ctx, j.archivePath, licenseMatches...) 
if err != nil { return nil, fmt.Errorf("unable to extract java license (%s): %w", j.location, err) } @@ -616,7 +617,7 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(ctx context.Context, pare // associating each discovered package to the given parent package. func discoverPkgsFromZip(ctx context.Context, location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) { // search and parse pom.properties files & fetch the contents - openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...) + openers, err := intFile.ExtractFromZipToUniqueTempFile(ctx, archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...) if err != nil { return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err) } @@ -680,8 +681,8 @@ func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWit return nestedPkgs, nestedRelationships, nil } -func pomPropertiesByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) { - contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...) +func pomPropertiesByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) { + contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...) if err != nil { return nil, fmt.Errorf("unable to extract maven files: %w", err) } @@ -709,8 +710,8 @@ func pomPropertiesByParentPath(archivePath string, location file.Location, extra return propertiesByParentPath, nil } -func pomProjectByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) { - contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...) +func pomProjectByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) { + contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...) 
if err != nil { return nil, fmt.Errorf("unable to extract maven files: %w", err) } diff --git a/syft/pkg/cataloger/java/archive_parser_test.go b/syft/pkg/cataloger/java/archive_parser_test.go index 76187e5fd..c5f559d65 100644 --- a/syft/pkg/cataloger/java/archive_parser_test.go +++ b/syft/pkg/cataloger/java/archive_parser_test.go @@ -72,7 +72,7 @@ func TestSearchMavenForLicenses(t *testing.T) { require.NoError(t, err) // setup parser - ap, cleanupFn, err := newJavaArchiveParser( + ap, cleanupFn, err := newJavaArchiveParser(context.Background(), file.LocationReadCloser{ Location: file.NewLocation(fixture.Name()), ReadCloser: fixture, @@ -372,7 +372,7 @@ func TestParseJar(t *testing.T) { UseNetwork: false, UseMavenLocalRepository: false, } - parser, cleanupFn, err := newJavaArchiveParser( + parser, cleanupFn, err := newJavaArchiveParser(context.Background(), file.LocationReadCloser{ Location: file.NewLocation(fixture.Name()), ReadCloser: fixture, @@ -1499,7 +1499,7 @@ func Test_deterministicMatchingPomProperties(t *testing.T) { fixture, err := os.Open(fixturePath) require.NoError(t, err) - parser, cleanupFn, err := newJavaArchiveParser( + parser, cleanupFn, err := newJavaArchiveParser(context.Background(), file.LocationReadCloser{ Location: file.NewLocation(fixture.Name()), ReadCloser: fixture, @@ -1636,7 +1636,7 @@ func Test_jarPomPropertyResolutionDoesNotPanic(t *testing.T) { ctx := context.TODO() // setup parser - ap, cleanupFn, err := newJavaArchiveParser( + ap, cleanupFn, err := newJavaArchiveParser(context.Background(), file.LocationReadCloser{ Location: file.NewLocation(fixture.Name()), ReadCloser: fixture, diff --git a/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go b/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go index 5af4f0b3f..4c4edc595 100644 --- a/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go +++ b/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go @@ -70,7 +70,7 @@ func (gtp genericTarWrappedJavaArchiveParser) parseTarWrappedJavaArchive(ctx con } func discoverPkgsFromTar(ctx context.Context, location file.Location, archivePath, contentPath string, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) { - openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...) + openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(ctx, archivePath, contentPath, archiveFormatGlobs...) if err != nil { return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err) } diff --git a/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go b/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go index 3dd1d2524..e515f4f90 100644 --- a/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go +++ b/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go @@ -41,7 +41,7 @@ func (gzp genericZipWrappedJavaArchiveParser) parseZipWrappedJavaArchive(ctx con // functions support zips with shell scripts prepended to the file. Specifically, the helpers use the central // header at the end of the file to determine where the beginning of the zip payload is (unlike the standard lib // or archiver). 
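// For example, a zip with a shell launcher script prepended (a self-executing archive) is still read correctly, because the central directory at the end of the file pinpoints where the zip payload begins.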
- fileManifest, err := intFile.NewZipFileManifest(archivePath) + fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath) if err != nil { return nil, nil, fmt.Errorf("unable to read files from java archive: %w", err) } diff --git a/syft/pkg/gguf.go b/syft/pkg/gguf.go new file mode 100644 index 000000000..59c30e075 --- /dev/null +++ b/syft/pkg/gguf.go @@ -0,0 +1,37 @@ +package pkg + +// GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file. +// GGUF is a binary file format used for storing model weights for the GGML library, designed for fast +// loading and saving of models, particularly quantized large language models. +// The model name, license, and version are not duplicated here; they are promoted to fields on the syft Package itself. +type GGUFFileHeader struct { + // GGUFVersion is the GGUF format version (e.g., 3) + GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"` + + // FileSize is the size of the GGUF file in bytes (best-effort if available from resolver) + FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"` + + // Architecture is the model architecture (from general.architecture, e.g., "qwen3moe", "llama") + Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"` + + // Quantization is the quantization type (e.g., "IQ4_NL", "Q4_K_M") + Quantization string `json:"quantization,omitempty" cyclonedx:"quantization"` + + // Parameters is the number of model parameters (if present in header) + Parameters uint64 `json:"parameters,omitempty" cyclonedx:"parameters"` + + // TensorCount is the number of tensors in the model + TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"` + + // RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already + // represented as typed fields above. This preserves additional metadata fields for reference + // (namespaced with general.*, llama.*, etc.) while avoiding duplication. + RemainingKeyValues map[string]interface{} `json:"header,omitempty" cyclonedx:"header"` + + // MetadataKeyValuesHash is an XXH64 (64-bit xxHash) digest of all key-value pairs from the GGUF header metadata. + // This hash is computed over the complete header metadata (including the fields extracted + // into typed fields above) and provides a stable identifier for the model configuration + // across different file locations or remotes. It allows matching identical models even + // when stored in different repositories or with different filenames. + MetadataKeyValuesHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"` +} diff --git a/syft/pkg/microsoft.go b/syft/pkg/microsoft.go index 80c345c28..88dab5ba4 100644 --- a/syft/pkg/microsoft.go +++ b/syft/pkg/microsoft.go @@ -1,10 +1,7 @@ package pkg -// MicrosoftKbPatch is slightly odd in how it is expected to map onto data. -// This is critical to grasp because there is no MSRC cataloger. The `ProductID` -// field is expected to be the MSRC Product ID, for example: -// "Windows 10 Version 1703 for 32-bit Systems". -// `Kb` is expected to be the actual KB number, for example "5001028" +// MicrosoftKbPatch represents a Windows Knowledge Base patch identifier associated with a specific Microsoft product from the MSRC (Microsoft Security Response Center). +// This type captures both the product being patched and the KB article number for the update. type MicrosoftKbPatch struct { // ProductID is MSRC Product ID (e.g.
"Windows 10 Version 1703 for 32-bit Systems") ProductID string `toml:"product_id" json:"product_id"` diff --git a/syft/pkg/npm.go b/syft/pkg/npm.go index d5fdf85d7..24d7f4d3a 100644 --- a/syft/pkg/npm.go +++ b/syft/pkg/npm.go @@ -48,6 +48,7 @@ type YarnLockEntry struct { Dependencies map[string]string `mapstructure:"dependencies" json:"dependencies"` } +// PnpmLockResolution contains package resolution metadata from pnpm lockfiles, including the integrity hash used for verification. type PnpmLockResolution struct { // Integrity is Subresource Integrity hash for verification (SRI format) Integrity string `mapstructure:"integrity" json:"integrity"` diff --git a/syft/pkg/type.go b/syft/pkg/type.go index 6ac815f0e..b9ee19357 100644 --- a/syft/pkg/type.go +++ b/syft/pkg/type.go @@ -54,6 +54,7 @@ const ( TerraformPkg Type = "terraform" WordpressPluginPkg Type = "wordpress-plugin" HomebrewPkg Type = "homebrew" + ModelPkg Type = "model" ) // AllPkgs represents all supported package types @@ -98,6 +99,7 @@ var AllPkgs = []Type{ TerraformPkg, WordpressPluginPkg, HomebrewPkg, + ModelPkg, } // PackageURLType returns the PURL package type for the current package. diff --git a/syft/pkg/type_test.go b/syft/pkg/type_test.go index a0695b5bf..b58e4ecd5 100644 --- a/syft/pkg/type_test.go +++ b/syft/pkg/type_test.go @@ -155,6 +155,7 @@ func TestTypeFromPURL(t *testing.T) { expectedTypes.Remove(string(HomebrewPkg)) expectedTypes.Remove(string(TerraformPkg)) expectedTypes.Remove(string(GraalVMNativeImagePkg)) + expectedTypes.Remove(string(ModelPkg)) // no valid purl for ai artifacts currently expectedTypes.Remove(string(PhpPeclPkg)) // we should always consider this a pear package for _, test := range tests { diff --git a/syft/source/filesource/file_source.go b/syft/source/filesource/file_source.go index da2be0e19..0517be04a 100644 --- a/syft/source/filesource/file_source.go +++ b/syft/source/filesource/file_source.go @@ -4,13 +4,15 @@ import ( "context" "crypto" "fmt" + "io" "os" "path" + "path/filepath" "sync" + "github.com/mholt/archives" "github.com/opencontainers/go-digest" - "github.com/anchore/archiver/v3" stereoFile "github.com/anchore/stereoscope/pkg/file" intFile "github.com/anchore/syft/internal/file" "github.com/anchore/syft/internal/log" @@ -208,18 +210,8 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro // if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and // use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is // unarchived. 
- envelopedUnarchiver, err := archiver.ByExtension(path) - if unarchiver, ok := envelopedUnarchiver.(archiver.Unarchiver); err == nil && ok { - // when tar/zip files are extracted, if there are multiple entries at the same - // location, the last entry wins - // NOTE: this currently does not display any messages if an overwrite happens - switch v := unarchiver.(type) { - case *archiver.Tar: - v.OverwriteExisting = true - case *archiver.Zip: - v.OverwriteExisting = true - } - + envelopedUnarchiver, _, err := archives.Identify(context.Background(), path, nil) + if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok { analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver) if err != nil { return "", nil, fmt.Errorf("unable to unarchive source file: %w", err) @@ -246,15 +238,58 @@ func digestOfFileContents(path string) string { return di.String() } -func unarchiveToTmp(path string, unarchiver archiver.Unarchiver) (string, func() error, error) { +func unarchiveToTmp(path string, unarchiver archives.Extractor) (string, func() error, error) { + var cleanupFn = func() error { return nil } + archive, err := os.Open(path) + if err != nil { + return "", cleanupFn, fmt.Errorf("unable to open archive: %v", err) + } + defer archive.Close() + tempDir, err := os.MkdirTemp("", "syft-archive-contents-") if err != nil { - return "", func() error { return nil }, fmt.Errorf("unable to create tempdir for archive processing: %w", err) + return "", cleanupFn, fmt.Errorf("unable to create tempdir for archive processing: %w", err) } - cleanupFn := func() error { + visitor := func(_ context.Context, file archives.FileInfo) error { + // Protect against symlink attacks by ensuring path doesn't escape tempDir + destPath, err := intFile.SafeJoin(tempDir, file.NameInArchive) + if err != nil { + return err + } + + if file.IsDir() { + return os.MkdirAll(destPath, file.Mode()) + } + + if err = os.MkdirAll(filepath.Dir(destPath), os.ModeDir|0755); err != nil { + return fmt.Errorf("failed to create parent directory: %w", err) + } + + rc, err := file.Open() + if err != nil { + return fmt.Errorf("failed to open file in archive: %w", err) + } + defer rc.Close() + + destFile, err := os.Create(destPath) + if err != nil { + return fmt.Errorf("failed to create file in destination: %w", err) + } + defer destFile.Close() + + if err := destFile.Chmod(file.Mode()); err != nil { + return fmt.Errorf("failed to change mode of destination file: %w", err) + } + + if _, err := io.Copy(destFile, rc); err != nil { + return fmt.Errorf("failed to copy file contents: %w", err) + } + + return nil + } + + return tempDir, func() error { return os.RemoveAll(tempDir) - } - - return tempDir, cleanupFn, unarchiver.Unarchive(path, tempDir) + }, unarchiver.Extract(context.Background(), archive, visitor) }
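A minimal standalone sketch of the gguf-parser-go calls this change builds on (the file path is illustrative, and the field set is taken from its usage in parse_gguf_model.go above):

package main

import (
	"fmt"
	"log"

	gguf_parser "github.com/gpustack/gguf-parser-go"
)

func main() {
	// Parse only the metadata of a local GGUF file, skipping large tokenizer
	// data, mirroring what parseGGUFModel does after copying the header to a
	// temp file.
	f, err := gguf_parser.ParseGGUFFile("/models/llama3-8b.gguf", gguf_parser.SkipLargeMetadata())
	if err != nil {
		log.Fatal(err)
	}
	md := f.Metadata()
	fmt.Println(md.Name, md.Architecture, md.License)
	fmt.Println(f.Header.Version, f.Header.TensorCount)
}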