Compare commits


14 Commits

Author SHA1 Message Date
Alex Goodman
2b9700f1ee attempt to bring down ci times
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2025-11-14 16:39:03 -05:00
Alex Goodman
1b386e2711 improve workflow dispatch calls
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2025-11-14 15:12:02 -05:00
Alex Goodman
ee39d2ca75 make artifacts executable
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2025-11-14 15:03:06 -05:00
Alex Goodman
c1ddbdb136 remove logging
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2025-11-14 13:47:11 -05:00
Alex Goodman
62e54030ae add logging
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2025-11-14 13:29:20 -05:00
Alex Goodman
2c195d5e5f Merge remote-tracking branch 'origin/main' into feature/migrate-to-runs-on
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2025-11-14 10:25:27 -05:00
anchore-actions-token-generator[bot]
6a21b5e5e2 chore(deps): update tools to latest versions (#4365)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: spiffcs <32073428+spiffcs@users.noreply.github.com>
2025-11-14 09:25:27 -05:00
dependabot[bot]
6480c8a425 chore(deps): bump github/codeql-action from 4.31.2 to 4.31.3 (#4366)
Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.31.2 to 4.31.3.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](0499de31b9...014f16e7ab)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-version: 4.31.3
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-14 09:25:08 -05:00
Kudryavcev Nikolay
89842bd2f6 chore: migrate syft to use mholt/archives instead of anchore fork (#4029)
---------
Signed-off-by: Kudryavcev Nikolay <kydry.nikolau@gmail.com>
Signed-off-by: Christopher Phillips <spiffcs@users.noreply.github.com>
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2025-11-13 23:04:43 +00:00
Christopher Angelo Phillips
4a60c41f38 feat: 4184 gguf parser (ai artifact cataloger) part 1 (#4279)
---------
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-11-13 17:43:48 -05:00
anchore-actions-token-generator[bot]
2e100f33f3 chore(deps): update tools to latest versions (#4358)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: spiffcs <32073428+spiffcs@users.noreply.github.com>
2025-11-12 13:27:47 -05:00
dependabot[bot]
b444f0c2ed chore(deps): bump golang.org/x/mod from 0.29.0 to 0.30.0 (#4359)
Bumps [golang.org/x/mod](https://github.com/golang/mod) from 0.29.0 to 0.30.0.
- [Commits](https://github.com/golang/mod/compare/v0.29.0...v0.30.0)

---
updated-dependencies:
- dependency-name: golang.org/x/mod
  dependency-version: 0.30.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-12 13:27:33 -05:00
Adam Chovanec
102d362daf feat: CPEs format decoder (#4207)
Signed-off-by: Adam Chovanec <git@adamchovanec.cz>
2025-11-12 10:45:09 -05:00
Alex Goodman
66c78d44af Document additional json schema fields (#4356)
* add documentation to key fields

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* regenerate json schema

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2025-11-10 16:29:06 -05:00
67 changed files with 6565 additions and 809 deletions

View File

@@ -90,7 +90,7 @@ tools:
# used for running all local and CI tasks
- name: task
version:
want: v3.45.4
want: v3.45.5
method: github-release
with:
repo: go-task/task
@@ -98,7 +98,7 @@ tools:
# used for triggering a release
- name: gh
version:
want: v2.83.0
want: v2.83.1
method: github-release
with:
repo: cli/cli

View File

@@ -6,6 +6,7 @@
name: "CodeQL Security Scan"
on:
workflow_dispatch:
push:
branches:
# only run when there are pushes to the main branch (not on PRs)
@@ -48,7 +49,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@0499de31b99561a6d14a36a5f662c2a54f91beee #v3.29.5
uses: github/codeql-action/init@014f16e7ab1402f30e7c3329d33797e7948572db #v3.29.5
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -59,7 +60,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@0499de31b99561a6d14a36a5f662c2a54f91beee #v3.29.5
uses: github/codeql-action/autobuild@014f16e7ab1402f30e7c3329d33797e7948572db #v3.29.5
# Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -73,4 +74,4 @@ jobs:
# make release
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@0499de31b99561a6d14a36a5f662c2a54f91beee #v3.29.5
uses: github/codeql-action/analyze@014f16e7ab1402f30e7c3329d33797e7948572db #v3.29.5

View File

@@ -7,5 +7,4 @@ permissions:
jobs:
run:
# Runner definition: workflows/.github/runs-on.yml
uses: anchore/workflows/.github/workflows/dependabot-automation.yaml@main

View File

@@ -13,7 +13,6 @@ on:
jobs:
run:
# Runner definition: workflows/.github/runs-on.yml
uses: "anchore/workflows/.github/workflows/oss-project-board-add.yaml@main"
secrets:
token: ${{ secrets.OSS_PROJECT_GH_TOKEN }}

View File

@@ -192,7 +192,6 @@ jobs:
release-install-script:
needs: [release]
if: ${{ needs.release.result == 'success' }}
# Runner definition: workflows/.github/runs-on.yml
uses: "anchore/workflows/.github/workflows/release-install-script.yaml@main"
with:
tag: ${{ github.event.inputs.version }}

View File

@@ -10,7 +10,6 @@ jobs:
contents: read
issues: write
pull-requests: write
# Runner definition: workflows/.github/runs-on.yml
uses: "anchore/workflows/.github/workflows/remove-awaiting-response-label.yaml@main"
secrets:
token: ${{ secrets.OSS_PROJECT_GH_TOKEN }}

View File

@@ -15,7 +15,7 @@ jobs:
name: "Publish test fixture image cache"
# we use this runner to get enough storage space for docker images and fixture cache
# Runner definition: workflows/.github/runs-on.yml
runs-on: runs-on=${{ github.run_id }}/runner=release
runs-on: runs-on=${{ github.run_id }}/runner=build/disk=large
if: github.repository == 'anchore/syft' # only run for main repo
permissions:
packages: write

View File

@@ -1,6 +1,7 @@
name: "Validate GitHub Actions"
on:
workflow_dispatch:
pull_request:
paths:
- '.github/workflows/**'

View File

@@ -32,7 +32,7 @@ jobs:
name: "Unit tests"
# we need more storage than what's on the default runner
# Runner definition: workflows/.github/runs-on.yml
runs-on: runs-on=${{ github.run_id }}/runner=small
runs-on: runs-on=${{ github.run_id }}/runner=medium
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 #v5.0.0
with:
@@ -50,7 +50,7 @@
# Note: changing this job name requires making the same update in the .github/workflows/release.yaml pipeline
name: "Integration tests"
# Runner definition: workflows/.github/runs-on.yml
runs-on: runs-on=${{ github.run_id }}/runner=small
runs-on: runs-on=${{ github.run_id }}/runner=medium
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 #v5.0.0
with:
@@ -97,100 +97,12 @@ jobs:
path: snapshot/
retention-days: 30
# # upload each platform artifact individually so downstream jobs can download only what they need
# - run: npm install @actions/artifact@2.3.2
#
# - name: Upload individual platform artifacts
# uses: actions/github-script@v8
# env:
# ACTIONS_ARTIFACT_UPLOAD_CONCURRENCY: 10
# with:
# script: |
# const { readdirSync } = require('fs')
# const { DefaultArtifactClient } = require('@actions/artifact')
# const artifact = new DefaultArtifactClient()
# const ls = d => readdirSync(d, { withFileTypes: true })
# const baseDir = "./snapshot"
# const dirs = ls(baseDir).filter(f => f.isDirectory()).map(f => f.name)
# const uploads = []
#
# // filter to only amd64 and arm64 architectures
# const supportedArchs = ['amd64', 'arm64']
# const filteredDirs = dirs.filter(dir =>
# supportedArchs.some(arch => dir.includes(arch))
# )
#
# // upload platform subdirectories
# for (const dir of filteredDirs) {
# // uploadArtifact returns Promise<{id, size}>
# uploads.push(artifact.uploadArtifact(
# // name of the archive:
# `${dir}`,
# // array of all files to include:
# ls(`${baseDir}/${dir}`).map(f => `${baseDir}/${dir}/${f.name}`),
# // base directory to trim from entries:
# `${baseDir}/${dir}`,
# { retentionDays: 30 }
# ))
# }
#
# // upload RPM and DEB packages for supported architectures
# const packageFiles = ls(baseDir).filter(f =>
# f.isFile() &&
# (f.name.endsWith('.deb') || f.name.endsWith('.rpm')) &&
# supportedArchs.some(arch => f.name.includes(`_${arch}.`))
# )
# for (const file of packageFiles) {
# uploads.push(artifact.uploadArtifact(
# file.name,
# [`${baseDir}/${file.name}`],
# baseDir,
# { retentionDays: 30 }
# ))
# }
#
# // upload SBOM files for supported architectures
# const sbomFiles = ls(baseDir).filter(f =>
# f.isFile() &&
# f.name.endsWith('.sbom') &&
# supportedArchs.some(arch => f.name.includes(`_${arch}.`))
# )
# for (const file of sbomFiles) {
# uploads.push(artifact.uploadArtifact(
# file.name,
# [`${baseDir}/${file.name}`],
# baseDir,
# { retentionDays: 30 }
# ))
# }
#
# // upload checksums file (needed by install tests)
# const rootFiles = ls(baseDir).filter(f => f.isFile() && f.name.match(/syft_.*_checksums\.txt$/))
# if (rootFiles.length > 0) {
# const checksumsFile = rootFiles[0].name
# uploads.push(artifact.uploadArtifact(
# 'syft_checksums.txt',
# [`${baseDir}/${checksumsFile}`],
# baseDir,
# { retentionDays: 30 }
# ))
# }
#
# // wait for all uploads to finish
# try {
# const results = await Promise.all(uploads)
# console.log(`Successfully uploaded ${results.length} artifacts`)
# } catch (error) {
# console.error('Upload failed:', error)
# throw error
# }
Acceptance-Linux:
# Note: changing this job name requires making the same update in the .github/workflows/release.yaml pipeline
name: "Acceptance tests (Linux)"
needs: [Build-Snapshot-Artifacts]
# Runner definition: workflows/.github/runs-on.yml
runs-on: runs-on=${{ github.run_id }}/runner=small
runs-on: runs-on=${{ github.run_id }}/runner=medium
steps:
# required for magic-cache from runs-on to function with artifact upload/download (see https://runs-on.com/caching/magic-cache/#actionsupload-artifact-compatibility)
- uses: runs-on/action@v2
@@ -210,35 +122,8 @@ jobs:
name: snapshot
path: snapshot
# - name: Download checksums file
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# name: syft_checksums.txt
# path: snapshot
#
# - name: Download Linux amd64 snapshot
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# name: linux-build_linux_amd64_v1
# path: snapshot/linux-build_linux_amd64_v1
#
# - name: Download Linux amd64 deb
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# pattern: syft_*_linux_amd64.deb
# path: snapshot
#
# - name: Download Linux amd64 rpm
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# pattern: syft_*_linux_amd64.rpm
# path: snapshot
#
# - name: Download Linux amd64 sbom
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# pattern: syft_*_linux_amd64.sbom
# path: snapshot
- name: Restore binary permissions
run: chmod +x snapshot/*/syft snapshot/*/*.exe 2>/dev/null || true
- name: Run comparison tests (Linux)
run: make compare-linux
@@ -281,23 +166,8 @@ jobs:
name: snapshot
path: snapshot
# - name: Download checksums file
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# name: syft_checksums.txt
# path: snapshot
#
# - name: Download macOS Intel snapshot
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# name: darwin-build_darwin_amd64_v1
# path: snapshot/darwin-build_darwin_amd64_v1
#
# - name: Download macOS amd64 sbom
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# pattern: syft_*_darwin_amd64.sbom
# path: snapshot
- name: Restore binary permissions
run: chmod +x snapshot/*/syft 2>/dev/null || true
- name: Run comparison tests (Mac)
run: make compare-mac
@@ -310,7 +180,7 @@ jobs:
name: "CLI tests (Linux)"
needs: [Build-Snapshot-Artifacts]
# Runner definition: workflows/.github/runs-on.yml
runs-on: runs-on=${{ github.run_id }}/runner=small
runs-on: runs-on=${{ github.run_id }}/runner=medium
steps:
# required for magic-cache from runs-on to function with artifact upload/download (see https://runs-on.com/caching/magic-cache/#actionsupload-artifact-compatibility)
- uses: runs-on/action@v2
@@ -330,11 +200,8 @@ jobs:
name: snapshot
path: snapshot
# - name: Download Linux amd64 snapshot
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# name: linux-build_linux_amd64_v1
# path: snapshot/linux-build_linux_amd64_v1
- name: Restore binary permissions
run: chmod +x snapshot/*/syft snapshot/*/*.exe 2>/dev/null || true
- name: Run CLI Tests (Linux)
run: make cli

.gitignore (vendored, 2 changes)
View File

@@ -73,3 +73,5 @@ cosign.pub
__pycache__/
*.py[cod]
*$py.class

View File

@@ -106,8 +106,8 @@ syft <image> -o <format>
Where the `formats` available are:
- `syft-json`: Use this to get as much information out of Syft as possible!
- `syft-text`: A row-oriented, human-and-machine-friendly output.
- `cyclonedx-xml`: A XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-xml@1.5`: A XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-xml`: An XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-xml@1.5`: An XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-json`: A JSON report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-json@1.5`: A JSON report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
- `spdx-tag-value`: A tag-value formatted report conforming to the [SPDX 2.3 specification](https://spdx.github.io/spdx-spec/v2.3/).

View File

@@ -39,9 +39,9 @@ vars:
# e.g. when installing snapshot debs from a local path, ./ forces the deb to be installed in the current working directory instead of referencing a package name
SNAPSHOT_DIR: ./snapshot
SNAPSHOT_BIN: "{{ .PROJECT_ROOT }}/{{ .SNAPSHOT_DIR }}/{{ .OS }}-build_{{ .OS }}_{{ .ARCH }}/{{ .PROJECT }}"
SNAPSHOT_CMD: "{{ .TOOL_DIR }}/goreleaser release --config {{ .TMP_DIR }}/goreleaser.yaml --clean --snapshot --skip=publish --skip=sign"
BUILD_CMD: "{{ .TOOL_DIR }}/goreleaser build --config {{ .TMP_DIR }}/goreleaser.yaml --clean --snapshot --single-target"
RELEASE_CMD: "{{ .TOOL_DIR }}/goreleaser release --clean --release-notes {{ .CHANGELOG }}"
SNAPSHOT_CMD: "{{ .TOOL_DIR }}/goreleaser release --config {{ .TMP_DIR }}/goreleaser.yaml --clean --snapshot --skip=publish --skip=sign --parallelism=12"
BUILD_CMD: "{{ .TOOL_DIR }}/goreleaser build --config {{ .TMP_DIR }}/goreleaser.yaml --clean --snapshot --single-target --parallelism=12"
RELEASE_CMD: "{{ .TOOL_DIR }}/goreleaser release --clean --release-notes {{ .CHANGELOG }} --parallelism=12"
VERSION:
sh: git describe --dirty --always --tags
@@ -218,14 +218,14 @@ tasks:
# unit test coverage threshold (in % coverage)
COVERAGE_THRESHOLD: 62
cmds:
- "go test -coverprofile {{ .TMP_DIR }}/unit-coverage-details.txt {{ .TEST_PKGS }}"
- "go test -p=4 -parallel=8 -coverprofile {{ .TMP_DIR }}/unit-coverage-details.txt {{ .TEST_PKGS }}"
- cmd: ".github/scripts/coverage.py {{ .COVERAGE_THRESHOLD }} {{ .TMP_DIR }}/unit-coverage-details.txt"
silent: true
integration:
desc: Run integration tests
cmds:
- "go test -v ./cmd/syft/internal/test/integration"
- "go test -v -p=4 -parallel=8 ./cmd/syft/internal/test/integration"
# exercise most of the CLI with the data race detector
# we use a larger image to ensure we're using multiple catalogers at a time
- "go run -race cmd/syft/main.go anchore/test_images:grype-quality-dotnet-69f15d2"

View File

@@ -87,6 +87,7 @@ func TestPkgCoverageImage(t *testing.T) {
definedPkgs.Remove(string(pkg.TerraformPkg))
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // we have coverage for pear instead
definedPkgs.Remove(string(pkg.CondaPkg))
definedPkgs.Remove(string(pkg.ModelPkg))
var cases []testCase
cases = append(cases, commonTestCases...)
@@ -161,6 +162,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
definedPkgs.Remove(string(pkg.UnknownPkg))
definedPkgs.Remove(string(pkg.CondaPkg))
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // this is covered as pear packages
definedPkgs.Remove(string(pkg.ModelPkg))
// for directory scans we should not expect to see any of the following package types
definedPkgs.Remove(string(pkg.KbPkg))

go.mod (22 changes)
View File

@@ -11,7 +11,6 @@ require (
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
github.com/acobaugh/osrelease v0.1.0
github.com/adrg/xdg v0.5.3
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9
github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716
github.com/anchore/fangs v0.0.0-20250319222917-446a1e748ec2
@@ -90,7 +89,7 @@ require (
go.uber.org/goleak v1.3.0
go.yaml.in/yaml/v3 v3.0.4
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b
golang.org/x/mod v0.29.0
golang.org/x/mod v0.30.0
golang.org/x/net v0.46.0
modernc.org/sqlite v1.40.0
)
@@ -168,7 +167,6 @@ require (
github.com/goccy/go-yaml v1.18.0
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect
github.com/google/s2a-go v0.1.8 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
@@ -209,10 +207,6 @@ require (
github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/termenv v0.16.0 // indirect
github.com/ncruces/go-strftime v0.1.9 // indirect
github.com/nwaples/rardecode v1.1.3 // indirect
github.com/nwaples/rardecode/v2 v2.2.0 // indirect
github.com/olekukonko/errors v1.1.0 // indirect
github.com/olekukonko/ll v0.1.2 // indirect
github.com/opencontainers/image-spec v1.1.1 // indirect
github.com/opencontainers/runtime-spec v1.1.0 // indirect
github.com/opencontainers/selinux v1.13.0 // indirect
@@ -286,6 +280,11 @@ require (
modernc.org/memory v1.11.0 // indirect
)
require (
github.com/cespare/xxhash/v2 v2.3.0
github.com/gpustack/gguf-parser-go v0.22.1
)
require (
cyphar.com/go-pathrs v0.2.1 // indirect
github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect
@@ -310,7 +309,16 @@ require (
github.com/clipperhouse/stringish v0.1.1 // indirect
github.com/clipperhouse/uax29/v2 v2.2.0 // indirect
github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect
github.com/henvic/httpretty v0.1.4 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/nwaples/rardecode/v2 v2.2.0 // indirect
github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect
github.com/olekukonko/errors v1.1.0 // indirect
github.com/olekukonko/ll v0.1.2 // indirect
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
gonum.org/v1/gonum v0.15.1 // indirect
)
retract (

go.sum (22 changes)
View File

@@ -110,8 +110,6 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51 h1:yhk+P8lF3ZiROjmaVRao9WGTRo4b/wYjoKEiAHWrKwc=
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51/go.mod h1:nwuGSd7aZp0rtYt79YggCGafz1RYsclE7pi3fhLwvuw=
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 h1:p0ZIe0htYOX284Y4axJaGBvXHU0VCCzLN5Wf5XbKStU=
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9/go.mod h1:3ZsFB9tzW3vl4gEiUeuSOMDnwroWxIxJelOOHUp8dSw=
github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 h1:2sIdYJlQESEnyk3Y0WD2vXWW5eD2iMz9Ev8fj1Z8LNA=
@@ -229,7 +227,6 @@ github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqy
github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
@@ -480,8 +477,6 @@ github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
@@ -549,6 +544,8 @@ github.com/gookit/assert v0.1.1/go.mod h1:jS5bmIVQZTIwk42uXl4lyj4iaaxx32tqH16CFj
github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg=
github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA=
github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs=
github.com/gpustack/gguf-parser-go v0.22.1 h1:FRnEDWqT0Rcplr/R9ctCRSN2+3DhVsf6dnR5/i9JA4E=
github.com/gpustack/gguf-parser-go v0.22.1/go.mod h1:y4TwTtDqFWTK+xvprOjRUh+dowgU2TKCX37vRKvGiZ0=
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
@@ -598,6 +595,8 @@ github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
@@ -625,6 +624,7 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
@@ -730,9 +730,11 @@ github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcY
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA=
github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
@@ -749,8 +751,6 @@ github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1 h1:kpt9ZfKcm+
github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1/go.mod h1:qgCw4bBKZX8qMgGeEZzGFVT3notl42dBjNqO2jut0M0=
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 h1:NHrXEjTNQY7P0Zfx1aMrNhpgxHmow66XQtm0aQLY0AE=
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8=
github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc=
github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/nwaples/rardecode/v2 v2.2.0 h1:4ufPGHiNe1rYJxYfehALLjup4Ls3ck42CWwjKiOqu0A=
github.com/nwaples/rardecode/v2 v2.2.0/go.mod h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw=
github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 h1:zrbMGy9YXpIeTnGj4EljqMiZsIcE09mmF8XsD5AYOJc=
@@ -860,6 +860,8 @@ github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yf
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik=
github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU=
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
@@ -1070,8 +1072,8 @@ golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk=
golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -1313,6 +1315,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=

View File

@@ -3,5 +3,5 @@ package internal
const (
// JSONSchemaVersion is the current schema version output by the JSON encoder
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
JSONSchemaVersion = "16.0.42"
JSONSchemaVersion = "16.0.43"
)

View File

@@ -1,17 +1,40 @@
package file
import (
"context"
"fmt"
"os"
"path/filepath"
"github.com/bmatcuk/doublestar/v4"
"github.com/mholt/archives"
"github.com/anchore/archiver/v3"
"github.com/anchore/syft/internal"
)
// TraverseFilesInTar enumerates all paths stored within a tar archive using the visitor pattern.
func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archives.FileHandler) error {
tarReader, err := os.Open(archivePath)
if err != nil {
return fmt.Errorf("unable to open tar archive (%s): %w", archivePath, err)
}
defer internal.CloseAndLogError(tarReader, archivePath)
format, _, err := archives.Identify(ctx, archivePath, nil)
if err != nil {
return fmt.Errorf("failed to identify tar compression format: %w", err)
}
extractor, ok := format.(archives.Extractor)
if !ok {
return fmt.Errorf("file format does not support extraction: %s", archivePath)
}
return extractor.Extract(ctx, tarReader, visitor)
}
// ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted.
func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...string) (map[string]Opener, error) {
func ExtractGlobsFromTarToUniqueTempFile(ctx context.Context, archivePath, dir string, globs ...string) (map[string]Opener, error) {
results := make(map[string]Opener)
// don't allow for full traversal, only select traversal from given paths
@@ -19,9 +42,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
return results, nil
}
visitor := func(file archiver.File) error {
defer file.Close()
visitor := func(_ context.Context, file archives.FileInfo) error {
// ignore directories
if file.IsDir() {
return nil
@@ -43,7 +64,13 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
// provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
defer tempFile.Close()
if err := safeCopy(tempFile, file.ReadCloser); err != nil {
packedFile, err := file.Open()
if err != nil {
return fmt.Errorf("unable to read file=%q from tar=%q: %w", file.NameInArchive, archivePath, err)
}
defer internal.CloseAndLogError(packedFile, archivePath)
if err := safeCopy(tempFile, packedFile); err != nil {
return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err)
}
@@ -52,7 +79,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
return nil
}
return results, archiver.Walk(archivePath, visitor)
return results, TraverseFilesInTar(ctx, archivePath, visitor)
}
func matchesAnyGlob(name string, globs ...string) bool {

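For orientation, here is a minimal standalone sketch (not part of this diff) of the visitor pattern that TraverseFilesInTar now wraps. Because internal/file cannot be imported from outside the syft module, this re-creates the same flow directly against mholt/archives; the archive path is hypothetical and the Identify/Extract signatures are assumed from that library:

package main

import (
	"context"
	"fmt"
	"log"
	"os"

	"github.com/mholt/archives"
)

func main() {
	ctx := context.Background()
	path := "./fixture.tar.gz" // hypothetical archive path

	f, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Identify sniffs the archive/compression format from the name and contents,
	// returning a reader that replays any bytes consumed while sniffing
	format, input, err := archives.Identify(ctx, path, f)
	if err != nil {
		log.Fatal(err)
	}

	extractor, ok := format.(archives.Extractor)
	if !ok {
		log.Fatalf("format does not support extraction: %s", path)
	}

	// the visitor runs once per archive entry; returning an error aborts the walk
	err = extractor.Extract(ctx, input, func(_ context.Context, fi archives.FileInfo) error {
		fmt.Printf("%s (%d bytes)\n", fi.NameInArchive, fi.Size())
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}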
View File

@@ -1,10 +1,12 @@
package file
import (
"context"
"os"
"sort"
"strings"
"github.com/mholt/archives"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal/log"
@@ -14,22 +16,25 @@ import (
type ZipFileManifest map[string]os.FileInfo
// NewZipFileManifest creates and returns a new ZipFileManifest populated with path and metadata from the given zip archive path.
func NewZipFileManifest(archivePath string) (ZipFileManifest, error) {
zipReader, err := OpenZip(archivePath)
func NewZipFileManifest(ctx context.Context, archivePath string) (ZipFileManifest, error) {
zipReader, err := os.Open(archivePath)
manifest := make(ZipFileManifest)
if err != nil {
log.Debugf("unable to open zip archive (%s): %v", archivePath, err)
return manifest, err
}
defer func() {
err = zipReader.Close()
if err != nil {
if err = zipReader.Close(); err != nil {
log.Debugf("unable to close zip archive (%s): %+v", archivePath, err)
}
}()
for _, file := range zipReader.File {
manifest.Add(file.Name, file.FileInfo())
err = archives.Zip{}.Extract(ctx, zipReader, func(_ context.Context, file archives.FileInfo) error {
manifest.Add(file.NameInArchive, file.FileInfo)
return nil
})
if err != nil {
return manifest, err
}
return manifest, nil
}

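As a rough illustration of what the rewritten NewZipFileManifest does, the standalone sketch below builds a name-to-size map by visiting each zip entry. The path is hypothetical; the archives.Zip Extract signature is the one used in the diff above:

package main

import (
	"context"
	"fmt"
	"log"
	"os"

	"github.com/mholt/archives"
)

func main() {
	ctx := context.Background()

	f, err := os.Open("./example.zip") // hypothetical archive path
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// collect path -> size, mirroring the manifest's path -> os.FileInfo map
	manifest := map[string]int64{}
	err = archives.Zip{}.Extract(ctx, f, func(_ context.Context, fi archives.FileInfo) error {
		manifest[fi.NameInArchive] = fi.Size()
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}

	for name, size := range manifest {
		fmt.Printf("%s: %d bytes\n", name, size)
	}
}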
View File

@@ -4,6 +4,7 @@
package file
import (
"context"
"encoding/json"
"os"
"path"
@@ -24,7 +25,7 @@ func TestNewZipFileManifest(t *testing.T) {
archiveFilePath := setupZipFileTest(t, sourceDirPath, false)
actual, err := NewZipFileManifest(archiveFilePath)
actual, err := NewZipFileManifest(context.Background(), archiveFilePath)
if err != nil {
t.Fatalf("unable to extract from unzip archive: %+v", err)
}
@@ -59,7 +60,7 @@ func TestNewZip64FileManifest(t *testing.T) {
sourceDirPath := path.Join(cwd, "test-fixtures", "zip-source")
archiveFilePath := setupZipFileTest(t, sourceDirPath, true)
actual, err := NewZipFileManifest(archiveFilePath)
actual, err := NewZipFileManifest(context.Background(), archiveFilePath)
if err != nil {
t.Fatalf("unable to extract from unzip archive: %+v", err)
}
@@ -99,7 +100,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
archiveFilePath := setupZipFileTest(t, sourceDirPath, false)
z, err := NewZipFileManifest(archiveFilePath)
z, err := NewZipFileManifest(context.Background(), archiveFilePath)
if err != nil {
t.Fatalf("unable to extract from unzip archive: %+v", err)
}

View File

@@ -1,13 +1,15 @@
package file
import (
"archive/zip"
"bytes"
"context"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/mholt/archives"
"github.com/anchore/syft/internal/log"
)
@@ -25,7 +27,7 @@ type errZipSlipDetected struct {
}
func (e *errZipSlipDetected) Error() string {
return fmt.Sprintf("paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs)
return fmt.Sprintf("path traversal detected: paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs)
}
type zipTraversalRequest map[string]struct{}
@@ -39,38 +41,34 @@ func newZipTraverseRequest(paths ...string) zipTraversalRequest {
}
// TraverseFilesInZip enumerates all paths stored within a zip archive using the visitor pattern.
func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths ...string) error {
func TraverseFilesInZip(ctx context.Context, archivePath string, visitor archives.FileHandler, paths ...string) error {
request := newZipTraverseRequest(paths...)
zipReader, err := OpenZip(archivePath)
zipReader, err := os.Open(archivePath)
if err != nil {
return fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err)
}
defer func() {
err = zipReader.Close()
if err != nil {
if err := zipReader.Close(); err != nil {
log.Errorf("unable to close zip archive (%s): %+v", archivePath, err)
}
}()
for _, file := range zipReader.File {
return archives.Zip{}.Extract(ctx, zipReader, func(ctx context.Context, file archives.FileInfo) error {
// if no paths are given then assume that all files should be traversed
if len(paths) > 0 {
if _, ok := request[file.Name]; !ok {
if _, ok := request[file.NameInArchive]; !ok {
// this file path is not of interest
continue
return nil
}
}
if err = visitor(file); err != nil {
return err
}
}
return nil
return visitor(ctx, file)
})
}
// ExtractFromZipToUniqueTempFile extracts select paths for the given archive to a temporary directory, returning file openers for each file extracted.
func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]Opener, error) {
func ExtractFromZipToUniqueTempFile(ctx context.Context, archivePath, dir string, paths ...string) (map[string]Opener, error) {
results := make(map[string]Opener)
// don't allow for full traversal, only select traversal from given paths
@@ -78,9 +76,8 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
return results, nil
}
visitor := func(file *zip.File) error {
tempfilePrefix := filepath.Base(filepath.Clean(file.Name)) + "-"
visitor := func(_ context.Context, file archives.FileInfo) error {
tempfilePrefix := filepath.Base(filepath.Clean(file.NameInArchive)) + "-"
tempFile, err := os.CreateTemp(dir, tempfilePrefix)
if err != nil {
return fmt.Errorf("unable to create temp file: %w", err)
@@ -92,33 +89,32 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
zippedFile, err := file.Open()
if err != nil {
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
}
defer func() {
err := zippedFile.Close()
if err != nil {
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.Name, archivePath, err)
if err := zippedFile.Close(); err != nil {
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
}
}()
if file.FileInfo().IsDir() {
return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
if file.IsDir() {
return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
}
if err := safeCopy(tempFile, zippedFile); err != nil {
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err)
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
}
results[file.Name] = Opener{path: tempFile.Name()}
results[file.NameInArchive] = Opener{path: tempFile.Name()}
return nil
}
return results, TraverseFilesInZip(archivePath, visitor, paths...)
return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
}
// ContentsFromZip extracts select paths for the given archive and returns a set of string contents for each path.
func ContentsFromZip(archivePath string, paths ...string) (map[string]string, error) {
func ContentsFromZip(ctx context.Context, archivePath string, paths ...string) (map[string]string, error) {
results := make(map[string]string)
// don't allow for full traversal, only select traversal from given paths
@@ -126,37 +122,38 @@ func ContentsFromZip(archivePath string, paths ...string) (map[string]string, er
return results, nil
}
visitor := func(file *zip.File) error {
visitor := func(_ context.Context, file archives.FileInfo) error {
zippedFile, err := file.Open()
if err != nil {
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
}
defer func() {
if err := zippedFile.Close(); err != nil {
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
}
}()
if file.FileInfo().IsDir() {
return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
if file.IsDir() {
return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
}
var buffer bytes.Buffer
if err := safeCopy(&buffer, zippedFile); err != nil {
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err)
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
}
results[file.Name] = buffer.String()
results[file.NameInArchive] = buffer.String()
err = zippedFile.Close()
if err != nil {
return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
}
return nil
}
return results, TraverseFilesInZip(archivePath, visitor, paths...)
return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
}
// UnzipToDir extracts a zip archive to a target directory.
func UnzipToDir(archivePath, targetDir string) error {
visitor := func(file *zip.File) error {
joinedPath, err := safeJoin(targetDir, file.Name)
func UnzipToDir(ctx context.Context, archivePath, targetDir string) error {
visitor := func(_ context.Context, file archives.FileInfo) error {
joinedPath, err := SafeJoin(targetDir, file.NameInArchive)
if err != nil {
return err
}
@@ -164,11 +161,11 @@ func UnzipToDir(archivePath, targetDir string) error {
return extractSingleFile(file, joinedPath, archivePath)
}
return TraverseFilesInZip(archivePath, visitor)
return TraverseFilesInZip(ctx, archivePath, visitor)
}
// safeJoin ensures that any destinations do not resolve to a path above the prefix path.
func safeJoin(prefix string, dest ...string) (string, error) {
// SafeJoin ensures that any destinations do not resolve to a path above the prefix path.
func SafeJoin(prefix string, dest ...string) (string, error) {
joinResult := filepath.Join(append([]string{prefix}, dest...)...)
cleanJoinResult := filepath.Clean(joinResult)
if !strings.HasPrefix(cleanJoinResult, filepath.Clean(prefix)) {
@@ -181,13 +178,18 @@ func safeJoin(prefix string, dest ...string) (string, error) {
return joinResult, nil
}
func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) error {
func extractSingleFile(file archives.FileInfo, expandedFilePath, archivePath string) error {
zippedFile, err := file.Open()
if err != nil {
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
}
defer func() {
if err := zippedFile.Close(); err != nil {
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
}
}()
if file.FileInfo().IsDir() {
if file.IsDir() {
err = os.MkdirAll(expandedFilePath, file.Mode())
if err != nil {
return fmt.Errorf("unable to create dir=%q from zip=%q: %w", expandedFilePath, archivePath, err)
@@ -202,20 +204,16 @@ func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) err
if err != nil {
return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err)
}
defer func() {
if err := outputFile.Close(); err != nil {
log.Errorf("unable to close dest file=%q from zip=%q: %+v", outputFile.Name(), archivePath, err)
}
}()
if err := safeCopy(outputFile, zippedFile); err != nil {
return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.Name, outputFile.Name(), archivePath, err)
}
err = outputFile.Close()
if err != nil {
return fmt.Errorf("unable to close dest file=%q from zip=%q: %w", outputFile.Name(), archivePath, err)
return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.NameInArchive, outputFile.Name(), archivePath, err)
}
}
err = zippedFile.Close()
if err != nil {
return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
}
return nil
}

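The newly exported SafeJoin is the zip-slip guard exercised by the tests in the next file. A minimal standalone sketch of the same prefix check follows (hypothetical paths; not the syft implementation, which returns the typed errZipSlipDetected error):

package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// safeJoin rejects any destination that, once cleaned, resolves outside the
// prefix directory -- the classic zip-slip escape via "../" archive entries.
func safeJoin(prefix string, dest ...string) (string, error) {
	joined := filepath.Join(append([]string{prefix}, dest...)...)
	if !strings.HasPrefix(filepath.Clean(joined), filepath.Clean(prefix)) {
		return "", fmt.Errorf("path traversal detected: %q escapes %q", dest, prefix)
	}
	return joined, nil
}

func main() {
	for _, entry := range []string{"subdir/safe.txt", "dir1/../dir2/file.txt", "../../etc/passwd"} {
		p, err := safeJoin("/tmp/extract", entry)
		fmt.Println(p, err) // only the last entry is rejected
	}
}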
View File

@@ -4,6 +4,8 @@
package file
import (
"archive/zip"
"context"
"crypto/sha256"
"encoding/json"
"errors"
@@ -17,6 +19,7 @@ import (
"github.com/go-test/deep"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func equal(r1, r2 io.Reader) (bool, error) {
@@ -55,7 +58,7 @@ func TestUnzipToDir(t *testing.T) {
expectedPaths := len(expectedZipArchiveEntries)
observedPaths := 0
err = UnzipToDir(archiveFilePath, unzipDestinationDir)
err = UnzipToDir(context.Background(), archiveFilePath, unzipDestinationDir)
if err != nil {
t.Fatalf("unable to unzip archive: %+v", err)
}
@@ -145,7 +148,7 @@ func TestContentsFromZip(t *testing.T) {
paths = append(paths, p)
}
actual, err := ContentsFromZip(archivePath, paths...)
actual, err := ContentsFromZip(context.Background(), archivePath, paths...)
if err != nil {
t.Fatalf("unable to extract from unzip archive: %+v", err)
}
@@ -307,9 +310,528 @@ func TestSafeJoin(t *testing.T) {
for _, test := range tests {
t.Run(fmt.Sprintf("%+v:%+v", test.prefix, test.args), func(t *testing.T) {
actual, err := safeJoin(test.prefix, test.args...)
actual, err := SafeJoin(test.prefix, test.args...)
test.errAssertion(t, err)
assert.Equal(t, test.expected, actual)
})
}
}
// TestSafeJoin_SymlinkProtection demonstrates that SafeJoin protects against symlink-based
// directory traversal attacks by validating that archive entry paths cannot escape
// the extraction directory.
func TestSafeJoin_SymlinkProtection(t *testing.T) {
tests := []struct {
name string
archivePath string // Path as it would appear in the archive
expectError bool
description string
}{
{
name: "path traversal via ../",
archivePath: "../../../outside/file.txt",
expectError: true,
description: "Archive entry with ../ trying to escape extraction dir",
},
{
name: "absolute path symlink target",
archivePath: "../../../sensitive.txt",
expectError: true,
description: "Simulates symlink pointing outside via relative path",
},
{
name: "safe relative path within extraction dir",
archivePath: "subdir/safe.txt",
expectError: false,
description: "Normal file path that stays within extraction directory",
},
{
name: "safe path with internal ../",
archivePath: "dir1/../dir2/file.txt",
expectError: false,
description: "Path with ../ that still resolves within extraction dir",
},
{
name: "deeply nested traversal",
archivePath: "../../../../../../tmp/evil.txt",
expectError: true,
description: "Multiple levels of ../ trying to escape",
},
{
name: "single parent directory escape",
archivePath: "../",
expectError: true,
description: "Simple one-level escape attempt",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create temp directories to simulate extraction scenario
tmpDir := t.TempDir()
extractDir := filepath.Join(tmpDir, "extract")
outsideDir := filepath.Join(tmpDir, "outside")
require.NoError(t, os.MkdirAll(extractDir, 0755))
require.NoError(t, os.MkdirAll(outsideDir, 0755))
// Create a file outside extraction dir that an attacker might target
outsideFile := filepath.Join(outsideDir, "sensitive.txt")
require.NoError(t, os.WriteFile(outsideFile, []byte("sensitive data"), 0644))
// Test SafeJoin - this is what happens when processing archive entries
result, err := SafeJoin(extractDir, tt.archivePath)
if tt.expectError {
// Should block malicious paths
require.Error(t, err, "Expected SafeJoin to reject malicious path")
var zipSlipErr *errZipSlipDetected
assert.ErrorAs(t, err, &zipSlipErr, "Error should be errZipSlipDetected type")
assert.Empty(t, result, "Result should be empty for blocked paths")
} else {
// Should allow safe paths
require.NoError(t, err, "Expected SafeJoin to allow safe path")
assert.NotEmpty(t, result, "Result should not be empty for safe paths")
assert.True(t, strings.HasPrefix(filepath.Clean(result), filepath.Clean(extractDir)),
"Safe path should resolve within extraction directory")
}
})
}
}
// TestUnzipToDir_SymlinkAttacks tests UnzipToDir function with malicious ZIP archives
// containing symlink entries that attempt path traversal attacks.
//
// EXPECTED BEHAVIOR: UnzipToDir should either:
// 1. Detect and reject symlinks explicitly with a security error, OR
// 2. Extract them safely (library converts symlinks to regular files)
func TestUnzipToDir_SymlinkAttacks(t *testing.T) {
tests := []struct {
name string
symlinkName string
fileName string
errContains string
}{
{
name: "direct symlink to outside directory",
symlinkName: "evil_link",
fileName: "evil_link/payload.txt",
errContains: "not a directory", // attempt to write through symlink leaf (which is not a directory)
},
{
name: "directory symlink attack",
symlinkName: "safe_dir/link",
fileName: "safe_dir/link/payload.txt",
errContains: "not a directory", // attempt to write through symlink (which is not a directory)
},
{
name: "symlink without payload file",
symlinkName: "standalone_link",
fileName: "", // no payload file
errContains: "", // no error expected, symlink without payload is safe
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tempDir := t.TempDir()
// create outside target directory
outsideDir := filepath.Join(tempDir, "outside_target")
require.NoError(t, os.MkdirAll(outsideDir, 0755))
// create extraction directory
extractDir := filepath.Join(tempDir, "extract")
require.NoError(t, os.MkdirAll(extractDir, 0755))
maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, outsideDir, tt.fileName)
err := UnzipToDir(context.Background(), maliciousZip, extractDir)
// check error expectations
if tt.errContains != "" {
require.Error(t, err)
require.Contains(t, err.Error(), tt.errContains)
} else {
require.NoError(t, err)
}
analyzeExtractionDirectory(t, extractDir)
// check if payload file escaped extraction directory
if tt.fileName != "" {
maliciousFile := filepath.Join(outsideDir, filepath.Base(tt.fileName))
checkFileOutsideExtraction(t, maliciousFile)
}
// check if symlink was created pointing outside
symlinkPath := filepath.Join(extractDir, tt.symlinkName)
checkSymlinkCreation(t, symlinkPath, extractDir, outsideDir)
})
}
}
// TestContentsFromZip_SymlinkAttacks tests the ContentsFromZip function with malicious
// ZIP archives containing symlink entries.
//
// EXPECTED BEHAVIOR: ContentsFromZip should either:
// 1. Reject symlinks explicitly, OR
// 2. Return empty content for symlinks (library behavior)
//
// Although ContentsFromZip doesn't write to disk, if symlinks are followed it could read sensitive
// files from outside the archive.
func TestContentsFromZip_SymlinkAttacks(t *testing.T) {
tests := []struct {
name string
symlinkName string
symlinkTarget string
requestPath string
errContains string
}{
{
name: "request symlink entry directly",
symlinkName: "evil_link",
symlinkTarget: "/etc/hosts", // attempt to read sensitive file
requestPath: "evil_link",
errContains: "", // no error expected - library returns symlink metadata
},
{
name: "symlink in nested directory",
symlinkName: "nested/link",
symlinkTarget: "/etc/hosts",
requestPath: "nested/link",
errContains: "", // no error expected - library returns symlink metadata
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tempDir := t.TempDir()
// create malicious ZIP with symlink entry (no payload file needed)
maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, tt.symlinkTarget, "")
contents, err := ContentsFromZip(context.Background(), maliciousZip, tt.requestPath)
// check error expectations
if tt.errContains != "" {
require.Error(t, err)
require.Contains(t, err.Error(), tt.errContains)
return
}
require.NoError(t, err)
// verify symlink handling - library should return symlink target as content (metadata)
content, found := contents[tt.requestPath]
require.True(t, found, "symlink entry should be found in results")
// verify symlink was NOT followed (content should be target path or empty)
if content != "" && content != tt.symlinkTarget {
// content is not empty and not the symlink target - check if actual file was read
if _, statErr := os.Stat(tt.symlinkTarget); statErr == nil {
targetContent, readErr := os.ReadFile(tt.symlinkTarget)
if readErr == nil && string(targetContent) == content {
t.Errorf("critical issue!... symlink was FOLLOWED and external file content was read!")
t.Logf(" symlink: %s → %s", tt.requestPath, tt.symlinkTarget)
t.Logf(" content length: %d bytes", len(content))
}
}
}
})
}
}
// TestExtractFromZipToUniqueTempFile_SymlinkAttacks tests the ExtractFromZipToUniqueTempFile
// function with malicious ZIP archives containing symlink entries.
//
// EXPECTED BEHAVIOR: ExtractFromZipToUniqueTempFile should either:
// 1. Reject symlinks explicitly, OR
// 2. Extract them safely (library converts to empty files, filepath.Base sanitizes names)
//
// This function uses filepath.Base() on the archive entry name for temp file prefix and
// os.CreateTemp() which creates files in the specified directory, so it should be protected.
func TestExtractFromZipToUniqueTempFile_SymlinkAttacks(t *testing.T) {
tests := []struct {
name string
symlinkName string
symlinkTarget string
requestPath string
errContains string
}{
{
name: "extract symlink entry to temp file",
symlinkName: "evil_link",
symlinkTarget: "/etc/passwd",
requestPath: "evil_link",
errContains: "", // no error expected - library extracts symlink metadata
},
{
name: "extract nested symlink",
symlinkName: "nested/dir/link",
symlinkTarget: "/tmp/outside",
requestPath: "nested/dir/link",
errContains: "", // no error expected
},
{
name: "extract path traversal symlink name",
symlinkName: "../../escape",
symlinkTarget: "/tmp/outside",
requestPath: "../../escape",
errContains: "", // no error expected - filepath.Base sanitizes name
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tempDir := t.TempDir()
maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, tt.symlinkTarget, "")
// create temp directory for extraction
extractTempDir := filepath.Join(tempDir, "temp_extract")
require.NoError(t, os.MkdirAll(extractTempDir, 0755))
openers, err := ExtractFromZipToUniqueTempFile(context.Background(), maliciousZip, extractTempDir, tt.requestPath)
// check error expectations
if tt.errContains != "" {
require.Error(t, err)
require.Contains(t, err.Error(), tt.errContains)
return
}
require.NoError(t, err)
// verify symlink was extracted
opener, found := openers[tt.requestPath]
require.True(t, found, "symlink entry should be extracted")
// verify temp file is within temp directory
tempFilePath := opener.path
cleanTempDir := filepath.Clean(extractTempDir)
cleanTempFile := filepath.Clean(tempFilePath)
require.True(t, strings.HasPrefix(cleanTempFile, cleanTempDir),
"temp file must be within temp directory: %s not in %s", cleanTempFile, cleanTempDir)
// verify symlink was NOT followed (content should be target path or empty)
f, openErr := opener.Open()
require.NoError(t, openErr)
defer f.Close()
content, readErr := io.ReadAll(f)
require.NoError(t, readErr)
// check if symlink was followed (content matches actual file)
if len(content) > 0 && string(content) != tt.symlinkTarget {
if _, statErr := os.Stat(tt.symlinkTarget); statErr == nil {
targetContent, readErr := os.ReadFile(tt.symlinkTarget)
if readErr == nil && string(targetContent) == string(content) {
t.Errorf("critical issue!... symlink was FOLLOWED and external file content was copied!")
t.Logf(" symlink: %s → %s", tt.requestPath, tt.symlinkTarget)
t.Logf(" content length: %d bytes", len(content))
}
}
}
})
}
}
// forensicFindings contains the results of analyzing an extraction directory
type forensicFindings struct {
symlinksFound []forensicSymlink
regularFiles []string
directories []string
symlinkVulnerabilities []string
}
type forensicSymlink struct {
path string
target string
escapesExtraction bool
resolvedPath string
}
// analyzeExtractionDirectory walks the extraction directory and detects symlinks that point
// outside the extraction directory. It is silent unless vulnerabilities are found.
func analyzeExtractionDirectory(t *testing.T, extractDir string) forensicFindings {
t.Helper()
findings := forensicFindings{}
filepath.Walk(extractDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
// only log if there's an error walking the directory
t.Logf("Error walking %s: %v", path, err)
return nil
}
relPath := strings.TrimPrefix(path, extractDir+"/")
if relPath == "" {
relPath = "."
}
// use Lstat to detect symlinks without following them
linfo, lerr := os.Lstat(path)
if lerr == nil && linfo.Mode()&os.ModeSymlink != 0 {
target, _ := os.Readlink(path)
// resolve to see where it actually points
var resolvedPath string
var escapesExtraction bool
if filepath.IsAbs(target) {
// absolute symlink
resolvedPath = target
cleanExtractDir := filepath.Clean(extractDir)
escapesExtraction = !strings.HasPrefix(filepath.Clean(target), cleanExtractDir)
if escapesExtraction {
t.Errorf("critical issue!... absolute symlink created: %s → %s", relPath, target)
t.Logf(" this symlink points outside the extraction directory")
findings.symlinkVulnerabilities = append(findings.symlinkVulnerabilities,
fmt.Sprintf("absolute symlink: %s → %s", relPath, target))
}
} else {
// relative symlink - resolve it
resolvedPath = filepath.Join(filepath.Dir(path), target)
cleanResolved := filepath.Clean(resolvedPath)
cleanExtractDir := filepath.Clean(extractDir)
escapesExtraction = !strings.HasPrefix(cleanResolved, cleanExtractDir)
if escapesExtraction {
t.Errorf("critical issue!... symlink escapes extraction dir: %s → %s", relPath, target)
t.Logf(" symlink resolves to: %s (outside extraction directory)", cleanResolved)
findings.symlinkVulnerabilities = append(findings.symlinkVulnerabilities,
fmt.Sprintf("relative symlink escape: %s → %s (resolves to %s)", relPath, target, cleanResolved))
}
}
findings.symlinksFound = append(findings.symlinksFound, forensicSymlink{
path: relPath,
target: target,
escapesExtraction: escapesExtraction,
resolvedPath: resolvedPath,
})
} else {
// regular file or directory - collect silently
if info.IsDir() {
findings.directories = append(findings.directories, relPath)
} else {
findings.regularFiles = append(findings.regularFiles, relPath)
}
}
return nil
})
return findings
}
// checkFileOutsideExtraction checks if a file was written outside the extraction directory.
// Returns true if the file exists (vulnerability), false otherwise. Silent on success.
func checkFileOutsideExtraction(t *testing.T, filePath string) bool {
t.Helper()
if stat, err := os.Stat(filePath); err == nil {
content, _ := os.ReadFile(filePath)
t.Errorf("critical issue!... file written OUTSIDE extraction directory!")
t.Logf(" location: %s", filePath)
t.Logf(" size: %d bytes", stat.Size())
t.Logf(" content: %s", string(content))
t.Logf(" ...this means an attacker can write files to arbitrary locations on the filesystem")
return true
}
// no file found outside extraction directory...
return false
}
// checkSymlinkCreation verifies if a symlink was created at the expected path and reports
// whether it points outside the extraction directory. Silent unless a symlink is found.
func checkSymlinkCreation(t *testing.T, symlinkPath, extractDir, expectedTarget string) bool {
t.Helper()
if linfo, err := os.Lstat(symlinkPath); err == nil {
if linfo.Mode()&os.ModeSymlink != 0 {
target, _ := os.Readlink(symlinkPath)
if expectedTarget != "" && target == expectedTarget {
t.Errorf("critical issue!... symlink pointing outside extraction dir was created!")
t.Logf(" Symlink: %s → %s", symlinkPath, target)
return true
}
// Check if it escapes even if target doesn't match expected
if filepath.IsAbs(target) {
cleanExtractDir := filepath.Clean(extractDir)
if !strings.HasPrefix(filepath.Clean(target), cleanExtractDir) {
t.Errorf("critical issue!... absolute symlink escapes extraction dir!")
t.Logf(" symlink: %s → %s", symlinkPath, target)
return true
}
}
}
// if it exists but is not a symlink, that's good (attack was thwarted)...
}
return false
}
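Note on the containment checks above: strings.HasPrefix on cleaned paths can be fooled by sibling directories that share a prefix (e.g. /tmp/extract-evil passes a prefix check against /tmp/extract). A stricter alternative based on filepath.Rel, shown here as an illustrative sketch rather than what the helpers above use:

func isWithinDir(dir, path string) bool {
	// filepath.Rel yields a relative path; any result that starts with ".."
	// escapes dir, and sibling-prefix names cannot defeat this check.
	rel, err := filepath.Rel(filepath.Clean(dir), filepath.Clean(path))
	if err != nil {
		return false
	}
	return rel != ".." && !strings.HasPrefix(rel, ".."+string(filepath.Separator))
}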
// createMaliciousZipWithSymlink creates a ZIP archive containing a symlink entry pointing to an arbitrary target,
// followed by a file entry that attempts to write through that symlink.
// returns the path to the created ZIP archive.
func createMaliciousZipWithSymlink(t *testing.T, tempDir, symlinkName, symlinkTarget, fileName string) string {
t.Helper()
maliciousZip := filepath.Join(tempDir, "malicious.zip")
zipFile, err := os.Create(maliciousZip)
require.NoError(t, err)
defer zipFile.Close()
zw := zip.NewWriter(zipFile)
// create parent directories if the symlink is nested
if dir := filepath.Dir(symlinkName); dir != "." {
dirHeader := &zip.FileHeader{
Name: dir + "/",
Method: zip.Store,
}
dirHeader.SetMode(os.ModeDir | 0755)
_, err = zw.CreateHeader(dirHeader)
require.NoError(t, err)
}
// create symlink entry pointing outside extraction directory
// note: ZIP format stores symlinks as regular files with the target path as content
symlinkHeader := &zip.FileHeader{
Name: symlinkName,
Method: zip.Store,
}
symlinkHeader.SetMode(os.ModeSymlink | 0755)
symlinkWriter, err := zw.CreateHeader(symlinkHeader)
require.NoError(t, err)
// write the symlink target as the file content (this is how ZIP stores symlinks)
_, err = symlinkWriter.Write([]byte(symlinkTarget))
require.NoError(t, err)
// create file entry that will be written through the symlink
if fileName != "" {
payloadContent := []byte("MALICIOUS PAYLOAD - This should NOT be written outside extraction dir!")
payloadHeader := &zip.FileHeader{
Name: fileName,
Method: zip.Deflate,
}
payloadHeader.SetMode(0644)
payloadWriter, err := zw.CreateHeader(payloadHeader)
require.NoError(t, err)
_, err = payloadWriter.Write(payloadContent)
require.NoError(t, err)
}
require.NoError(t, zw.Close())
require.NoError(t, zipFile.Close())
return maliciousZip
}
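For completeness, here is the decode side of the note above (ZIP stores symlinks as regular entries whose body is the target path): a small sketch using only archive/zip that locates an entry, checks its symlink mode bit, and reads the target out of the entry body.

func readZipSymlinkTarget(zipPath, entryName string) (string, error) {
	zr, err := zip.OpenReader(zipPath)
	if err != nil {
		return "", err
	}
	defer zr.Close()
	for _, f := range zr.File {
		if f.Name != entryName {
			continue
		}
		if f.Mode()&os.ModeSymlink == 0 {
			return "", fmt.Errorf("entry %q is not a symlink", entryName)
		}
		rc, err := f.Open()
		if err != nil {
			return "", err
		}
		defer rc.Close()
		target, err := io.ReadAll(rc) // the entry body is the link target
		if err != nil {
			return "", err
		}
		return string(target), nil
	}
	return "", fmt.Errorf("entry %q not found", entryName)
}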


@ -1,229 +0,0 @@
package file
import (
"archive/zip"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"os"
"github.com/anchore/syft/internal/log"
)
// directoryEndLen, readBuf, directoryEnd, and findSignatureInBlock were copied from the golang stdlib, specifically:
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/struct.go
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/reader.go
// findArchiveStartOffset is derived from the same stdlib utils, specifically the readDirectoryEnd function.
const (
directoryEndLen = 22
directory64LocLen = 20
directory64EndLen = 56
directory64LocSignature = 0x07064b50
directory64EndSignature = 0x06064b50
)
// ZipReadCloser is a drop-in replacement for zip.ReadCloser (from zip.OpenReader) that additionally considers zips
// that have bytes prefixed to the front of the archive (common with self-extracting jars).
type ZipReadCloser struct {
*zip.Reader
io.Closer
}
// OpenZip provides a ZipReadCloser for the given filepath.
func OpenZip(filepath string) (*ZipReadCloser, error) {
f, err := os.Open(filepath)
if err != nil {
return nil, err
}
fi, err := f.Stat()
if err != nil {
f.Close()
return nil, err
}
// some archives may have bytes prepended to the front of the archive, such as with self executing JARs. We first
// need to find the start of the archive and keep track of this offset.
offset, err := findArchiveStartOffset(f, fi.Size())
if err != nil {
log.Debugf("cannot find beginning of zip archive=%q : %v", filepath, err)
return nil, err
}
if _, err := f.Seek(0, io.SeekStart); err != nil {
return nil, fmt.Errorf("unable to seek to beginning of archive: %w", err)
}
if offset > math.MaxInt64 {
return nil, fmt.Errorf("archive start offset too large: %v", offset)
}
offset64 := int64(offset)
size := fi.Size() - offset64
r, err := zip.NewReader(io.NewSectionReader(f, offset64, size), size)
if err != nil {
log.Debugf("unable to open ZipReadCloser @ %q: %v", filepath, err)
return nil, err
}
return &ZipReadCloser{
Reader: r,
Closer: f,
}, nil
}
type readBuf []byte
func (b *readBuf) uint16() uint16 {
v := binary.LittleEndian.Uint16(*b)
*b = (*b)[2:]
return v
}
func (b *readBuf) uint32() uint32 {
v := binary.LittleEndian.Uint32(*b)
*b = (*b)[4:]
return v
}
func (b *readBuf) uint64() uint64 {
v := binary.LittleEndian.Uint64(*b)
*b = (*b)[8:]
return v
}
type directoryEnd struct {
diskNbr uint32 // unused
dirDiskNbr uint32 // unused
dirRecordsThisDisk uint64 // unused
directoryRecords uint64
directorySize uint64
directoryOffset uint64 // relative to file
}
// note: this is derived from readDirectoryEnd within the archive/zip package
func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, err error) {
// look for directoryEndSignature in the last 1k, then in the last 65k
var buf []byte
var directoryEndOffset int64
for i, bLen := range []int64{1024, 65 * 1024} {
if bLen > size {
bLen = size
}
buf = make([]byte, int(bLen))
if _, err := r.ReadAt(buf, size-bLen); err != nil && !errors.Is(err, io.EOF) {
return 0, err
}
if p := findSignatureInBlock(buf); p >= 0 {
buf = buf[p:]
directoryEndOffset = size - bLen + int64(p)
break
}
if i == 1 || bLen == size {
return 0, zip.ErrFormat
}
}
if buf == nil {
// we were unable to find the directoryEndSignature block
return 0, zip.ErrFormat
}
// read header into struct
b := readBuf(buf[4:]) // skip signature
d := &directoryEnd{
diskNbr: uint32(b.uint16()),
dirDiskNbr: uint32(b.uint16()),
dirRecordsThisDisk: uint64(b.uint16()),
directoryRecords: uint64(b.uint16()),
directorySize: uint64(b.uint32()),
directoryOffset: uint64(b.uint32()),
}
// Calculate where the zip data actually begins
// These values mean that the file can be a zip64 file
if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
p, err := findDirectory64End(r, directoryEndOffset)
if err == nil && p >= 0 {
directoryEndOffset = p
err = readDirectory64End(r, p, d)
}
if err != nil {
return 0, err
}
}
startOfArchive = uint64(directoryEndOffset) - d.directorySize - d.directoryOffset
// Make sure directoryOffset points to somewhere in our file.
if d.directoryOffset >= uint64(size) {
return 0, zip.ErrFormat
}
return startOfArchive, nil
}
// findDirectory64End tries to read the zip64 locator just before the
// directory end and returns the offset of the zip64 directory end if
// found.
func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
locOffset := directoryEndOffset - directory64LocLen
if locOffset < 0 {
return -1, nil // no need to look for a header outside the file
}
buf := make([]byte, directory64LocLen)
if _, err := r.ReadAt(buf, locOffset); err != nil {
return -1, err
}
b := readBuf(buf)
if sig := b.uint32(); sig != directory64LocSignature {
return -1, nil
}
if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory
return -1, nil // the file is not a valid zip64-file
}
p := b.uint64() // relative offset of the zip64 end of central directory record
if b.uint32() != 1 { // total number of disks
return -1, nil // the file is not a valid zip64-file
}
return int64(p), nil
}
// readDirectory64End reads the zip64 directory end and updates the
// directory end with the zip64 directory end values.
func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
buf := make([]byte, directory64EndLen)
if _, err := r.ReadAt(buf, offset); err != nil {
return err
}
b := readBuf(buf)
if sig := b.uint32(); sig != directory64EndSignature {
return errors.New("could not read directory64End")
}
b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16)
d.diskNbr = b.uint32() // number of this disk
d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory
d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk
d.directoryRecords = b.uint64() // total number of entries in the central directory
d.directorySize = b.uint64() // size of the central directory
d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number
return nil
}
func findSignatureInBlock(b []byte) int {
for i := len(b) - directoryEndLen; i >= 0; i-- {
// defined from directoryEndSignature
if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
// n is length of comment
n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
if n+directoryEndLen+i <= len(b) {
return i
}
}
}
return -1
}
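A minimal usage sketch for the reader above: open an archive that may have bytes prepended (the self-extracting JAR case) and list its entries. The helper name and input path are hypothetical:

func listEntries(path string) error {
	rc, err := OpenZip(path) // tolerates prepended bytes, unlike zip.OpenReader
	if err != nil {
		return err
	}
	defer rc.Close()
	for _, f := range rc.File {
		fmt.Printf("%s (%d bytes)\n", f.Name, f.UncompressedSize64)
	}
	return nil
}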


@ -1,50 +0,0 @@
//go:build !windows
// +build !windows
package file
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
)
func TestFindArchiveStartOffset(t *testing.T) {
tests := []struct {
name string
archivePrep func(tb testing.TB) string
expected uint64
}{
{
name: "standard, non-nested zip",
archivePrep: prepZipSourceFixture,
expected: 0,
},
{
name: "zip with prepended bytes",
archivePrep: prependZipSourceFixtureWithString(t, "junk at the beginning of the file..."),
expected: 36,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
archivePath := test.archivePrep(t)
f, err := os.Open(archivePath)
if err != nil {
t.Fatalf("could not open archive %q: %+v", archivePath, err)
}
fi, err := os.Stat(f.Name())
if err != nil {
t.Fatalf("unable to stat archive: %+v", err)
}
actual, err := findArchiveStartOffset(f, fi.Size())
if err != nil {
t.Fatalf("unable to find offset: %+v", err)
}
assert.Equal(t, test.expected, actual)
})
}
}


@ -27,6 +27,7 @@ func AllTypes() []any {
pkg.ELFBinaryPackageNoteJSONPayload{},
pkg.ElixirMixLockEntry{},
pkg.ErlangRebarLockEntry{},
pkg.GGUFFileHeader{},
pkg.GitHubActionsUseStatement{},
pkg.GolangBinaryBuildinfoEntry{},
pkg.GolangModuleEntry{},


@ -124,6 +124,7 @@ var jsonTypes = makeJSONTypes(
jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"),
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"),
)
func expandLegacyNameVariants(names ...string) []string {


@ -3,6 +3,7 @@ package task
import (
"github.com/anchore/syft/syft/cataloging/pkgcataloging"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/ai"
"github.com/anchore/syft/syft/pkg/cataloger/alpine"
"github.com/anchore/syft/syft/pkg/cataloger/arch"
"github.com/anchore/syft/syft/pkg/cataloger/binary"
@ -178,6 +179,7 @@ func DefaultPackageTaskFactories() Factories {
newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"),
newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"),
newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"),
newSimplePackageTaskFactory(ai.NewGGUFCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "gguf", "ml"),
// deprecated catalogers ////////////////////////////////////////
// these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible)


@ -4,7 +4,8 @@ import (
"context"
"strings"
"github.com/anchore/archiver/v3"
"github.com/mholt/archives"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/cataloging"
@ -57,9 +58,10 @@ func (c unknownsLabelerTask) finalize(resolver file.Resolver, s *sbom.SBOM) {
}
if c.IncludeUnexpandedArchives {
ctx := context.Background()
for coords := range s.Artifacts.FileMetadata {
unarchiver, notArchiveErr := archiver.ByExtension(coords.RealPath)
if unarchiver != nil && notArchiveErr == nil && !hasPackageReference(coords) {
format, _, notArchiveErr := archives.Identify(ctx, coords.RealPath, nil)
if format != nil && notArchiveErr == nil && !hasPackageReference(coords) {
s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged")
}
}
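Sketched in isolation, the mholt/archives call that replaces archiver.ByExtension above: Identify matches on the file name (and on content when a stream is supplied; nil here) and returns a non-nil format with no error on a match. The helper name is hypothetical:

func looksLikeArchive(ctx context.Context, name string) bool {
	// with a nil stream, archives.Identify matches on the file name alone
	format, _, err := archives.Identify(ctx, name, nil)
	return err == nil && format != nil
}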


@ -130,7 +130,8 @@
"description": "Digests contains file content hashes for integrity verification"
}
},
"type": "object"
"type": "object",
"description": "AlpmFileRecord represents a single file entry within an Arch Linux package with its associated metadata tracked by pacman."
},
"ApkDbEntry": {
"properties": {
@ -433,16 +434,19 @@
"CPE": {
"properties": {
"cpe": {
"type": "string"
"type": "string",
"description": "Value is the CPE string identifier."
},
"source": {
"type": "string"
"type": "string",
"description": "Source is the source where this CPE was obtained or generated from."
}
},
"type": "object",
"required": [
"cpe"
]
],
"description": "CPE represents a Common Platform Enumeration identifier used for matching packages to known vulnerabilities in security databases."
},
"ClassifierMatch": {
"properties": {
@ -747,19 +751,23 @@
"Descriptor": {
"properties": {
"name": {
"type": "string"
"type": "string",
"description": "Name is the name of the tool that generated this SBOM (e.g., \"syft\")."
},
"version": {
"type": "string"
"type": "string",
"description": "Version is the version of the tool that generated this SBOM."
},
"configuration": true
"configuration": {
"description": "Configuration contains the tool configuration used during SBOM generation."
}
},
"type": "object",
"required": [
"name",
"version"
],
"description": "Descriptor describes what created the document as well as surrounding metadata"
"description": "Descriptor identifies the tool that generated this SBOM document, including its name, version, and configuration used during catalog generation."
},
"Digest": {
"properties": {
@ -1285,58 +1293,71 @@
"File": {
"properties": {
"id": {
"type": "string"
"type": "string",
"description": "ID is a unique identifier for this file within the SBOM."
},
"location": {
"$ref": "#/$defs/Coordinates"
"$ref": "#/$defs/Coordinates",
"description": "Location is the file path and layer information where this file was found."
},
"metadata": {
"$ref": "#/$defs/FileMetadataEntry"
"$ref": "#/$defs/FileMetadataEntry",
"description": "Metadata contains filesystem metadata such as permissions, ownership, and file type."
},
"contents": {
"type": "string"
"type": "string",
"description": "Contents is the file contents for small files."
},
"digests": {
"items": {
"$ref": "#/$defs/Digest"
},
"type": "array"
"type": "array",
"description": "Digests contains cryptographic hashes of the file contents."
},
"licenses": {
"items": {
"$ref": "#/$defs/FileLicense"
},
"type": "array"
"type": "array",
"description": "Licenses contains license information discovered within this file."
},
"executable": {
"$ref": "#/$defs/Executable"
"$ref": "#/$defs/Executable",
"description": "Executable contains executable metadata if this file is a binary."
},
"unknowns": {
"items": {
"type": "string"
},
"type": "array"
"type": "array",
"description": "Unknowns contains unknown fields for forward compatibility."
}
},
"type": "object",
"required": [
"id",
"location"
]
],
"description": "File represents a file discovered during cataloging with its metadata, content digests, licenses, and relationships to packages."
},
"FileLicense": {
"properties": {
"value": {
"type": "string"
"type": "string",
"description": "Value is the raw license identifier or text as found in the file."
},
"spdxExpression": {
"type": "string"
"type": "string",
"description": "SPDXExpression is the parsed SPDX license expression."
},
"type": {
"type": "string"
"type": "string",
"description": "Type is the license type classification (e.g., declared, concluded, discovered)."
},
"evidence": {
"$ref": "#/$defs/FileLicenseEvidence"
"$ref": "#/$defs/FileLicenseEvidence",
"description": "Evidence contains supporting evidence for this license detection."
}
},
"type": "object",
@ -1344,18 +1365,22 @@
"value",
"spdxExpression",
"type"
]
],
"description": "FileLicense represents license information discovered within a file's contents or metadata, including the matched license text and SPDX expression."
},
"FileLicenseEvidence": {
"properties": {
"confidence": {
"type": "integer"
"type": "integer",
"description": "Confidence is the confidence score for this license detection (0-100)."
},
"offset": {
"type": "integer"
"type": "integer",
"description": "Offset is the byte offset where the license text starts in the file."
},
"extent": {
"type": "integer"
"type": "integer",
"description": "Extent is the length of the license text in bytes."
}
},
"type": "object",
@ -1363,30 +1388,38 @@
"confidence",
"offset",
"extent"
]
],
"description": "FileLicenseEvidence contains supporting evidence for a license detection in a file, including the byte offset, extent, and confidence level."
},
"FileMetadataEntry": {
"properties": {
"mode": {
"type": "integer"
"type": "integer",
"description": "Mode is the Unix file permission mode in octal format."
},
"type": {
"type": "string"
"type": "string",
"description": "Type is the file type (e.g., \"RegularFile\", \"Directory\", \"SymbolicLink\")."
},
"linkDestination": {
"type": "string"
"type": "string",
"description": "LinkDestination is the target path for symbolic links."
},
"userID": {
"type": "integer"
"type": "integer",
"description": "UserID is the file owner user ID."
},
"groupID": {
"type": "integer"
"type": "integer",
"description": "GroupID is the file owner group ID."
},
"mimeType": {
"type": "string"
"type": "string",
"description": "MIMEType is the MIME type of the file contents."
},
"size": {
"type": "integer"
"type": "integer",
"description": "Size is the file size in bytes."
}
},
"type": "object",
@ -1397,7 +1430,8 @@
"groupID",
"mimeType",
"size"
]
],
"description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file."
},
"GithubActionsUseStatement": {
"properties": {
@ -1545,7 +1579,8 @@
"items": {
"type": "string"
},
"type": "array"
"type": "array",
"description": "IDLikes represents a list of distribution IDs that this Linux distribution is similar to or derived from, as defined in os-release ID_LIKE field."
},
"JavaArchive": {
"properties": {
@ -1974,28 +2009,34 @@
"License": {
"properties": {
"value": {
"type": "string"
"type": "string",
"description": "Value is the raw license identifier or expression as found."
},
"spdxExpression": {
"type": "string"
"type": "string",
"description": "SPDXExpression is the parsed SPDX license expression."
},
"type": {
"type": "string"
"type": "string",
"description": "Type is the license type classification (e.g., declared, concluded, discovered)."
},
"urls": {
"items": {
"type": "string"
},
"type": "array"
"type": "array",
"description": "URLs are URLs where license text or information can be found."
},
"locations": {
"items": {
"$ref": "#/$defs/Location"
},
"type": "array"
"type": "array",
"description": "Locations are file locations where this license was discovered."
},
"contents": {
"type": "string"
"type": "string",
"description": "Contents is the full license text content."
}
},
"type": "object",
@ -2005,7 +2046,8 @@
"type",
"urls",
"locations"
]
],
"description": "License represents software license information discovered for a package, including SPDX expressions and supporting evidence locations."
},
"LinuxKernelArchive": {
"properties": {
@ -2130,64 +2172,84 @@
"LinuxRelease": {
"properties": {
"prettyName": {
"type": "string"
"type": "string",
"description": "PrettyName is a human-readable operating system name with version."
},
"name": {
"type": "string"
"type": "string",
"description": "Name is the operating system name without version information."
},
"id": {
"type": "string"
"type": "string",
"description": "ID is the lower-case operating system identifier (e.g., \"ubuntu\", \"rhel\")."
},
"idLike": {
"$ref": "#/$defs/IDLikes"
"$ref": "#/$defs/IDLikes",
"description": "IDLike is a list of operating system IDs this distribution is similar to or derived from."
},
"version": {
"type": "string"
"type": "string",
"description": "Version is the operating system version including codename if available."
},
"versionID": {
"type": "string"
"type": "string",
"description": "VersionID is the operating system version number or identifier."
},
"versionCodename": {
"type": "string"
"type": "string",
"description": "VersionCodename is the operating system release codename (e.g., \"jammy\", \"bullseye\")."
},
"buildID": {
"type": "string"
"type": "string",
"description": "BuildID is a build identifier for the operating system."
},
"imageID": {
"type": "string"
"type": "string",
"description": "ImageID is an identifier for container or cloud images."
},
"imageVersion": {
"type": "string"
"type": "string",
"description": "ImageVersion is the version for container or cloud images."
},
"variant": {
"type": "string"
"type": "string",
"description": "Variant is the operating system variant name (e.g., \"Server\", \"Workstation\")."
},
"variantID": {
"type": "string"
"type": "string",
"description": "VariantID is the lower-case operating system variant identifier."
},
"homeURL": {
"type": "string"
"type": "string",
"description": "HomeURL is the homepage URL for the operating system."
},
"supportURL": {
"type": "string"
"type": "string",
"description": "SupportURL is the support or help URL for the operating system."
},
"bugReportURL": {
"type": "string"
"type": "string",
"description": "BugReportURL is the bug reporting URL for the operating system."
},
"privacyPolicyURL": {
"type": "string"
"type": "string",
"description": "PrivacyPolicyURL is the privacy policy URL for the operating system."
},
"cpeName": {
"type": "string"
"type": "string",
"description": "CPEName is the Common Platform Enumeration name for the operating system."
},
"supportEnd": {
"type": "string"
"type": "string",
"description": "SupportEnd is the end of support date or version identifier."
},
"extendedSupport": {
"type": "boolean"
"type": "boolean",
"description": "ExtendedSupport indicates whether extended security or support is available."
}
},
"type": "object"
"type": "object",
"description": "LinuxRelease contains Linux distribution identification and version information extracted from /etc/os-release or similar system files."
},
"Location": {
"properties": {
@ -2283,7 +2345,7 @@
"product_id",
"kb"
],
"description": "MicrosoftKbPatch is slightly odd in how it is expected to map onto data."
"description": "MicrosoftKbPatch represents a Windows Knowledge Base patch identifier associated with a specific Microsoft product from the MSRC (Microsoft Security Response Center)."
},
"NixDerivation": {
"properties": {
@ -3014,7 +3076,8 @@
"type": "object",
"required": [
"integrity"
]
],
"description": "PnpmLockResolution contains package resolution metadata from pnpm lockfiles, including the integrity hash used for verification."
},
"PortageDbEntry": {
"properties": {
@ -3501,22 +3564,28 @@
"Relationship": {
"properties": {
"parent": {
"type": "string"
"type": "string",
"description": "Parent is the ID of the parent artifact in this relationship."
},
"child": {
"type": "string"
"type": "string",
"description": "Child is the ID of the child artifact in this relationship."
},
"type": {
"type": "string"
"type": "string",
"description": "Type is the relationship type (e.g., \"contains\", \"dependency-of\", \"ancestor-of\")."
},
"metadata": true
"metadata": {
"description": "Metadata contains additional relationship-specific metadata."
}
},
"type": "object",
"required": [
"parent",
"child",
"type"
]
],
"description": "Relationship represents a directed relationship between two artifacts in the SBOM, such as package-contains-file or package-depends-on-package."
},
"RpmArchive": {
"properties": {
@ -3863,17 +3932,20 @@
"Schema": {
"properties": {
"version": {
"type": "string"
"type": "string",
"description": "Version is the JSON schema version for this document format."
},
"url": {
"type": "string"
"type": "string",
"description": "URL is the URL to the JSON schema definition document."
}
},
"type": "object",
"required": [
"version",
"url"
]
],
"description": "Schema specifies the JSON schema version and URL reference that defines the structure and validation rules for this document format."
},
"SnapEntry": {
"properties": {
@ -3911,21 +3983,28 @@
"Source": {
"properties": {
"id": {
"type": "string"
"type": "string",
"description": "ID is a unique identifier for the analyzed source artifact."
},
"name": {
"type": "string"
"type": "string",
"description": "Name is the name of the analyzed artifact (e.g., image name, directory path)."
},
"version": {
"type": "string"
"type": "string",
"description": "Version is the version of the analyzed artifact (e.g., image tag)."
},
"supplier": {
"type": "string"
"type": "string",
"description": "Supplier is supplier information, which can be user-provided for NTIA minimum elements compliance."
},
"type": {
"type": "string"
"type": "string",
"description": "Type is the source type (e.g., \"image\", \"directory\", \"file\")."
},
"metadata": true
"metadata": {
"description": "Metadata contains additional source-specific metadata."
}
},
"type": "object",
"required": [
@ -3935,7 +4014,7 @@
"type",
"metadata"
],
"description": "Instead, the Supplier can be determined by the user of syft and passed as a config or flag to help fulfill the NTIA minimum elements."
"description": "Source represents the artifact that was analyzed to generate this SBOM, such as a container image, directory, or file archive."
},
"SwiftPackageManagerLockEntry": {
"properties": {

File diff suppressed because it is too large


@ -1,6 +1,6 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "anchore.io/schema/syft/json/16.0.42/document",
"$id": "anchore.io/schema/syft/json/16.0.43/document",
"$ref": "#/$defs/Document",
"$defs": {
"AlpmDbEntry": {
@ -130,7 +130,8 @@
"description": "Digests contains file content hashes for integrity verification"
}
},
"type": "object"
"type": "object",
"description": "AlpmFileRecord represents a single file entry within an Arch Linux package with its associated metadata tracked by pacman."
},
"ApkDbEntry": {
"properties": {
@ -433,16 +434,19 @@
"CPE": {
"properties": {
"cpe": {
"type": "string"
"type": "string",
"description": "Value is the CPE string identifier."
},
"source": {
"type": "string"
"type": "string",
"description": "Source is the source where this CPE was obtained or generated from."
}
},
"type": "object",
"required": [
"cpe"
]
],
"description": "CPE represents a Common Platform Enumeration identifier used for matching packages to known vulnerabilities in security databases."
},
"ClassifierMatch": {
"properties": {
@ -747,19 +751,23 @@
"Descriptor": {
"properties": {
"name": {
"type": "string"
"type": "string",
"description": "Name is the name of the tool that generated this SBOM (e.g., \"syft\")."
},
"version": {
"type": "string"
"type": "string",
"description": "Version is the version of the tool that generated this SBOM."
},
"configuration": true
"configuration": {
"description": "Configuration contains the tool configuration used during SBOM generation."
}
},
"type": "object",
"required": [
"name",
"version"
],
"description": "Descriptor describes what created the document as well as surrounding metadata"
"description": "Descriptor identifies the tool that generated this SBOM document, including its name, version, and configuration used during catalog generation."
},
"Digest": {
"properties": {
@ -1285,58 +1293,71 @@
"File": {
"properties": {
"id": {
"type": "string"
"type": "string",
"description": "ID is a unique identifier for this file within the SBOM."
},
"location": {
"$ref": "#/$defs/Coordinates"
"$ref": "#/$defs/Coordinates",
"description": "Location is the file path and layer information where this file was found."
},
"metadata": {
"$ref": "#/$defs/FileMetadataEntry"
"$ref": "#/$defs/FileMetadataEntry",
"description": "Metadata contains filesystem metadata such as permissions, ownership, and file type."
},
"contents": {
"type": "string"
"type": "string",
"description": "Contents is the file contents for small files."
},
"digests": {
"items": {
"$ref": "#/$defs/Digest"
},
"type": "array"
"type": "array",
"description": "Digests contains cryptographic hashes of the file contents."
},
"licenses": {
"items": {
"$ref": "#/$defs/FileLicense"
},
"type": "array"
"type": "array",
"description": "Licenses contains license information discovered within this file."
},
"executable": {
"$ref": "#/$defs/Executable"
"$ref": "#/$defs/Executable",
"description": "Executable contains executable metadata if this file is a binary."
},
"unknowns": {
"items": {
"type": "string"
},
"type": "array"
"type": "array",
"description": "Unknowns contains unknown fields for forward compatibility."
}
},
"type": "object",
"required": [
"id",
"location"
]
],
"description": "File represents a file discovered during cataloging with its metadata, content digests, licenses, and relationships to packages."
},
"FileLicense": {
"properties": {
"value": {
"type": "string"
"type": "string",
"description": "Value is the raw license identifier or text as found in the file."
},
"spdxExpression": {
"type": "string"
"type": "string",
"description": "SPDXExpression is the parsed SPDX license expression."
},
"type": {
"type": "string"
"type": "string",
"description": "Type is the license type classification (e.g., declared, concluded, discovered)."
},
"evidence": {
"$ref": "#/$defs/FileLicenseEvidence"
"$ref": "#/$defs/FileLicenseEvidence",
"description": "Evidence contains supporting evidence for this license detection."
}
},
"type": "object",
@ -1344,18 +1365,22 @@
"value",
"spdxExpression",
"type"
]
],
"description": "FileLicense represents license information discovered within a file's contents or metadata, including the matched license text and SPDX expression."
},
"FileLicenseEvidence": {
"properties": {
"confidence": {
"type": "integer"
"type": "integer",
"description": "Confidence is the confidence score for this license detection (0-100)."
},
"offset": {
"type": "integer"
"type": "integer",
"description": "Offset is the byte offset where the license text starts in the file."
},
"extent": {
"type": "integer"
"type": "integer",
"description": "Extent is the length of the license text in bytes."
}
},
"type": "object",
@ -1363,30 +1388,38 @@
"confidence",
"offset",
"extent"
]
],
"description": "FileLicenseEvidence contains supporting evidence for a license detection in a file, including the byte offset, extent, and confidence level."
},
"FileMetadataEntry": {
"properties": {
"mode": {
"type": "integer"
"type": "integer",
"description": "Mode is the Unix file permission mode in octal format."
},
"type": {
"type": "string"
"type": "string",
"description": "Type is the file type (e.g., \"RegularFile\", \"Directory\", \"SymbolicLink\")."
},
"linkDestination": {
"type": "string"
"type": "string",
"description": "LinkDestination is the target path for symbolic links."
},
"userID": {
"type": "integer"
"type": "integer",
"description": "UserID is the file owner user ID."
},
"groupID": {
"type": "integer"
"type": "integer",
"description": "GroupID is the file owner group ID."
},
"mimeType": {
"type": "string"
"type": "string",
"description": "MIMEType is the MIME type of the file contents."
},
"size": {
"type": "integer"
"type": "integer",
"description": "Size is the file size in bytes."
}
},
"type": "object",
@ -1397,7 +1430,50 @@
"groupID",
"mimeType",
"size"
]
],
"description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file."
},
"GgufFileHeader": {
"properties": {
"ggufVersion": {
"type": "integer",
"description": "GGUFVersion is the GGUF format version (e.g., 3)"
},
"fileSize": {
"type": "integer",
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
},
"architecture": {
"type": "string",
"description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")"
},
"quantization": {
"type": "string",
"description": "Quantization is the quantization type (e.g., \"IQ4_NL\", \"Q4_K_M\")"
},
"parameters": {
"type": "integer",
"description": "Parameters is the number of model parameters (if present in header)"
},
"tensorCount": {
"type": "integer",
"description": "TensorCount is the number of tensors in the model"
},
"header": {
"type": "object",
"description": "RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
},
"metadataHash": {
"type": "string",
"description": "MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
}
},
"type": "object",
"required": [
"ggufVersion",
"tensorCount"
],
"description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file."
},
"GithubActionsUseStatement": {
"properties": {
@ -1545,7 +1621,8 @@
"items": {
"type": "string"
},
"type": "array"
"type": "array",
"description": "IDLikes represents a list of distribution IDs that this Linux distribution is similar to or derived from, as defined in os-release ID_LIKE field."
},
"JavaArchive": {
"properties": {
@ -1974,28 +2051,34 @@
"License": {
"properties": {
"value": {
"type": "string"
"type": "string",
"description": "Value is the raw license identifier or expression as found."
},
"spdxExpression": {
"type": "string"
"type": "string",
"description": "SPDXExpression is the parsed SPDX license expression."
},
"type": {
"type": "string"
"type": "string",
"description": "Type is the license type classification (e.g., declared, concluded, discovered)."
},
"urls": {
"items": {
"type": "string"
},
"type": "array"
"type": "array",
"description": "URLs are URLs where license text or information can be found."
},
"locations": {
"items": {
"$ref": "#/$defs/Location"
},
"type": "array"
"type": "array",
"description": "Locations are file locations where this license was discovered."
},
"contents": {
"type": "string"
"type": "string",
"description": "Contents is the full license text content."
}
},
"type": "object",
@ -2005,7 +2088,8 @@
"type",
"urls",
"locations"
]
],
"description": "License represents software license information discovered for a package, including SPDX expressions and supporting evidence locations."
},
"LinuxKernelArchive": {
"properties": {
@ -2130,64 +2214,84 @@
"LinuxRelease": {
"properties": {
"prettyName": {
"type": "string"
"type": "string",
"description": "PrettyName is a human-readable operating system name with version."
},
"name": {
"type": "string"
"type": "string",
"description": "Name is the operating system name without version information."
},
"id": {
"type": "string"
"type": "string",
"description": "ID is the lower-case operating system identifier (e.g., \"ubuntu\", \"rhel\")."
},
"idLike": {
"$ref": "#/$defs/IDLikes"
"$ref": "#/$defs/IDLikes",
"description": "IDLike is a list of operating system IDs this distribution is similar to or derived from."
},
"version": {
"type": "string"
"type": "string",
"description": "Version is the operating system version including codename if available."
},
"versionID": {
"type": "string"
"type": "string",
"description": "VersionID is the operating system version number or identifier."
},
"versionCodename": {
"type": "string"
"type": "string",
"description": "VersionCodename is the operating system release codename (e.g., \"jammy\", \"bullseye\")."
},
"buildID": {
"type": "string"
"type": "string",
"description": "BuildID is a build identifier for the operating system."
},
"imageID": {
"type": "string"
"type": "string",
"description": "ImageID is an identifier for container or cloud images."
},
"imageVersion": {
"type": "string"
"type": "string",
"description": "ImageVersion is the version for container or cloud images."
},
"variant": {
"type": "string"
"type": "string",
"description": "Variant is the operating system variant name (e.g., \"Server\", \"Workstation\")."
},
"variantID": {
"type": "string"
"type": "string",
"description": "VariantID is the lower-case operating system variant identifier."
},
"homeURL": {
"type": "string"
"type": "string",
"description": "HomeURL is the homepage URL for the operating system."
},
"supportURL": {
"type": "string"
"type": "string",
"description": "SupportURL is the support or help URL for the operating system."
},
"bugReportURL": {
"type": "string"
"type": "string",
"description": "BugReportURL is the bug reporting URL for the operating system."
},
"privacyPolicyURL": {
"type": "string"
"type": "string",
"description": "PrivacyPolicyURL is the privacy policy URL for the operating system."
},
"cpeName": {
"type": "string"
"type": "string",
"description": "CPEName is the Common Platform Enumeration name for the operating system."
},
"supportEnd": {
"type": "string"
"type": "string",
"description": "SupportEnd is the end of support date or version identifier."
},
"extendedSupport": {
"type": "boolean"
"type": "boolean",
"description": "ExtendedSupport indicates whether extended security or support is available."
}
},
"type": "object"
"type": "object",
"description": "LinuxRelease contains Linux distribution identification and version information extracted from /etc/os-release or similar system files."
},
"Location": {
"properties": {
@ -2283,7 +2387,7 @@
"product_id",
"kb"
],
"description": "MicrosoftKbPatch is slightly odd in how it is expected to map onto data."
"description": "MicrosoftKbPatch represents a Windows Knowledge Base patch identifier associated with a specific Microsoft product from the MSRC (Microsoft Security Response Center)."
},
"NixDerivation": {
"properties": {
@ -2517,6 +2621,9 @@
{
"$ref": "#/$defs/ErlangRebarLockEntry"
},
{
"$ref": "#/$defs/GgufFileHeader"
},
{
"$ref": "#/$defs/GithubActionsUseStatement"
},
@ -3014,7 +3121,8 @@
"type": "object",
"required": [
"integrity"
]
],
"description": "PnpmLockResolution contains package resolution metadata from pnpm lockfiles, including the integrity hash used for verification."
},
"PortageDbEntry": {
"properties": {
@ -3501,22 +3609,28 @@
"Relationship": {
"properties": {
"parent": {
"type": "string"
"type": "string",
"description": "Parent is the ID of the parent artifact in this relationship."
},
"child": {
"type": "string"
"type": "string",
"description": "Child is the ID of the child artifact in this relationship."
},
"type": {
"type": "string"
"type": "string",
"description": "Type is the relationship type (e.g., \"contains\", \"dependency-of\", \"ancestor-of\")."
},
"metadata": true
"metadata": {
"description": "Metadata contains additional relationship-specific metadata."
}
},
"type": "object",
"required": [
"parent",
"child",
"type"
]
],
"description": "Relationship represents a directed relationship between two artifacts in the SBOM, such as package-contains-file or package-depends-on-package."
},
"RpmArchive": {
"properties": {
@ -3863,17 +3977,20 @@
"Schema": {
"properties": {
"version": {
"type": "string"
"type": "string",
"description": "Version is the JSON schema version for this document format."
},
"url": {
"type": "string"
"type": "string",
"description": "URL is the URL to the JSON schema definition document."
}
},
"type": "object",
"required": [
"version",
"url"
]
],
"description": "Schema specifies the JSON schema version and URL reference that defines the structure and validation rules for this document format."
},
"SnapEntry": {
"properties": {
@ -3911,21 +4028,28 @@
"Source": {
"properties": {
"id": {
"type": "string"
"type": "string",
"description": "ID is a unique identifier for the analyzed source artifact."
},
"name": {
"type": "string"
"type": "string",
"description": "Name is the name of the analyzed artifact (e.g., image name, directory path)."
},
"version": {
"type": "string"
"type": "string",
"description": "Version is the version of the analyzed artifact (e.g., image tag)."
},
"supplier": {
"type": "string"
"type": "string",
"description": "Supplier is supplier information, which can be user-provided for NTIA minimum elements compliance."
},
"type": {
"type": "string"
"type": "string",
"description": "Type is the source type (e.g., \"image\", \"directory\", \"file\")."
},
"metadata": true
"metadata": {
"description": "Metadata contains additional source-specific metadata."
}
},
"type": "object",
"required": [
@ -3935,7 +4059,7 @@
"type",
"metadata"
],
"description": "Instead, the Supplier can be determined by the user of syft and passed as a config or flag to help fulfill the NTIA minimum elements."
"description": "Source represents the artifact that was analyzed to generate this SBOM, such as a container image, directory, or file archive."
},
"SwiftPackageManagerLockEntry": {
"properties": {


@ -0,0 +1,95 @@
package cpes
import (
"bufio"
"errors"
"fmt"
"io"
"strings"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/format/internal"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
const ID sbom.FormatID = "cpes"
const version = "1"
var _ sbom.FormatDecoder = (*decoder)(nil)
type decoder struct{}
func NewFormatDecoder() sbom.FormatDecoder {
return decoder{}
}
func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) {
if r == nil {
return nil, "", "", fmt.Errorf("no reader provided")
}
s, err := toSyftModel(r)
return s, ID, version, err
}
func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) {
if r == nil {
return "", ""
}
scanner := bufio.NewScanner(r)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" {
// skip whitespace only lines
continue
}
err := cpe.ValidateString(line)
if err != nil {
return "", ""
}
return ID, version
}
return "", ""
}
func toSyftModel(r io.Reader) (*sbom.SBOM, error) {
var errs []error
pkgs := pkg.NewCollection()
scanner := bufio.NewScanner(r)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" {
continue
}
// skip invalid CPEs
c, err := cpe.New(line, "")
if err != nil {
log.WithFields("error", err, "line", line).Debug("unable to parse cpe")
continue
}
p := pkg.Package{
Name: c.Attributes.Product,
Version: c.Attributes.Version,
CPEs: []cpe.CPE{c},
}
internal.Backfill(&p)
p.SetID()
pkgs.Add(p)
}
if err := scanner.Err(); err != nil {
errs = append(errs, err)
}
return &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkgs,
},
}, errors.Join(errs...)
}
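A usage sketch for the decoder above, assuming only the exported API in this file plus pkg.Collection's PackageCount helper: decode a newline-separated CPE list and report what came back.

func decodeExample() {
	input := "cpe:2.3:a:apache:log4j:2.14.1:*:*:*:*:*:*:*\ncpe:/a:apache:log4j"
	s, id, version, err := NewFormatDecoder().Decode(strings.NewReader(input))
	if err != nil {
		log.WithFields("error", err).Debug("unable to decode cpes input")
		return
	}
	fmt.Printf("format=%s version=%s packages=%d\n", id, version, s.Artifacts.Packages.PackageCount())
}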


@ -0,0 +1,171 @@
package cpes
import (
"strings"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
func Test_CPEProvider(t *testing.T) {
tests := []struct {
name string
userInput string
sbom *sbom.SBOM
}{
{
name: "takes a single cpe",
userInput: "cpe:/a:apache:log4j:2.14.1",
sbom: &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkg.NewCollection(pkg.Package{
Name: "log4j",
Version: "2.14.1",
CPEs: []cpe.CPE{
cpe.Must("cpe:/a:apache:log4j:2.14.1", ""),
},
}),
},
},
},
{
name: "takes multiple cpes",
userInput: `cpe:/a:apache:log4j:2.14.1
cpe:2.3:a:f5:nginx:*:*:*:*:*:*:*:*;
cpe:2.3:a:f5:nginx:0.5.2:*:*:*:*:*:*:*;
cpe:2.3:a:f5:nginx:0.5.3:*:*:*:*:*:*:*;`,
sbom: &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkg.NewCollection(
pkg.Package{
Name: "log4j",
Version: "2.14.1",
CPEs: []cpe.CPE{
cpe.Must("cpe:/a:apache:log4j:2.14.1", ""),
},
},
pkg.Package{
Name: "nginx",
Version: "",
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:f5:nginx:*:*:*:*:*:*:*:*;", ""),
},
},
pkg.Package{
Name: "nginx",
Version: "0.5.2",
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:f5:nginx:0.5.2:*:*:*:*:*:*:*;", ""),
},
},
pkg.Package{
Name: "nginx",
Version: "0.5.3",
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:f5:nginx:0.5.3:*:*:*:*:*:*:*;", ""),
},
},
),
},
},
},
{
name: "takes cpe with no version",
userInput: "cpe:/a:apache:log4j",
sbom: &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkg.NewCollection(pkg.Package{
Name: "log4j",
CPEs: []cpe.CPE{
cpe.Must("cpe:/a:apache:log4j", ""),
},
}),
},
},
},
{
name: "takes CPE 2.3 format",
userInput: "cpe:2.3:a:apache:log4j:2.14.1:*:*:*:*:*:*:*",
sbom: &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkg.NewCollection(pkg.Package{
Name: "log4j",
Version: "2.14.1",
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:apache:log4j:2.14.1:*:*:*:*:*:*:*", ""),
},
}),
},
},
},
{
name: "deduces target SW from CPE - known target_sw",
userInput: "cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*",
sbom: &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkg.NewCollection(pkg.Package{
Name: "opensearch",
Type: pkg.GemPkg,
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", ""),
},
}),
},
},
},
{
name: "handles unknown target_sw CPE field",
userInput: "cpe:2.3:a:amazon:opensearch:*:*:*:*:*:loremipsum:*:*",
sbom: &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkg.NewCollection(pkg.Package{
Name: "opensearch",
Type: "",
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:loremipsum:*:*", ""),
},
}),
},
},
},
{
name: "invalid prefix",
userInput: "dir:test-fixtures/cpe",
sbom: &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkg.NewCollection(),
},
},
},
}
syftPkgOpts := []cmp.Option{
cmpopts.IgnoreFields(pkg.Package{}, "id", "Language"),
cmpopts.IgnoreUnexported(pkg.Package{}, file.LocationSet{}, pkg.LicenseSet{}),
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
dec := NewFormatDecoder()
decodedSBOM, _, _, err := dec.Decode(strings.NewReader(tc.userInput))
require.NoError(t, err)
gotSyftPkgs := decodedSBOM.Artifacts.Packages.Sorted()
wantSyftPkgs := tc.sbom.Artifacts.Packages.Sorted()
require.Equal(t, len(wantSyftPkgs), len(gotSyftPkgs))
for idx, wantPkg := range wantSyftPkgs {
if d := cmp.Diff(wantPkg, gotSyftPkgs[idx], syftPkgOpts...); d != "" {
t.Errorf("unexpected Syft Package (-want +got):\n%s", d)
}
}
})
}
}


@ -3,6 +3,7 @@ package format
import (
"io"
"github.com/anchore/syft/syft/format/cpes"
"github.com/anchore/syft/syft/format/cyclonedxjson"
"github.com/anchore/syft/syft/format/cyclonedxxml"
"github.com/anchore/syft/syft/format/purls"
@ -26,6 +27,7 @@ func Decoders() []sbom.FormatDecoder {
spdxtagvalue.NewFormatDecoder(),
spdxjson.NewFormatDecoder(),
purls.NewFormatDecoder(),
cpes.NewFormatDecoder(),
}
}
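A sketch of how a caller can run the decoder list above against arbitrary input. Note that each decoder gets a fresh reader, since Identify may consume the stream; bytes and fmt are assumed imports for this sketch.

func identify(data []byte) {
	for _, dec := range Decoders() {
		if id, version := dec.Identify(bytes.NewReader(data)); id != "" {
			fmt.Printf("identified as %s@%s\n", id, version)
			return
		}
	}
	fmt.Println("unrecognized format")
}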


@ -1,11 +1,13 @@
package model
import (
"context"
"fmt"
"strings"
"time"
"github.com/anchore/archiver/v3"
"github.com/mholt/archives"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg"
@ -153,8 +155,8 @@ func trimRelative(s string) string {
// isArchive returns true if the path appears to be an archive
func isArchive(path string) bool {
_, err := archiver.ByExtension(path)
return err == nil
format, _, err := archives.Identify(context.Background(), path, nil)
return err == nil && format != nil
}
func toDependencies(s *sbom.SBOM, p pkg.Package) (out []string) {


@ -10,13 +10,31 @@ import (
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/pkg"
cataloger "github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
)
// Backfill takes all information present in the package and attempts to fill in any missing information
// from any available sources, such as the Metadata and PURL.
// from any available sources, such as the Metadata, PURL, or CPEs.
//
// Backfill does not call p.SetID(); callers must do so afterwards so the ID reflects the backfilled data
func Backfill(p *pkg.Package) {
backfillFromPurl(p)
backfillFromCPE(p)
}
func backfillFromCPE(p *pkg.Package) {
if len(p.CPEs) == 0 {
return
}
c := p.CPEs[0]
if p.Type == "" {
p.Type = cataloger.TargetSoftwareToPackageType(c.Attributes.TargetSW)
}
}
func backfillFromPurl(p *pkg.Package) {
if p.PURL == "" {
return
}
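The new CPE-driven path in action, as a minimal sketch mirroring the test change below: a package carrying only a CPE gains its Type from the CPE's target_sw field, and per the doc comment the caller still owns SetID.

p := pkg.Package{
	CPEs: []cpe.CPE{cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", "")},
}
Backfill(&p) // target_sw "ruby" -> p.Type == pkg.GemPkg
p.SetID()    // Backfill intentionally leaves the ID to the caller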


@ -121,6 +121,20 @@ func Test_Backfill(t *testing.T) {
Metadata: pkg.JavaArchive{},
},
},
{
name: "target-sw from CPE",
in: pkg.Package{
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", ""),
},
},
expected: pkg.Package{
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", ""),
},
Type: pkg.GemPkg,
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {


@ -40,8 +40,11 @@ func EncodeComponent(p pkg.Package, supplier string, locationSorter func(a, b fi
}
componentType := cyclonedx.ComponentTypeLibrary
if p.Type == pkg.BinaryPkg {
switch p.Type {
case pkg.BinaryPkg:
componentType = cyclonedx.ComponentTypeApplication
case pkg.ModelPkg:
componentType = cyclonedx.ComponentTypeMachineLearningModel
}
return cyclonedx.Component{

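Restated as a standalone helper (hypothetical name) to make the new mapping explicit: binary packages encode as applications, model packages as machine-learning models, and everything else as libraries.

func componentTypeFor(t pkg.Type) cyclonedx.ComponentType {
	switch t {
	case pkg.BinaryPkg:
		return cyclonedx.ComponentTypeApplication
	case pkg.ModelPkg:
		return cyclonedx.ComponentTypeMachineLearningModel
	default:
		return cyclonedx.ComponentTypeLibrary
	}
}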

@ -62,7 +62,7 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str
switch component.Type {
case cyclonedx.ComponentTypeOS:
case cyclonedx.ComponentTypeContainer:
case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary:
case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary, cyclonedx.ComponentTypeMachineLearningModel:
p := decodeComponent(component)
idMap[component.BOMRef] = p
if component.BOMRef != "" {


@ -55,6 +55,7 @@ func Test_OriginatorSupplier(t *testing.T) {
pkg.OpamPackage{},
pkg.YarnLockEntry{},
pkg.TerraformLockProviderEntry{},
pkg.GGUFFileHeader{},
)
tests := []struct {
name string


@ -82,6 +82,8 @@ func SourceInfo(p pkg.Package) string {
answer = "acquired package info from Homebrew formula"
case pkg.TerraformPkg:
answer = "acquired package info from Terraform dependency lock file"
case pkg.ModelPkg:
answer = "acquired package info from AI artifact (e.g. GGUF file)"
default:
answer = "acquired package info from the following paths"
}


@ -351,6 +351,14 @@ func Test_SourceInfo(t *testing.T) {
"acquired package info from Terraform dependency lock file",
},
},
{
input: pkg.Package{
Type: pkg.ModelPkg,
},
expected: []string{
"",
},
},
}
var pkgTypes []pkg.Type
for _, test := range tests {


@ -35,14 +35,23 @@ func (d *Document) UnmarshalJSON(data []byte) error {
return nil
}
// Descriptor describes what created the document as well as surrounding metadata
// Descriptor identifies the tool that generated this SBOM document, including its name, version, and configuration used during catalog generation.
type Descriptor struct {
// Name is the name of the tool that generated this SBOM (e.g., "syft").
Name string `json:"name"`
// Version is the version of the tool that generated this SBOM.
Version string `json:"version"`
// Configuration contains the tool configuration used during SBOM generation.
Configuration interface{} `json:"configuration,omitempty"`
}
// Schema specifies the JSON schema version and URL reference that defines the structure and validation rules for this document format.
type Schema struct {
// Version is the JSON schema version for this document format.
Version string `json:"version"`
// URL is the URL to the JSON schema definition document.
URL string `json:"url"`
}


@ -10,24 +10,54 @@ import (
"github.com/anchore/syft/syft/license"
)
// File represents a file discovered during cataloging with its metadata, content digests, licenses, and relationships to packages.
type File struct {
// ID is a unique identifier for this file within the SBOM.
ID string `json:"id"`
// Location is the file path and layer information where this file was found.
Location file.Coordinates `json:"location"`
// Metadata contains filesystem metadata such as permissions, ownership, and file type.
Metadata *FileMetadataEntry `json:"metadata,omitempty"`
// Contents is the file contents for small files.
Contents string `json:"contents,omitempty"`
// Digests contains cryptographic hashes of the file contents.
Digests []file.Digest `json:"digests,omitempty"`
// Licenses contains license information discovered within this file.
Licenses []FileLicense `json:"licenses,omitempty"`
// Executable contains executable metadata if this file is a binary.
Executable *file.Executable `json:"executable,omitempty"`
// Unknowns contains unknown fields for forward compatibility.
Unknowns []string `json:"unknowns,omitempty"`
}
// FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file.
type FileMetadataEntry struct {
// Mode is the Unix file permission mode in octal format.
Mode int `json:"mode"`
// Type is the file type (e.g., "RegularFile", "Directory", "SymbolicLink").
Type string `json:"type"`
// LinkDestination is the target path for symbolic links.
LinkDestination string `json:"linkDestination,omitempty"`
// UserID is the file owner user ID.
UserID int `json:"userID"`
// GroupID is the file owner group ID.
GroupID int `json:"groupID"`
// MIMEType is the MIME type of the file contents.
MIMEType string `json:"mimeType"`
// Size is the file size in bytes.
Size int64 `json:"size"`
}
@ -82,16 +112,30 @@ type sbomImportLegacyFileMetadataEntry struct {
Size int64 `json:"Size"`
}
// FileLicense represents license information discovered within a file's contents or metadata, including the matched license text and SPDX expression.
type FileLicense struct {
// Value is the raw license identifier or text as found in the file.
Value string `json:"value"`
// SPDXExpression is the parsed SPDX license expression.
SPDXExpression string `json:"spdxExpression"`
// Type is the license type classification (e.g., declared, concluded, discovered).
Type license.Type `json:"type"`
// Evidence contains supporting evidence for this license detection.
Evidence *FileLicenseEvidence `json:"evidence,omitempty"`
}
// FileLicenseEvidence contains supporting evidence for a license detection in a file, including the byte offset, extent, and confidence level.
type FileLicenseEvidence struct {
// Confidence is the confidence score for this license detection (0-100).
Confidence int `json:"confidence"`
// Offset is the byte offset where the license text starts in the file.
Offset int `json:"offset"`
// Extent is the length of the license text in bytes.
Extent int `json:"extent"`
}
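A minimal sketch (assuming the syftjson model package path and the license.Concluded constant) of constructing one File entry with a license finding and its evidence; the ID, digest, and offsets are illustrative:

package main

import (
	"encoding/json"
	"fmt"

	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/format/syftjson/model" // assumed package path
	"github.com/anchore/syft/syft/license"
)

func main() {
	f := model.File{
		ID:       "af63bd4c8601b7f1", // illustrative artifact ID
		Location: file.Coordinates{RealPath: "/usr/share/doc/pkg/LICENSE"},
		Digests:  []file.Digest{{Algorithm: "sha256", Value: "deadbeef"}}, // illustrative digest
		Licenses: []model.FileLicense{{
			Value:          "MIT",
			SPDXExpression: "MIT",
			Type:           license.Concluded,
			Evidence:       &model.FileLicenseEvidence{Confidence: 100, Offset: 0, Extent: 1023},
		}},
	}
	out, _ := json.MarshalIndent(f, "", "  ")
	fmt.Println(string(out))
}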

View File

@ -4,27 +4,66 @@ import (
"encoding/json"
)
// IDLikes represents a list of distribution IDs that this Linux distribution is similar to or derived from, as defined in os-release ID_LIKE field.
type IDLikes []string
// LinuxRelease contains Linux distribution identification and version information extracted from /etc/os-release or similar system files.
type LinuxRelease struct {
// PrettyName is a human-readable operating system name with version.
PrettyName string `json:"prettyName,omitempty"`
// Name is the operating system name without version information.
Name string `json:"name,omitempty"`
// ID is the lower-case operating system identifier (e.g., "ubuntu", "rhel").
ID string `json:"id,omitempty"`
// IDLike is a list of operating system IDs this distribution is similar to or derived from.
IDLike IDLikes `json:"idLike,omitempty"`
// Version is the operating system version including codename if available.
Version string `json:"version,omitempty"`
// VersionID is the operating system version number or identifier.
VersionID string `json:"versionID,omitempty"`
// VersionCodename is the operating system release codename (e.g., "jammy", "bullseye").
VersionCodename string `json:"versionCodename,omitempty"`
// BuildID is a build identifier for the operating system.
BuildID string `json:"buildID,omitempty"`
// ImageID is an identifier for container or cloud images.
ImageID string `json:"imageID,omitempty"`
// ImageVersion is the version for container or cloud images.
ImageVersion string `json:"imageVersion,omitempty"`
// Variant is the operating system variant name (e.g., "Server", "Workstation").
Variant string `json:"variant,omitempty"`
// VariantID is the lower-case operating system variant identifier.
VariantID string `json:"variantID,omitempty"`
// HomeURL is the homepage URL for the operating system.
HomeURL string `json:"homeURL,omitempty"`
// SupportURL is the support or help URL for the operating system.
SupportURL string `json:"supportURL,omitempty"`
// BugReportURL is the bug reporting URL for the operating system.
BugReportURL string `json:"bugReportURL,omitempty"`
// PrivacyPolicyURL is the privacy policy URL for the operating system.
PrivacyPolicyURL string `json:"privacyPolicyURL,omitempty"`
// CPEName is the Common Platform Enumeration name for the operating system.
CPEName string `json:"cpeName,omitempty"`
// SupportEnd is the end of support date or version identifier.
SupportEnd string `json:"supportEnd,omitempty"`
// ExtendedSupport indicates whether extended security or support is available.
ExtendedSupport bool `json:"extendedSupport,omitempty"`
}
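A sketch (assuming the same model package path) of how a typical /etc/os-release fragment maps onto LinuxRelease; the distro values are illustrative:

package main

import (
	"encoding/json"
	"fmt"

	"github.com/anchore/syft/syft/format/syftjson/model" // assumed package path
)

func main() {
	// Corresponds to an /etc/os-release fragment such as:
	//   NAME="Ubuntu"  ID=ubuntu  ID_LIKE=debian
	//   VERSION="22.04.3 LTS (Jammy Jellyfish)"  VERSION_ID="22.04"  VERSION_CODENAME=jammy
	rel := model.LinuxRelease{
		PrettyName:      "Ubuntu 22.04.3 LTS",
		Name:            "Ubuntu",
		ID:              "ubuntu",
		IDLike:          model.IDLikes{"debian"},
		Version:         "22.04.3 LTS (Jammy Jellyfish)",
		VersionID:       "22.04",
		VersionCodename: "jammy",
	}
	out, _ := json.Marshal(rel)
	fmt.Println(string(out))
}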

View File

@ -36,21 +36,39 @@ type PackageBasicData struct {
PURL string `json:"purl"`
}
// cpes is a collection of Common Platform Enumeration identifiers for a package.
type cpes []CPE
// CPE represents a Common Platform Enumeration identifier used for matching packages to known vulnerabilities in security databases.
type CPE struct {
// Value is the CPE string identifier.
Value string `json:"cpe"`
// Source is the source where this CPE was obtained or generated from.
Source string `json:"source,omitempty"`
}
// licenses is a collection of license findings associated with a package.
type licenses []License
// License represents software license information discovered for a package, including SPDX expressions and supporting evidence locations.
type License struct {
// Value is the raw license identifier or expression as found.
Value string `json:"value"`
// SPDXExpression is the parsed SPDX license expression.
SPDXExpression string `json:"spdxExpression"`
// Type is the license type classification (e.g., declared, concluded, discovered).
Type license.Type `json:"type"`
// URLs are URLs where license text or information can be found.
URLs []string `json:"urls"`
// Locations are file locations where this license was discovered.
Locations []file.Location `json:"locations"`
// Contents is the full license text content.
Contents string `json:"contents,omitempty"`
}

View File

@ -1,8 +1,16 @@
package model
// Relationship represents a directed relationship between two artifacts in the SBOM, such as package-contains-file or package-depends-on-package.
type Relationship struct {
// Parent is the ID of the parent artifact in this relationship.
Parent string `json:"parent"`
// Child is the ID of the child artifact in this relationship.
Child string `json:"child"`
// Type is the relationship type (e.g., "contains", "dependency-of", "ancestor-of").
Type string `json:"type"`
// Metadata contains additional relationship-specific metadata.
Metadata interface{} `json:"metadata,omitempty"`
}

View File

@ -11,18 +11,25 @@ import (
"github.com/anchore/syft/syft/source"
)
// Source object represents the thing that was cataloged
// Note: syft currently makes no claims or runs any logic to determine the Supplier field below
// Instead, the Supplier can be determined by the user of syft and passed as a config or flag to help fulfill
// the NTIA minimum elements. For more information see the NTIA framing document below
// https://www.ntia.gov/files/ntia/publications/framingsbom_20191112.pdf
// Source represents the artifact that was analyzed to generate this SBOM, such as a container image, directory, or file archive.
// The Supplier field can be provided by users to fulfill NTIA minimum elements requirements.
type Source struct {
// ID is a unique identifier for the analyzed source artifact.
ID string `json:"id"`
// Name is the name of the analyzed artifact (e.g., image name, directory path).
Name string `json:"name"`
// Version is the version of the analyzed artifact (e.g., image tag).
Version string `json:"version"`
// Supplier is supplier information, which can be user-provided for NTIA minimum elements compliance.
Supplier string `json:"supplier,omitempty"`
// Type is the source type (e.g., "image", "directory", "file").
Type string `json:"type"`
// Metadata contains additional source-specific metadata.
Metadata interface{} `json:"metadata"`
}

View File

@ -58,6 +58,7 @@ type AlpmDBEntry struct {
Depends []string `mapstructure:"depends" json:"depends,omitempty"`
}
// AlpmFileRecord represents a single file entry within an Arch Linux package with its associated metadata tracked by pacman.
type AlpmFileRecord struct {
// Path is the file path relative to the filesystem root
Path string `mapstructure:"path" json:"path,omitempty"`

View File

@ -0,0 +1,16 @@
/*
Package ai provides concrete Cataloger implementations for AI artifacts and machine learning models,
including support for GGUF (GPT-Generated Unified Format) model files.
*/
package ai
import (
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
// NewGGUFCataloger returns a new cataloger instance for GGUF model files.
func NewGGUFCataloger() pkg.Cataloger {
return generic.NewCataloger("gguf-cataloger").
WithParserByGlobs(parseGGUFModel, "**/*.gguf")
}
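As a quick orientation (a sketch, not part of this changeset): the returned value satisfies pkg.Cataloger, and the glob routes any file ending in .gguf anywhere under the scanned source to parseGGUFModel. The import path below is assumed from syft's cataloger layout:

package main

import (
	"fmt"

	"github.com/anchore/syft/syft/pkg/cataloger/ai" // assumed import path
)

func main() {
	c := ai.NewGGUFCataloger()
	fmt.Println(c.Name()) // gguf-cataloger
}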

View File

@ -0,0 +1,140 @@
package ai
import (
"os"
"path/filepath"
"testing"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
)
func TestGGUFCataloger_Globs(t *testing.T) {
tests := []struct {
name string
fixture string
expected []string
}{
{
name: "obtain gguf files",
fixture: "test-fixtures/glob-paths",
expected: []string{
"models/model.gguf",
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
pkgtest.NewCatalogTester().
FromDirectory(t, test.fixture).
ExpectsResolverContentQueries(test.expected).
TestCataloger(t, NewGGUFCataloger())
})
}
}
func TestGGUFCataloger(t *testing.T) {
tests := []struct {
name string
setup func(t *testing.T) string
expectedPackages []pkg.Package
expectedRelationships []artifact.Relationship
}{
{
name: "catalog single GGUF file",
setup: func(t *testing.T) string {
dir := t.TempDir()
data := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "llama3-8b").
withStringKV("general.version", "3.0").
withStringKV("general.license", "Apache-2.0").
withStringKV("general.quantization", "Q4_K_M").
withUint64KV("general.parameter_count", 8030000000).
withStringKV("general.some_random_kv", "foobar").
build()
path := filepath.Join(dir, "llama3-8b.gguf")
os.WriteFile(path, data, 0644)
return dir
},
expectedPackages: []pkg.Package{
{
Name: "llama3-8b",
Version: "3.0",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
),
Metadata: pkg.GGUFFileHeader{
Architecture: "llama",
Quantization: "Unknown",
Parameters: 0,
GGUFVersion: 3,
TensorCount: 0,
MetadataKeyValuesHash: "6e3d368066455ce4",
RemainingKeyValues: map[string]interface{}{
"general.some_random_kv": "foobar",
},
},
},
},
expectedRelationships: nil,
},
{
name: "catalog GGUF file with minimal metadata",
setup: func(t *testing.T) string {
dir := t.TempDir()
data := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "gpt2").
withStringKV("general.name", "gpt2-small").
withStringKV("gpt2.context_length", "1024").
withUint32KV("gpt2.embedding_length", 768).
build()
path := filepath.Join(dir, "gpt2-small.gguf")
os.WriteFile(path, data, 0644)
return dir
},
expectedPackages: []pkg.Package{
{
Name: "gpt2-small",
Version: "",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(),
Metadata: pkg.GGUFFileHeader{
Architecture: "gpt2",
Quantization: "Unknown",
Parameters: 0,
GGUFVersion: 3,
TensorCount: 0,
MetadataKeyValuesHash: "9dc6f23591062a27",
RemainingKeyValues: map[string]interface{}{
"gpt2.context_length": "1024",
"gpt2.embedding_length": uint32(768),
},
},
},
},
expectedRelationships: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
fixtureDir := tt.setup(t)
// Use pkgtest to catalog and compare
pkgtest.NewCatalogTester().
FromDirectory(t, fixtureDir).
Expects(tt.expectedPackages, tt.expectedRelationships).
IgnoreLocationLayer().
IgnorePackageFields("FoundBy", "Locations").
TestCataloger(t, NewGGUFCataloger())
})
}
}

View File

@ -0,0 +1,22 @@
package ai
import (
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license string, locations ...file.Location) pkg.Package {
p := pkg.Package{
Name: modelName,
Version: version,
Locations: file.NewLocationSet(locations...),
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(pkg.NewLicensesFromValues(license)...),
Metadata: *metadata,
// NOTE: PURL is intentionally not set as the package-url spec
// has not yet finalized support for ML model packages
}
p.SetID()
return p
}

View File

@ -0,0 +1,121 @@
package ai
import (
"testing"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
)
func TestNewGGUFPackage(t *testing.T) {
tests := []struct {
name string
metadata *pkg.GGUFFileHeader
input struct {
modelName string
version string
license string
locations []file.Location
}
expected pkg.Package
}{
{
name: "complete GGUF package with all fields",
input: struct {
modelName string
version string
license string
locations []file.Location
}{
modelName: "llama3-8b",
version: "3.0",
license: "Apache-2.0",
locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
},
metadata: &pkg.GGUFFileHeader{
Architecture: "llama",
Quantization: "Q4_K_M",
Parameters: 8030000000,
GGUFVersion: 3,
TensorCount: 291,
RemainingKeyValues: map[string]any{
"general.random_kv": "foobar",
},
},
expected: pkg.Package{
Name: "llama3-8b",
Version: "3.0",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
),
Metadata: pkg.GGUFFileHeader{
Architecture: "llama",
Quantization: "Q4_K_M",
Parameters: 8030000000,
GGUFVersion: 3,
TensorCount: 291,
RemainingKeyValues: map[string]any{
"general.random_kv": "foobar",
},
},
Locations: file.NewLocationSet(file.NewLocation("/models/llama3-8b.gguf")),
},
},
{
name: "minimal GGUF package",
input: struct {
modelName string
version string
license string
locations []file.Location
}{
modelName: "gpt2-small",
version: "1.0",
license: "MIT",
locations: []file.Location{file.NewLocation("/models/simple.gguf")},
},
metadata: &pkg.GGUFFileHeader{
Architecture: "gpt2",
GGUFVersion: 3,
TensorCount: 50,
},
expected: pkg.Package{
Name: "gpt2-small",
Version: "1.0",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(
pkg.NewLicenseFromFields("MIT", "", nil),
),
Metadata: pkg.GGUFFileHeader{
Architecture: "gpt2",
GGUFVersion: 3,
TensorCount: 50,
},
Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")),
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
actual := newGGUFPackage(
tt.metadata,
tt.input.modelName,
tt.input.version,
tt.input.license,
tt.input.locations...,
)
// Verify metadata type
_, ok := actual.Metadata.(pkg.GGUFFileHeader)
require.True(t, ok, "metadata should be GGUFFileHeader")
// Use AssertPackagesEqual for comprehensive comparison
pkgtest.AssertPackagesEqual(t, tt.expected, actual)
})
}
}

View File

@ -0,0 +1,63 @@
package ai
import (
"encoding/binary"
"fmt"
"io"
gguf_parser "github.com/gpustack/gguf-parser-go"
)
// GGUF file format constants
const (
ggufMagicNumber = 0x46554747 // "GGUF" in little-endian
maxHeaderSize = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies
)
// copyHeader copies the GGUF header from the reader to the writer.
// It validates the magic number first, then copies the rest of the data.
// The reader should be wrapped with io.LimitedReader to prevent OOM issues.
func copyHeader(w io.Writer, r io.Reader) error {
// Read initial chunk to validate magic number
// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
initialBuf := make([]byte, 24) // Enough for magic, version, tensor count, and kv count
if _, err := io.ReadFull(r, initialBuf); err != nil {
return fmt.Errorf("failed to read GGUF header prefix: %w", err)
}
// Verify magic number
magic := binary.LittleEndian.Uint32(initialBuf[0:4])
if magic != ggufMagicNumber {
return fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
}
// Write the initial buffer to the writer
if _, err := w.Write(initialBuf); err != nil {
return fmt.Errorf("failed to write GGUF header prefix: %w", err)
}
// Copy the rest of the header from reader to writer
// The LimitedReader will return EOF once maxHeaderSize is reached
if _, err := io.Copy(w, r); err != nil {
return fmt.Errorf("failed to copy GGUF header: %w", err)
}
return nil
}
// convertGGUFMetadataKVs converts gguf_parser metadata KV pairs into a plain map,
// skipping the standard fields that are extracted separately into typed struct fields.
func convertGGUFMetadataKVs(kvs gguf_parser.GGUFMetadataKVs) map[string]interface{} {
result := make(map[string]interface{})
for _, kv := range kvs {
// Skip standard fields that are extracted separately
switch kv.Key {
case "general.architecture", "general.name", "general.license",
"general.version", "general.parameter_count", "general.quantization":
continue
}
result[kv.Key] = kv.Value
}
return result
}

View File

@ -0,0 +1,135 @@
package ai
import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strings"
"github.com/cespare/xxhash/v2"
gguf_parser "github.com/gpustack/gguf-parser-go"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
// parseGGUFModel parses a GGUF model file and returns the discovered package.
// This implementation only reads the header portion of the file, not the entire model.
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
defer internal.CloseAndLogError(reader, reader.Path())
// Create a temporary file for the library to parse
// The library requires a file path, so we create a temp file
tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf")
if err != nil {
return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
}
tempPath := tempFile.Name()
defer os.Remove(tempPath)
// Copy and validate the GGUF file header using LimitedReader to prevent OOM
// We use LimitedReader to cap reads at maxHeaderSize (50MB)
limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
if err := copyHeader(tempFile, limitedReader); err != nil {
tempFile.Close()
return nil, nil, fmt.Errorf("failed to copy GGUF header: %w", err)
}
tempFile.Close()
// Parse using gguf-parser-go with options to skip unnecessary data
ggufFile, err := gguf_parser.ParseGGUFFile(tempPath,
gguf_parser.SkipLargeMetadata(),
)
if err != nil {
return nil, nil, fmt.Errorf("failed to parse GGUF file: %w", err)
}
// Extract metadata
metadata := ggufFile.Metadata()
// Extract version separately (will be set on Package.Version)
modelVersion := extractVersion(ggufFile.Header.MetadataKV)
// Convert to syft metadata structure
syftMetadata := &pkg.GGUFFileHeader{
Architecture: metadata.Architecture,
Quantization: metadata.FileTypeDescriptor,
Parameters: uint64(metadata.Parameters),
GGUFVersion: uint32(ggufFile.Header.Version),
TensorCount: ggufFile.Header.TensorCount,
RemainingKeyValues: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
}
// If model name is not in metadata, use filename
if metadata.Name == "" {
metadata.Name = extractModelNameFromPath(reader.Path())
}
// Create package from metadata
p := newGGUFPackage(
syftMetadata,
metadata.Name,
modelVersion,
metadata.License,
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
)
return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse GGUF file")
}
// computeKVMetadataHash computes a stable hash of the KV metadata for use as a global identifier
func computeKVMetadataHash(metadata gguf_parser.GGUFMetadataKVs) string {
// Sort the KV pairs by key for stable hashing
sortedKVs := make([]gguf_parser.GGUFMetadataKV, len(metadata))
copy(sortedKVs, metadata)
sort.Slice(sortedKVs, func(i, j int) bool {
return sortedKVs[i].Key < sortedKVs[j].Key
})
// Marshal sorted KVs to JSON for stable hashing
jsonBytes, err := json.Marshal(sortedKVs)
if err != nil {
log.Debugf("failed to marshal metadata for hashing: %v", err)
return ""
}
// Compute xxhash
hash := xxhash.Sum64(jsonBytes)
return fmt.Sprintf("%016x", hash) // 16 hex chars (64 bits)
}
// extractVersion attempts to extract version from metadata KV pairs
func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string {
for _, kv := range kvs {
if kv.Key == "general.version" {
if v, ok := kv.Value.(string); ok && v != "" {
return v
}
}
}
return ""
}
// extractModelNameFromPath extracts the model name from the file path
func extractModelNameFromPath(path string) string {
// Get the base filename
base := filepath.Base(path)
// Remove .gguf extension
name := strings.TrimSuffix(base, ".gguf")
return name
}
// integrity check
var _ generic.Parser = parseGGUFModel
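A sketch (in the same package, assuming gguf_parser.GGUFMetadataKV exposes Key and Value fields as used above) showing why the hash is a stable identifier: computeKVMetadataHash sorts pairs by key before hashing, so KV order does not matter:

package ai

import (
	"testing"

	gguf_parser "github.com/gpustack/gguf-parser-go"
	"github.com/stretchr/testify/require"
)

// A sketch: the metadata hash is order-independent because the KV
// pairs are sorted by key before being marshaled and hashed.
func TestComputeKVMetadataHash_OrderIndependent(t *testing.T) {
	a := gguf_parser.GGUFMetadataKVs{
		{Key: "general.architecture", Value: "llama"},
		{Key: "general.name", Value: "llama3-8b"},
	}
	b := gguf_parser.GGUFMetadataKVs{
		{Key: "general.name", Value: "llama3-8b"},
		{Key: "general.architecture", Value: "llama"},
	}
	require.Equal(t, computeKVMetadataHash(a), computeKVMetadataHash(b))
	require.NotEmpty(t, computeKVMetadataHash(a))
}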

View File

@ -0,0 +1,128 @@
package ai
import (
"bytes"
"encoding/binary"
)
// GGUF type constants for test builder
// https://github.com/ggml-org/ggml/blob/master/docs/gguf.md
const (
ggufMagic = 0x46554747 // "GGUF" in little-endian
ggufTypeUint8 = 0
ggufTypeInt8 = 1
ggufTypeUint16 = 2
ggufTypeInt16 = 3
ggufTypeUint32 = 4
ggufTypeInt32 = 5
ggufTypeFloat32 = 6
ggufTypeBool = 7
ggufTypeString = 8
ggufTypeArray = 9
ggufTypeUint64 = 10
ggufTypeInt64 = 11
ggufTypeFloat64 = 12
)
// testGGUFBuilder helps build GGUF files for testing
type testGGUFBuilder struct {
buf *bytes.Buffer
version uint32
tensorCount uint64
kvPairs []testKVPair
}
type testKVPair struct {
key string
valueType uint32
value interface{}
}
func newTestGGUFBuilder() *testGGUFBuilder {
return &testGGUFBuilder{
buf: new(bytes.Buffer),
version: 3,
tensorCount: 0,
kvPairs: []testKVPair{},
}
}
func (b *testGGUFBuilder) withVersion(v uint32) *testGGUFBuilder {
b.version = v
return b
}
func (b *testGGUFBuilder) withTensorCount(count uint64) *testGGUFBuilder {
b.tensorCount = count
return b
}
func (b *testGGUFBuilder) withStringKV(key, value string) *testGGUFBuilder {
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeString, value: value})
return b
}
func (b *testGGUFBuilder) withUint64KV(key string, value uint64) *testGGUFBuilder {
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint64, value: value})
return b
}
func (b *testGGUFBuilder) withUint32KV(key string, value uint32) *testGGUFBuilder {
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint32, value: value})
return b
}
func (b *testGGUFBuilder) writeString(s string) {
binary.Write(b.buf, binary.LittleEndian, uint64(len(s)))
b.buf.WriteString(s)
}
func (b *testGGUFBuilder) build() []byte {
// Write magic number "GGUF"
binary.Write(b.buf, binary.LittleEndian, uint32(ggufMagic))
// Write version
binary.Write(b.buf, binary.LittleEndian, b.version)
// Write tensor count
binary.Write(b.buf, binary.LittleEndian, b.tensorCount)
// Write KV count
binary.Write(b.buf, binary.LittleEndian, uint64(len(b.kvPairs)))
// Write KV pairs
for _, kv := range b.kvPairs {
// Write key
b.writeString(kv.key)
// Write value type
binary.Write(b.buf, binary.LittleEndian, kv.valueType)
// Write value based on type
switch kv.valueType {
case ggufTypeString:
b.writeString(kv.value.(string))
case ggufTypeUint32:
binary.Write(b.buf, binary.LittleEndian, kv.value.(uint32))
case ggufTypeUint64:
binary.Write(b.buf, binary.LittleEndian, kv.value.(uint64))
case ggufTypeUint8:
binary.Write(b.buf, binary.LittleEndian, kv.value.(uint8))
case ggufTypeInt32:
binary.Write(b.buf, binary.LittleEndian, kv.value.(int32))
case ggufTypeBool:
var v uint8
if kv.value.(bool) {
v = 1
}
binary.Write(b.buf, binary.LittleEndian, v)
}
}
return b.buf.Bytes()
}
// buildInvalidMagic creates a file with invalid magic number
func (b *testGGUFBuilder) buildInvalidMagic() []byte {
buf := new(bytes.Buffer)
binary.Write(buf, binary.LittleEndian, uint32(0x12345678))
return buf.Bytes()
}

View File

@ -0,0 +1,58 @@
package cpe
import (
"strings"
"github.com/anchore/syft/syft/pkg"
)
// TargetSoftwareToPackageType is derived from looking at target_software attributes in the NVD dataset
// TODO: ideally this would be driven from the store, where we can resolve ecosystem aliases directly
func TargetSoftwareToPackageType(tsw string) pkg.Type {
tsw = strings.NewReplacer("-", "_", " ", "_").Replace(strings.ToLower(tsw))
switch tsw {
case "alpine", "apk":
return pkg.ApkPkg
case "debian", "dpkg":
return pkg.DebPkg
case "java", "maven", "ant", "gradle", "jenkins", "jenkins_ci", "kafka", "logstash", "mule", "nifi", "solr", "spark", "storm", "struts", "tomcat", "zookeeper", "log4j":
return pkg.JavaPkg
case "javascript", "node", "nodejs", "node.js", "npm", "yarn", "apache", "jquery", "next.js", "prismjs":
return pkg.NpmPkg
case "c", "c++", "c/c++", "conan", "gnu_c++", "qt":
return pkg.ConanPkg
case "dart":
return pkg.DartPubPkg
case "redhat", "rpm", "redhat_enterprise_linux", "rhel", "suse", "suse_linux", "opensuse", "opensuse_linux", "fedora", "centos", "oracle_linux", "ol":
return pkg.RpmPkg
case "elixir", "hex":
return pkg.HexPkg
case "erlang":
return pkg.ErlangOTPPkg
case ".net", ".net_framework", "asp", "asp.net", "dotnet", "dotnet_framework", "c#", "csharp", "nuget":
return pkg.DotnetPkg
case "ruby", "gem", "nokogiri", "ruby_on_rails":
return pkg.GemPkg
case "rust", "cargo", "crates":
return pkg.RustPkg
case "python", "pip", "pypi", "flask":
return pkg.PythonPkg
case "kb", "knowledgebase", "msrc", "mskb", "microsoft":
return pkg.KbPkg
case "portage", "gentoo":
return pkg.PortagePkg
case "go", "golang", "gomodule":
return pkg.GoModulePkg
case "linux_kernel", "linux", "z/linux":
return pkg.LinuxKernelPkg
case "php":
return pkg.PhpComposerPkg
case "swift":
return pkg.SwiftPkg
case "wordpress", "wordpress_plugin", "wordpress_":
return pkg.WordpressPluginPkg
case "lua", "luarocks":
return pkg.LuaRocksPkg
}
return ""
}
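A sketch of the normalization behavior (written as an in-package test to avoid assuming the import path): case, spaces, and dashes are folded before matching, and unmapped values yield the empty type:

package cpe

import (
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/anchore/syft/syft/pkg"
)

// A sketch: "Node.js" lowercases to "node.js", "Ruby on Rails" folds to
// "ruby_on_rails", and unknown target_software values map to "".
func TestTargetSoftwareToPackageType_Sketch(t *testing.T) {
	require.Equal(t, pkg.NpmPkg, TargetSoftwareToPackageType("Node.js"))
	require.Equal(t, pkg.GemPkg, TargetSoftwareToPackageType("Ruby on Rails"))
	require.Equal(t, pkg.Type(""), TargetSoftwareToPackageType("some-unknown-target"))
}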

View File

@ -80,7 +80,7 @@ func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, _ f
// processJavaArchive processes an archive for java contents, returning all Java libraries and nested archives
func (gap genericArchiveParserAdapter) processJavaArchive(ctx context.Context, reader file.LocationReadCloser, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {
parser, cleanupFn, err := newJavaArchiveParser(reader, true, gap.cfg)
parser, cleanupFn, err := newJavaArchiveParser(ctx, reader, true, gap.cfg)
// note: even on error, we should always run cleanup functions
defer cleanupFn()
if err != nil {
@ -99,7 +99,7 @@ func uniquePkgKey(groupID string, p *pkg.Package) string {
// newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover
// and parse nested archives or ignore them.
func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) {
func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) {
// fetch the last element of the virtual path
virtualElements := strings.Split(reader.Path(), ":")
currentFilepath := virtualElements[len(virtualElements)-1]
@ -109,7 +109,7 @@ func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg
return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err)
}
fileManifest, err := intFile.NewZipFileManifest(archivePath)
fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath)
if err != nil {
return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err)
}
@ -226,7 +226,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
}
// fetch the manifest file
contents, err := intFile.ContentsFromZip(j.archivePath, manifestMatches...)
contents, err := intFile.ContentsFromZip(ctx, j.archivePath, manifestMatches...)
if err != nil {
return nil, fmt.Errorf("unable to extract java manifests (%s): %w", j.location, err)
}
@ -387,8 +387,9 @@ type parsedPomProject struct {
// discoverMainPackageFromPomInfo attempts to resolve maven groupId, artifactId, version and other info from found pom information
func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (group, name, version string, parsedPom *parsedPomProject) {
properties, _ := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
projects, _ := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
// Find the pom.properties/pom.xml if the names seem like a plausible match
properties, _ := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
projects, _ := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
artifactsMap := j.buildArtifactsMap(properties)
pomProperties, parsedPom := j.findBestPomMatch(properties, projects, artifactsMap)
@ -519,13 +520,13 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(ctx context.Context, paren
var pkgs []pkg.Package
// pom.properties
properties, err := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
properties, err := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
if err != nil {
return nil, err
}
// pom.xml
projects, err := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
projects, err := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
if err != nil {
return nil, err
}
@ -575,7 +576,7 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.
}
if len(licenseMatches) > 0 {
contents, err := intFile.ContentsFromZip(j.archivePath, licenseMatches...)
contents, err := intFile.ContentsFromZip(ctx, j.archivePath, licenseMatches...)
if err != nil {
return nil, fmt.Errorf("unable to extract java license (%s): %w", j.location, err)
}
@ -616,7 +617,7 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(ctx context.Context, pare
// associating each discovered package to the given parent package.
func discoverPkgsFromZip(ctx context.Context, location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
// search and parse pom.properties files & fetch the contents
openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...)
openers, err := intFile.ExtractFromZipToUniqueTempFile(ctx, archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...)
if err != nil {
return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err)
}
@ -680,8 +681,8 @@ func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWit
return nestedPkgs, nestedRelationships, nil
}
func pomPropertiesByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) {
contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...)
func pomPropertiesByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) {
contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...)
if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err)
}
@ -709,8 +710,8 @@ func pomPropertiesByParentPath(archivePath string, location file.Location, extra
return propertiesByParentPath, nil
}
func pomProjectByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) {
contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...)
func pomProjectByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) {
contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...)
if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err)
}

View File

@ -72,7 +72,7 @@ func TestSearchMavenForLicenses(t *testing.T) {
require.NoError(t, err)
// setup parser
ap, cleanupFn, err := newJavaArchiveParser(
ap, cleanupFn, err := newJavaArchiveParser(context.Background(),
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
@ -372,7 +372,7 @@ func TestParseJar(t *testing.T) {
UseNetwork: false,
UseMavenLocalRepository: false,
}
parser, cleanupFn, err := newJavaArchiveParser(
parser, cleanupFn, err := newJavaArchiveParser(context.Background(),
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
@ -1499,7 +1499,7 @@ func Test_deterministicMatchingPomProperties(t *testing.T) {
fixture, err := os.Open(fixturePath)
require.NoError(t, err)
parser, cleanupFn, err := newJavaArchiveParser(
parser, cleanupFn, err := newJavaArchiveParser(context.Background(),
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
@ -1636,7 +1636,7 @@ func Test_jarPomPropertyResolutionDoesNotPanic(t *testing.T) {
ctx := context.TODO()
// setup parser
ap, cleanupFn, err := newJavaArchiveParser(
ap, cleanupFn, err := newJavaArchiveParser(context.Background(),
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,

View File

@ -70,7 +70,7 @@ func (gtp genericTarWrappedJavaArchiveParser) parseTarWrappedJavaArchive(ctx con
}
func discoverPkgsFromTar(ctx context.Context, location file.Location, archivePath, contentPath string, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...)
openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(ctx, archivePath, contentPath, archiveFormatGlobs...)
if err != nil {
return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err)
}

View File

@ -41,7 +41,7 @@ func (gzp genericZipWrappedJavaArchiveParser) parseZipWrappedJavaArchive(ctx con
// functions support zips with shell scripts prepended to the file. Specifically, the helpers use the central
// header at the end of the file to determine where the beginning of the zip payload is (unlike the standard lib
// or archiver).
fileManifest, err := intFile.NewZipFileManifest(archivePath)
fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath)
if err != nil {
return nil, nil, fmt.Errorf("unable to read files from java archive: %w", err)
}

syft/pkg/gguf.go (new file)
View File

@ -0,0 +1,37 @@
package pkg
// GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file.
// GGUF is a binary file format used for storing model weights for the GGML library, designed for fast
// loading and saving of models, particularly quantized large language models.
// Note: the model name, license, and version are not stored here; they are lifted up onto the syft Package itself.
type GGUFFileHeader struct {
// GGUFVersion is the GGUF format version (e.g., 3)
GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"`
// FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)
FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`
// Architecture is the model architecture (from general.architecture, e.g., "qwen3moe", "llama")
Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`
// Quantization is the quantization type (e.g., "IQ4_NL", "Q4_K_M")
Quantization string `json:"quantization,omitempty" cyclonedx:"quantization"`
// Parameters is the number of model parameters (if present in header)
Parameters uint64 `json:"parameters,omitempty" cyclonedx:"parameters"`
// TensorCount is the number of tensors in the model
TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`
// RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already
// represented as typed fields above. This preserves additional metadata fields for reference
// (namespaced with general.*, llama.*, etc.) while avoiding duplication.
RemainingKeyValues map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
// MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.
// This hash is computed over the complete header metadata (including the fields extracted
// into typed fields above) and provides a stable identifier for the model configuration
// across different file locations or remotes. It allows matching identical models even
// when stored in different repositories or with different filenames.
MetadataKeyValuesHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"`
}
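A sketch of how this metadata serializes, using values drawn from the cataloger tests above; note that RemainingKeyValues serializes under the "header" JSON key:

package main

import (
	"encoding/json"
	"fmt"

	"github.com/anchore/syft/syft/pkg"
)

func main() {
	m := pkg.GGUFFileHeader{
		GGUFVersion:           3,
		Architecture:          "llama",
		Quantization:          "Q4_K_M",
		Parameters:            8030000000,
		TensorCount:           291,
		RemainingKeyValues:    map[string]interface{}{"general.some_random_kv": "foobar"},
		MetadataKeyValuesHash: "6e3d368066455ce4",
	}
	out, _ := json.MarshalIndent(m, "", "  ")
	fmt.Println(string(out))
}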

View File

@ -1,10 +1,7 @@
package pkg
// MicrosoftKbPatch is slightly odd in how it is expected to map onto data.
// This is critical to grasp because there is no MSRC cataloger. The `ProductID`
// field is expected to be the MSRC Product ID, for example:
// "Windows 10 Version 1703 for 32-bit Systems".
// `Kb` is expected to be the actual KB number, for example "5001028"
// MicrosoftKbPatch represents a Windows Knowledge Base patch identifier associated with a specific Microsoft product from the MSRC (Microsoft Security Response Center).
// This type captures both the product being patched and the KB article number for the update.
type MicrosoftKbPatch struct {
// ProductID is MSRC Product ID (e.g. "Windows 10 Version 1703 for 32-bit Systems")
ProductID string `toml:"product_id" json:"product_id"`

View File

@ -48,6 +48,7 @@ type YarnLockEntry struct {
Dependencies map[string]string `mapstructure:"dependencies" json:"dependencies"`
}
// PnpmLockResolution contains package resolution metadata from pnpm lockfiles, including the integrity hash used for verification.
type PnpmLockResolution struct {
// Integrity is Subresource Integrity hash for verification (SRI format)
Integrity string `mapstructure:"integrity" json:"integrity"`

View File

@ -54,6 +54,7 @@ const (
TerraformPkg Type = "terraform"
WordpressPluginPkg Type = "wordpress-plugin"
HomebrewPkg Type = "homebrew"
ModelPkg Type = "model"
)
// AllPkgs represents all supported package types
@ -98,6 +99,7 @@ var AllPkgs = []Type{
TerraformPkg,
WordpressPluginPkg,
HomebrewPkg,
ModelPkg,
}
// PackageURLType returns the PURL package type for the current package.

View File

@ -155,6 +155,7 @@ func TestTypeFromPURL(t *testing.T) {
expectedTypes.Remove(string(HomebrewPkg))
expectedTypes.Remove(string(TerraformPkg))
expectedTypes.Remove(string(GraalVMNativeImagePkg))
expectedTypes.Remove(string(ModelPkg)) // no valid purl for ai artifacts currently
expectedTypes.Remove(string(PhpPeclPkg)) // we should always consider this a pear package
for _, test := range tests {

View File

@ -4,13 +4,15 @@ import (
"context"
"crypto"
"fmt"
"io"
"os"
"path"
"path/filepath"
"sync"
"github.com/mholt/archives"
"github.com/opencontainers/go-digest"
"github.com/anchore/archiver/v3"
stereoFile "github.com/anchore/stereoscope/pkg/file"
intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log"
@ -208,18 +210,8 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro
// if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and
// use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is
// unarchived.
envelopedUnarchiver, err := archiver.ByExtension(path)
if unarchiver, ok := envelopedUnarchiver.(archiver.Unarchiver); err == nil && ok {
// when tar/zip files are extracted, if there are multiple entries at the same
// location, the last entry wins
// NOTE: this currently does not display any messages if an overwrite happens
switch v := unarchiver.(type) {
case *archiver.Tar:
v.OverwriteExisting = true
case *archiver.Zip:
v.OverwriteExisting = true
}
envelopedUnarchiver, _, err := archives.Identify(context.Background(), path, nil)
if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok {
analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver)
if err != nil {
return "", nil, fmt.Errorf("unable to unarchive source file: %w", err)
@ -246,15 +238,58 @@ func digestOfFileContents(path string) string {
return di.String()
}
func unarchiveToTmp(path string, unarchiver archiver.Unarchiver) (string, func() error, error) {
func unarchiveToTmp(path string, unarchiver archives.Extractor) (string, func() error, error) {
var cleanupFn = func() error { return nil }
archive, err := os.Open(path)
if err != nil {
return "", cleanupFn, fmt.Errorf("unable to open archive: %v", err)
}
defer archive.Close()
tempDir, err := os.MkdirTemp("", "syft-archive-contents-")
if err != nil {
return "", func() error { return nil }, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
return "", cleanupFn, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
}
cleanupFn := func() error {
visitor := func(_ context.Context, file archives.FileInfo) error {
// Protect against symlink attacks by ensuring path doesn't escape tempDir
destPath, err := intFile.SafeJoin(tempDir, file.NameInArchive)
if err != nil {
return err
}
if file.IsDir() {
return os.MkdirAll(destPath, file.Mode())
}
if err = os.MkdirAll(filepath.Dir(destPath), os.ModeDir|0755); err != nil {
return fmt.Errorf("failed to create parent directory: %w", err)
}
rc, err := file.Open()
if err != nil {
return fmt.Errorf("failed to open file in archive: %w", err)
}
defer rc.Close()
destFile, err := os.Create(destPath)
if err != nil {
return fmt.Errorf("failed to create file in destination: %w", err)
}
defer destFile.Close()
if err := destFile.Chmod(file.Mode()); err != nil {
return fmt.Errorf("failed to change mode of destination file: %w", err)
}
if _, err := io.Copy(destFile, rc); err != nil {
return fmt.Errorf("failed to copy file contents: %w", err)
}
return nil
}
return tempDir, func() error {
return os.RemoveAll(tempDir)
}
return tempDir, cleanupFn, unarchiver.Unarchive(path, tempDir)
}, unarchiver.Extract(context.Background(), archive, visitor)
}