Mirror of https://github.com/anchore/syft.git (synced 2025-11-17 08:23:15 +01:00)

Compare commits: 2b9700f1ee ... 91f612069d

No commits in common. "2b9700f1ee7d6c9e844adf4cf610eb7f62776d8c" and "91f612069d2ae4966ed30f4ea102d4628f878298" have entirely different histories.
@@ -90,7 +90,7 @@ tools:
# used for running all local and CI tasks
- name: task
version:
want: v3.45.5
want: v3.45.4
method: github-release
with:
repo: go-task/task
@@ -98,7 +98,7 @@ tools:
# used for triggering a release
- name: gh
version:
want: v2.83.1
want: v2.83.0
method: github-release
with:
repo: cli/cli
7
.github/workflows/codeql-analysis.yml
vendored
@@ -6,7 +6,6 @@
name: "CodeQL Security Scan"

on:
workflow_dispatch:
push:
branches:
# only run when there are pushes to the main branch (not on PRs)
@@ -49,7 +48,7 @@ jobs:

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@014f16e7ab1402f30e7c3329d33797e7948572db #v3.29.5
uses: github/codeql-action/init@0499de31b99561a6d14a36a5f662c2a54f91beee #v3.29.5
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -60,7 +59,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@014f16e7ab1402f30e7c3329d33797e7948572db #v3.29.5
uses: github/codeql-action/autobuild@0499de31b99561a6d14a36a5f662c2a54f91beee #v3.29.5

# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -74,4 +73,4 @@ jobs:
# make release

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@014f16e7ab1402f30e7c3329d33797e7948572db #v3.29.5
uses: github/codeql-action/analyze@0499de31b99561a6d14a36a5f662c2a54f91beee #v3.29.5
1
.github/workflows/dependabot-automation.yaml
vendored
@@ -7,4 +7,5 @@ permissions:

jobs:
run:
# Runner definition: workflows/.github/runs-on.yml
uses: anchore/workflows/.github/workflows/dependabot-automation.yaml@main
1
.github/workflows/oss-project-board-add.yaml
vendored
@@ -13,6 +13,7 @@ on:

jobs:
run:
# Runner definition: workflows/.github/runs-on.yml
uses: "anchore/workflows/.github/workflows/oss-project-board-add.yaml@main"
secrets:
token: ${{ secrets.OSS_PROJECT_GH_TOKEN }}
1
.github/workflows/release.yaml
vendored
@@ -192,6 +192,7 @@ jobs:
release-install-script:
needs: [release]
if: ${{ needs.release.result == 'success' }}
# Runner definition: workflows/.github/runs-on.yml
uses: "anchore/workflows/.github/workflows/release-install-script.yaml@main"
with:
tag: ${{ github.event.inputs.version }}
@@ -10,6 +10,7 @@ jobs:
contents: read
issues: write
pull-requests: write
# Runner definition: workflows/.github/runs-on.yml
uses: "anchore/workflows/.github/workflows/remove-awaiting-response-label.yaml@main"
secrets:
token: ${{ secrets.OSS_PROJECT_GH_TOKEN }}
@@ -15,7 +15,7 @@ jobs:
name: "Publish test fixture image cache"
# we use this runner to get enough storage space for docker images and fixture cache
# Runner definition: workflows/.github/runs-on.yml
runs-on: runs-on=${{ github.run_id }}/runner=build/disk=large
runs-on: runs-on=${{ github.run_id }}/runner=release
if: github.repository == 'anchore/syft' # only run for main repo
permissions:
packages: write
@@ -1,7 +1,6 @@
name: "Validate GitHub Actions"

on:
workflow_dispatch:
pull_request:
paths:
- '.github/workflows/**'
153
.github/workflows/validations.yaml
vendored
@@ -32,7 +32,7 @@ jobs:
name: "Unit tests"
# we need more storage than what's on the default runner
# Runner definition: workflows/.github/runs-on.yml
runs-on: runs-on=${{ github.run_id }}/runner=medium
runs-on: runs-on=${{ github.run_id }}/runner=small
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 #v5.0.0
with:
@@ -50,7 +50,7 @@ jobs:
# Note: changing this job name requires making the same update in the .github/workflows/release.yaml pipeline
name: "Integration tests"
# Runner definition: workflows/.github/runs-on.yml
runs-on: runs-on=${{ github.run_id }}/runner=medium
runs-on: runs-on=${{ github.run_id }}/runner=small
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 #v5.0.0
with:
@@ -97,12 +97,100 @@ jobs:
path: snapshot/
retention-days: 30

# # upload each platform artifact individually so downstream jobs can download only what they need
# - run: npm install @actions/artifact@2.3.2
#
# - name: Upload individual platform artifacts
# uses: actions/github-script@v8
# env:
# ACTIONS_ARTIFACT_UPLOAD_CONCURRENCY: 10
# with:
# script: |
# const { readdirSync } = require('fs')
# const { DefaultArtifactClient } = require('@actions/artifact')
# const artifact = new DefaultArtifactClient()
# const ls = d => readdirSync(d, { withFileTypes: true })
# const baseDir = "./snapshot"
# const dirs = ls(baseDir).filter(f => f.isDirectory()).map(f => f.name)
# const uploads = []
#
# // filter to only amd64 and arm64 architectures
# const supportedArchs = ['amd64', 'arm64']
# const filteredDirs = dirs.filter(dir =>
# supportedArchs.some(arch => dir.includes(arch))
# )
#
# // upload platform subdirectories
# for (const dir of filteredDirs) {
# // uploadArtifact returns Promise<{id, size}>
# uploads.push(artifact.uploadArtifact(
# // name of the archive:
# `${dir}`,
# // array of all files to include:
# ls(`${baseDir}/${dir}`).map(f => `${baseDir}/${dir}/${f.name}`),
# // base directory to trim from entries:
# `${baseDir}/${dir}`,
# { retentionDays: 30 }
# ))
# }
#
# // upload RPM and DEB packages for supported architectures
# const packageFiles = ls(baseDir).filter(f =>
# f.isFile() &&
# (f.name.endsWith('.deb') || f.name.endsWith('.rpm')) &&
# supportedArchs.some(arch => f.name.includes(`_${arch}.`))
# )
# for (const file of packageFiles) {
# uploads.push(artifact.uploadArtifact(
# file.name,
# [`${baseDir}/${file.name}`],
# baseDir,
# { retentionDays: 30 }
# ))
# }
#
# // upload SBOM files for supported architectures
# const sbomFiles = ls(baseDir).filter(f =>
# f.isFile() &&
# f.name.endsWith('.sbom') &&
# supportedArchs.some(arch => f.name.includes(`_${arch}.`))
# )
# for (const file of sbomFiles) {
# uploads.push(artifact.uploadArtifact(
# file.name,
# [`${baseDir}/${file.name}`],
# baseDir,
# { retentionDays: 30 }
# ))
# }
#
# // upload checksums file (needed by install tests)
# const rootFiles = ls(baseDir).filter(f => f.isFile() && f.name.match(/syft_.*_checksums\.txt$/))
# if (rootFiles.length > 0) {
# const checksumsFile = rootFiles[0].name
# uploads.push(artifact.uploadArtifact(
# 'syft_checksums.txt',
# [`${baseDir}/${checksumsFile}`],
# baseDir,
# { retentionDays: 30 }
# ))
# }
#
# // wait for all uploads to finish
# try {
# const results = await Promise.all(uploads)
# console.log(`Successfully uploaded ${results.length} artifacts`)
# } catch (error) {
# console.error('Upload failed:', error)
# throw error
# }

Acceptance-Linux:
# Note: changing this job name requires making the same update in the .github/workflows/release.yaml pipeline
name: "Acceptance tests (Linux)"
needs: [Build-Snapshot-Artifacts]
# Runner definition: workflows/.github/runs-on.yml
runs-on: runs-on=${{ github.run_id }}/runner=medium
runs-on: runs-on=${{ github.run_id }}/runner=small
steps:
# required for magic-cache from runs-on to function with artifact upload/download (see https://runs-on.com/caching/magic-cache/#actionsupload-artifact-compatibility)
- uses: runs-on/action@v2
@@ -122,8 +210,35 @@ jobs:
name: snapshot
path: snapshot

- name: Restore binary permissions
run: chmod +x snapshot/*/syft snapshot/*/*.exe 2>/dev/null || true
# - name: Download checksums file
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# name: syft_checksums.txt
# path: snapshot
#
# - name: Download Linux amd64 snapshot
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# name: linux-build_linux_amd64_v1
# path: snapshot/linux-build_linux_amd64_v1
#
# - name: Download Linux amd64 deb
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# pattern: syft_*_linux_amd64.deb
# path: snapshot
#
# - name: Download Linux amd64 rpm
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# pattern: syft_*_linux_amd64.rpm
# path: snapshot
#
# - name: Download Linux amd64 sbom
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# pattern: syft_*_linux_amd64.sbom
# path: snapshot

- name: Run comparison tests (Linux)
run: make compare-linux
@@ -166,8 +281,23 @@ jobs:
name: snapshot
path: snapshot

- name: Restore binary permissions
run: chmod +x snapshot/*/syft 2>/dev/null || true
# - name: Download checksums file
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# name: syft_checksums.txt
# path: snapshot
#
# - name: Download macOS Intel snapshot
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# name: darwin-build_darwin_amd64_v1
# path: snapshot/darwin-build_darwin_amd64_v1
#
# - name: Download macOS amd64 sbom
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# pattern: syft_*_darwin_amd64.sbom
# path: snapshot

- name: Run comparison tests (Mac)
run: make compare-mac
@@ -180,7 +310,7 @@ jobs:
name: "CLI tests (Linux)"
needs: [Build-Snapshot-Artifacts]
# Runner definition: workflows/.github/runs-on.yml
runs-on: runs-on=${{ github.run_id }}/runner=medium
runs-on: runs-on=${{ github.run_id }}/runner=small
steps:
# required for magic-cache from runs-on to function with artifact upload/download (see https://runs-on.com/caching/magic-cache/#actionsupload-artifact-compatibility)
- uses: runs-on/action@v2
@@ -200,8 +330,11 @@ jobs:
name: snapshot
path: snapshot

- name: Restore binary permissions
run: chmod +x snapshot/*/syft snapshot/*/*.exe 2>/dev/null || true
# - name: Download Linux amd64 snapshot
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 #v6.0.0
# with:
# name: linux-build_linux_amd64_v1
# path: snapshot/linux-build_linux_amd64_v1

- name: Run CLI Tests (Linux)
run: make cli
2
.gitignore
vendored
@@ -73,5 +73,3 @@ cosign.pub
__pycache__/
*.py[cod]
*$py.class
@@ -106,8 +106,8 @@ syft <image> -o <format>
Where the `formats` available are:
- `syft-json`: Use this to get as much information out of Syft as possible!
- `syft-text`: A row-oriented, human-and-machine-friendly output.
- `cyclonedx-xml`: An XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-xml@1.5`: An XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-xml`: A XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-xml@1.5`: A XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-json`: A JSON report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-json@1.5`: A JSON report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
- `spdx-tag-value`: A tag-value formatted report conforming to the [SPDX 2.3 specification](https://spdx.github.io/spdx-spec/v2.3/).
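As a point of reference for the hunk above, a minimal invocation of the command form `syft <image> -o <format>` might look like the following (the image name and output redirection are illustrative, not taken from this diff):

syft alpine:3.19 -o cyclonedx-json@1.5 > sbom.cdx.json
syft alpine:3.19 -o spdx-tag-value > sbom.spdx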
@@ -39,9 +39,9 @@ vars:
# e.g. when installing snapshot debs from a local path, ./ forces the deb to be installed in the current working directory instead of referencing a package name
SNAPSHOT_DIR: ./snapshot
SNAPSHOT_BIN: "{{ .PROJECT_ROOT }}/{{ .SNAPSHOT_DIR }}/{{ .OS }}-build_{{ .OS }}_{{ .ARCH }}/{{ .PROJECT }}"
SNAPSHOT_CMD: "{{ .TOOL_DIR }}/goreleaser release --config {{ .TMP_DIR }}/goreleaser.yaml --clean --snapshot --skip=publish --skip=sign --parallelism=12"
BUILD_CMD: "{{ .TOOL_DIR }}/goreleaser build --config {{ .TMP_DIR }}/goreleaser.yaml --clean --snapshot --single-target --parallelism=12"
RELEASE_CMD: "{{ .TOOL_DIR }}/goreleaser release --clean --release-notes {{ .CHANGELOG }} --parallelism=12"
SNAPSHOT_CMD: "{{ .TOOL_DIR }}/goreleaser release --config {{ .TMP_DIR }}/goreleaser.yaml --clean --snapshot --skip=publish --skip=sign"
BUILD_CMD: "{{ .TOOL_DIR }}/goreleaser build --config {{ .TMP_DIR }}/goreleaser.yaml --clean --snapshot --single-target"
RELEASE_CMD: "{{ .TOOL_DIR }}/goreleaser release --clean --release-notes {{ .CHANGELOG }}"
VERSION:
sh: git describe --dirty --always --tags
@@ -218,14 +218,14 @@ tasks:
# unit test coverage threshold (in % coverage)
COVERAGE_THRESHOLD: 62
cmds:
- "go test -p=4 -parallel=8 -coverprofile {{ .TMP_DIR }}/unit-coverage-details.txt {{ .TEST_PKGS }}"
- "go test -coverprofile {{ .TMP_DIR }}/unit-coverage-details.txt {{ .TEST_PKGS }}"
- cmd: ".github/scripts/coverage.py {{ .COVERAGE_THRESHOLD }} {{ .TMP_DIR }}/unit-coverage-details.txt"
silent: true

integration:
desc: Run integration tests
cmds:
- "go test -v -p=4 -parallel=8 ./cmd/syft/internal/test/integration"
- "go test -v ./cmd/syft/internal/test/integration"
# exercise most of the CLI with the data race detector
# we use a larger image to ensure we're using multiple catalogers at a time
- "go run -race cmd/syft/main.go anchore/test_images:grype-quality-dotnet-69f15d2"
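For orientation, the unit-test task above roughly expands to the following commands once the Taskfile templates are filled in (the TMP_DIR and TEST_PKGS values shown here are assumed placeholders, not values taken from this diff):

go test -p=4 -parallel=8 -coverprofile .tmp/unit-coverage-details.txt ./...
.github/scripts/coverage.py 62 .tmp/unit-coverage-details.txt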
@@ -87,7 +87,6 @@ func TestPkgCoverageImage(t *testing.T) {
definedPkgs.Remove(string(pkg.TerraformPkg))
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // we have coverage for pear instead
definedPkgs.Remove(string(pkg.CondaPkg))
definedPkgs.Remove(string(pkg.ModelPkg))

var cases []testCase
cases = append(cases, commonTestCases...)
@@ -162,7 +161,6 @@ func TestPkgCoverageDirectory(t *testing.T) {
definedPkgs.Remove(string(pkg.UnknownPkg))
definedPkgs.Remove(string(pkg.CondaPkg))
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // this is covered as pear packages
definedPkgs.Remove(string(pkg.ModelPkg))

// for directory scans we should not expect to see any of the following package types
definedPkgs.Remove(string(pkg.KbPkg))
22
go.mod
@ -11,6 +11,7 @@ require (
|
||||
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
|
||||
github.com/acobaugh/osrelease v0.1.0
|
||||
github.com/adrg/xdg v0.5.3
|
||||
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51
|
||||
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9
|
||||
github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716
|
||||
github.com/anchore/fangs v0.0.0-20250319222917-446a1e748ec2
|
||||
@ -89,7 +90,7 @@ require (
|
||||
go.uber.org/goleak v1.3.0
|
||||
go.yaml.in/yaml/v3 v3.0.4
|
||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b
|
||||
golang.org/x/mod v0.30.0
|
||||
golang.org/x/mod v0.29.0
|
||||
golang.org/x/net v0.46.0
|
||||
modernc.org/sqlite v1.40.0
|
||||
)
|
||||
@ -167,6 +168,7 @@ require (
|
||||
github.com/goccy/go-yaml v1.18.0
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
|
||||
github.com/golang/snappy v0.0.4 // indirect
|
||||
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect
|
||||
github.com/google/s2a-go v0.1.8 // indirect
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
|
||||
@ -207,6 +209,10 @@ require (
|
||||
github.com/muesli/cancelreader v0.2.2 // indirect
|
||||
github.com/muesli/termenv v0.16.0 // indirect
|
||||
github.com/ncruces/go-strftime v0.1.9 // indirect
|
||||
github.com/nwaples/rardecode v1.1.3 // indirect
|
||||
github.com/nwaples/rardecode/v2 v2.2.0 // indirect
|
||||
github.com/olekukonko/errors v1.1.0 // indirect
|
||||
github.com/olekukonko/ll v0.1.2 // indirect
|
||||
github.com/opencontainers/image-spec v1.1.1 // indirect
|
||||
github.com/opencontainers/runtime-spec v1.1.0 // indirect
|
||||
github.com/opencontainers/selinux v1.13.0 // indirect
|
||||
@ -280,11 +286,6 @@ require (
|
||||
modernc.org/memory v1.11.0 // indirect
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/cespare/xxhash/v2 v2.3.0
|
||||
github.com/gpustack/gguf-parser-go v0.22.1
|
||||
)
|
||||
|
||||
require (
|
||||
cyphar.com/go-pathrs v0.2.1 // indirect
|
||||
github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect
|
||||
@ -309,16 +310,7 @@ require (
|
||||
github.com/clipperhouse/stringish v0.1.1 // indirect
|
||||
github.com/clipperhouse/uax29/v2 v2.2.0 // indirect
|
||||
github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect
|
||||
github.com/henvic/httpretty v0.1.4 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/nwaples/rardecode/v2 v2.2.0 // indirect
|
||||
github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect
|
||||
github.com/olekukonko/errors v1.1.0 // indirect
|
||||
github.com/olekukonko/ll v0.1.2 // indirect
|
||||
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
|
||||
gonum.org/v1/gonum v0.15.1 // indirect
|
||||
)
|
||||
|
||||
retract (
|
||||
|
||||
22
go.sum
@ -110,6 +110,8 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
|
||||
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51 h1:yhk+P8lF3ZiROjmaVRao9WGTRo4b/wYjoKEiAHWrKwc=
|
||||
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51/go.mod h1:nwuGSd7aZp0rtYt79YggCGafz1RYsclE7pi3fhLwvuw=
|
||||
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 h1:p0ZIe0htYOX284Y4axJaGBvXHU0VCCzLN5Wf5XbKStU=
|
||||
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9/go.mod h1:3ZsFB9tzW3vl4gEiUeuSOMDnwroWxIxJelOOHUp8dSw=
|
||||
github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 h1:2sIdYJlQESEnyk3Y0WD2vXWW5eD2iMz9Ev8fj1Z8LNA=
|
||||
@ -227,6 +229,7 @@ github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqy
|
||||
github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
|
||||
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||
github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||
github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
|
||||
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
|
||||
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
@ -477,6 +480,8 @@ github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu
|
||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
|
||||
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
||||
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
||||
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
|
||||
@ -544,8 +549,6 @@ github.com/gookit/assert v0.1.1/go.mod h1:jS5bmIVQZTIwk42uXl4lyj4iaaxx32tqH16CFj
|
||||
github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg=
|
||||
github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA=
|
||||
github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs=
|
||||
github.com/gpustack/gguf-parser-go v0.22.1 h1:FRnEDWqT0Rcplr/R9ctCRSN2+3DhVsf6dnR5/i9JA4E=
|
||||
github.com/gpustack/gguf-parser-go v0.22.1/go.mod h1:y4TwTtDqFWTK+xvprOjRUh+dowgU2TKCX37vRKvGiZ0=
|
||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
|
||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
|
||||
@ -595,8 +598,6 @@ github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn
|
||||
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
|
||||
github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
|
||||
github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
|
||||
github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
|
||||
github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
|
||||
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
||||
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
|
||||
@ -624,7 +625,6 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF
|
||||
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
||||
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||
github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
||||
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
|
||||
@ -730,11 +730,9 @@ github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcY
|
||||
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA=
|
||||
github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
|
||||
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
|
||||
@ -751,6 +749,8 @@ github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1 h1:kpt9ZfKcm+
|
||||
github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1/go.mod h1:qgCw4bBKZX8qMgGeEZzGFVT3notl42dBjNqO2jut0M0=
|
||||
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 h1:NHrXEjTNQY7P0Zfx1aMrNhpgxHmow66XQtm0aQLY0AE=
|
||||
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8=
|
||||
github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc=
|
||||
github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
|
||||
github.com/nwaples/rardecode/v2 v2.2.0 h1:4ufPGHiNe1rYJxYfehALLjup4Ls3ck42CWwjKiOqu0A=
|
||||
github.com/nwaples/rardecode/v2 v2.2.0/go.mod h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw=
|
||||
github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 h1:zrbMGy9YXpIeTnGj4EljqMiZsIcE09mmF8XsD5AYOJc=
|
||||
@ -860,8 +860,6 @@ github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yf
|
||||
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
|
||||
github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
|
||||
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
|
||||
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
|
||||
github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik=
|
||||
github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU=
|
||||
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
|
||||
@ -1072,8 +1070,8 @@ golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk=
|
||||
golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc=
|
||||
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
|
||||
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
|
||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
@ -1315,8 +1313,6 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
|
||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
|
||||
gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
|
||||
gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
|
||||
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
|
||||
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
|
||||
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
|
||||
|
||||
@@ -3,5 +3,5 @@ package internal
const (
// JSONSchemaVersion is the current schema version output by the JSON encoder
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
JSONSchemaVersion = "16.0.43"
JSONSchemaVersion = "16.0.42"
)
@ -1,40 +1,17 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/bmatcuk/doublestar/v4"
|
||||
"github.com/mholt/archives"
|
||||
|
||||
"github.com/anchore/syft/internal"
|
||||
"github.com/anchore/archiver/v3"
|
||||
)
|
||||
|
||||
// TraverseFilesInTar enumerates all paths stored within a tar archive using the visitor pattern.
|
||||
func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archives.FileHandler) error {
|
||||
tarReader, err := os.Open(archivePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to open tar archive (%s): %w", archivePath, err)
|
||||
}
|
||||
defer internal.CloseAndLogError(tarReader, archivePath)
|
||||
|
||||
format, _, err := archives.Identify(ctx, archivePath, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to identify tar compression format: %w", err)
|
||||
}
|
||||
|
||||
extractor, ok := format.(archives.Extractor)
|
||||
if !ok {
|
||||
return fmt.Errorf("file format does not support extraction: %s", archivePath)
|
||||
}
|
||||
|
||||
return extractor.Extract(ctx, tarReader, visitor)
|
||||
}
|
||||
|
||||
// ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted.
|
||||
func ExtractGlobsFromTarToUniqueTempFile(ctx context.Context, archivePath, dir string, globs ...string) (map[string]Opener, error) {
|
||||
func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...string) (map[string]Opener, error) {
|
||||
results := make(map[string]Opener)
|
||||
|
||||
// don't allow for full traversal, only select traversal from given paths
|
||||
@ -42,7 +19,9 @@ func ExtractGlobsFromTarToUniqueTempFile(ctx context.Context, archivePath, dir s
|
||||
return results, nil
|
||||
}
|
||||
|
||||
visitor := func(_ context.Context, file archives.FileInfo) error {
|
||||
visitor := func(file archiver.File) error {
|
||||
defer file.Close()
|
||||
|
||||
// ignore directories
|
||||
if file.IsDir() {
|
||||
return nil
|
||||
@ -64,13 +43,7 @@ func ExtractGlobsFromTarToUniqueTempFile(ctx context.Context, archivePath, dir s
|
||||
// provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
|
||||
defer tempFile.Close()
|
||||
|
||||
packedFile, err := file.Open()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to read file=%q from tar=%q: %w", file.NameInArchive, archivePath, err)
|
||||
}
|
||||
defer internal.CloseAndLogError(packedFile, archivePath)
|
||||
|
||||
if err := safeCopy(tempFile, packedFile); err != nil {
|
||||
if err := safeCopy(tempFile, file.ReadCloser); err != nil {
|
||||
return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err)
|
||||
}
|
||||
|
||||
@ -79,7 +52,7 @@ func ExtractGlobsFromTarToUniqueTempFile(ctx context.Context, archivePath, dir s
|
||||
return nil
|
||||
}
|
||||
|
||||
return results, TraverseFilesInTar(ctx, archivePath, visitor)
|
||||
return results, archiver.Walk(archivePath, visitor)
|
||||
}
|
||||
|
||||
func matchesAnyGlob(name string, globs ...string) bool {
|
||||
|
||||
@ -1,12 +1,10 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/mholt/archives"
|
||||
"github.com/scylladb/go-set/strset"
|
||||
|
||||
"github.com/anchore/syft/internal/log"
|
||||
@ -16,25 +14,22 @@ import (
|
||||
type ZipFileManifest map[string]os.FileInfo
|
||||
|
||||
// NewZipFileManifest creates and returns a new ZipFileManifest populated with path and metadata from the given zip archive path.
|
||||
func NewZipFileManifest(ctx context.Context, archivePath string) (ZipFileManifest, error) {
|
||||
zipReader, err := os.Open(archivePath)
|
||||
func NewZipFileManifest(archivePath string) (ZipFileManifest, error) {
|
||||
zipReader, err := OpenZip(archivePath)
|
||||
manifest := make(ZipFileManifest)
|
||||
if err != nil {
|
||||
log.Debugf("unable to open zip archive (%s): %v", archivePath, err)
|
||||
return manifest, err
|
||||
}
|
||||
defer func() {
|
||||
if err = zipReader.Close(); err != nil {
|
||||
err = zipReader.Close()
|
||||
if err != nil {
|
||||
log.Debugf("unable to close zip archive (%s): %+v", archivePath, err)
|
||||
}
|
||||
}()
|
||||
|
||||
err = archives.Zip{}.Extract(ctx, zipReader, func(_ context.Context, file archives.FileInfo) error {
|
||||
manifest.Add(file.NameInArchive, file.FileInfo)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return manifest, err
|
||||
for _, file := range zipReader.File {
|
||||
manifest.Add(file.Name, file.FileInfo())
|
||||
}
|
||||
return manifest, nil
|
||||
}
|
||||
|
||||
@ -4,7 +4,6 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path"
|
||||
@ -25,7 +24,7 @@ func TestNewZipFileManifest(t *testing.T) {
|
||||
|
||||
archiveFilePath := setupZipFileTest(t, sourceDirPath, false)
|
||||
|
||||
actual, err := NewZipFileManifest(context.Background(), archiveFilePath)
|
||||
actual, err := NewZipFileManifest(archiveFilePath)
|
||||
if err != nil {
|
||||
t.Fatalf("unable to extract from unzip archive: %+v", err)
|
||||
}
|
||||
@ -60,7 +59,7 @@ func TestNewZip64FileManifest(t *testing.T) {
|
||||
sourceDirPath := path.Join(cwd, "test-fixtures", "zip-source")
|
||||
archiveFilePath := setupZipFileTest(t, sourceDirPath, true)
|
||||
|
||||
actual, err := NewZipFileManifest(context.Background(), archiveFilePath)
|
||||
actual, err := NewZipFileManifest(archiveFilePath)
|
||||
if err != nil {
|
||||
t.Fatalf("unable to extract from unzip archive: %+v", err)
|
||||
}
|
||||
@ -100,7 +99,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
|
||||
|
||||
archiveFilePath := setupZipFileTest(t, sourceDirPath, false)
|
||||
|
||||
z, err := NewZipFileManifest(context.Background(), archiveFilePath)
|
||||
z, err := NewZipFileManifest(archiveFilePath)
|
||||
if err != nil {
|
||||
t.Fatalf("unable to extract from unzip archive: %+v", err)
|
||||
}
|
||||
|
||||
@ -1,15 +1,13 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/mholt/archives"
|
||||
|
||||
"github.com/anchore/syft/internal/log"
|
||||
)
|
||||
|
||||
@ -27,7 +25,7 @@ type errZipSlipDetected struct {
|
||||
}
|
||||
|
||||
func (e *errZipSlipDetected) Error() string {
|
||||
return fmt.Sprintf("path traversal detected: paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs)
|
||||
return fmt.Sprintf("paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs)
|
||||
}
|
||||
|
||||
type zipTraversalRequest map[string]struct{}
|
||||
@ -41,34 +39,38 @@ func newZipTraverseRequest(paths ...string) zipTraversalRequest {
|
||||
}
|
||||
|
||||
// TraverseFilesInZip enumerates all paths stored within a zip archive using the visitor pattern.
|
||||
func TraverseFilesInZip(ctx context.Context, archivePath string, visitor archives.FileHandler, paths ...string) error {
|
||||
func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths ...string) error {
|
||||
request := newZipTraverseRequest(paths...)
|
||||
|
||||
zipReader, err := os.Open(archivePath)
|
||||
zipReader, err := OpenZip(archivePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err)
|
||||
}
|
||||
defer func() {
|
||||
if err := zipReader.Close(); err != nil {
|
||||
err = zipReader.Close()
|
||||
if err != nil {
|
||||
log.Errorf("unable to close zip archive (%s): %+v", archivePath, err)
|
||||
}
|
||||
}()
|
||||
|
||||
return archives.Zip{}.Extract(ctx, zipReader, func(ctx context.Context, file archives.FileInfo) error {
|
||||
for _, file := range zipReader.File {
|
||||
// if no paths are given then assume that all files should be traversed
|
||||
if len(paths) > 0 {
|
||||
if _, ok := request[file.NameInArchive]; !ok {
|
||||
if _, ok := request[file.Name]; !ok {
|
||||
// this file path is not of interest
|
||||
return nil
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
return visitor(ctx, file)
|
||||
})
|
||||
if err = visitor(file); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ExtractFromZipToUniqueTempFile extracts select paths for the given archive to a temporary directory, returning file openers for each file extracted.
|
||||
func ExtractFromZipToUniqueTempFile(ctx context.Context, archivePath, dir string, paths ...string) (map[string]Opener, error) {
|
||||
func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]Opener, error) {
|
||||
results := make(map[string]Opener)
|
||||
|
||||
// don't allow for full traversal, only select traversal from given paths
|
||||
@ -76,8 +78,9 @@ func ExtractFromZipToUniqueTempFile(ctx context.Context, archivePath, dir string
|
||||
return results, nil
|
||||
}
|
||||
|
||||
visitor := func(_ context.Context, file archives.FileInfo) error {
|
||||
tempfilePrefix := filepath.Base(filepath.Clean(file.NameInArchive)) + "-"
|
||||
visitor := func(file *zip.File) error {
|
||||
tempfilePrefix := filepath.Base(filepath.Clean(file.Name)) + "-"
|
||||
|
||||
tempFile, err := os.CreateTemp(dir, tempfilePrefix)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to create temp file: %w", err)
|
||||
@ -89,32 +92,33 @@ func ExtractFromZipToUniqueTempFile(ctx context.Context, archivePath, dir string
|
||||
|
||||
zippedFile, err := file.Open()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
|
||||
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
|
||||
}
|
||||
defer func() {
|
||||
if err := zippedFile.Close(); err != nil {
|
||||
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
|
||||
err := zippedFile.Close()
|
||||
if err != nil {
|
||||
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.Name, archivePath, err)
|
||||
}
|
||||
}()
|
||||
|
||||
if file.IsDir() {
|
||||
return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
|
||||
if file.FileInfo().IsDir() {
|
||||
return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
|
||||
}
|
||||
|
||||
if err := safeCopy(tempFile, zippedFile); err != nil {
|
||||
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
|
||||
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err)
|
||||
}
|
||||
|
||||
results[file.NameInArchive] = Opener{path: tempFile.Name()}
|
||||
results[file.Name] = Opener{path: tempFile.Name()}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
|
||||
return results, TraverseFilesInZip(archivePath, visitor, paths...)
|
||||
}
|
||||
|
||||
// ContentsFromZip extracts select paths for the given archive and returns a set of string contents for each path.
|
||||
func ContentsFromZip(ctx context.Context, archivePath string, paths ...string) (map[string]string, error) {
|
||||
func ContentsFromZip(archivePath string, paths ...string) (map[string]string, error) {
|
||||
results := make(map[string]string)
|
||||
|
||||
// don't allow for full traversal, only select traversal from given paths
|
||||
@ -122,38 +126,37 @@ func ContentsFromZip(ctx context.Context, archivePath string, paths ...string) (
|
||||
return results, nil
|
||||
}
|
||||
|
||||
visitor := func(_ context.Context, file archives.FileInfo) error {
|
||||
visitor := func(file *zip.File) error {
|
||||
zippedFile, err := file.Open()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
|
||||
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
|
||||
}
|
||||
defer func() {
|
||||
if err := zippedFile.Close(); err != nil {
|
||||
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
|
||||
}
|
||||
}()
|
||||
|
||||
if file.IsDir() {
|
||||
return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
|
||||
if file.FileInfo().IsDir() {
|
||||
return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
|
||||
}
|
||||
|
||||
var buffer bytes.Buffer
|
||||
if err := safeCopy(&buffer, zippedFile); err != nil {
|
||||
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
|
||||
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err)
|
||||
}
|
||||
|
||||
results[file.NameInArchive] = buffer.String()
|
||||
results[file.Name] = buffer.String()
|
||||
|
||||
err = zippedFile.Close()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
|
||||
return results, TraverseFilesInZip(archivePath, visitor, paths...)
|
||||
}
|
||||
|
||||
// UnzipToDir extracts a zip archive to a target directory.
|
||||
func UnzipToDir(ctx context.Context, archivePath, targetDir string) error {
|
||||
visitor := func(_ context.Context, file archives.FileInfo) error {
|
||||
joinedPath, err := SafeJoin(targetDir, file.NameInArchive)
|
||||
func UnzipToDir(archivePath, targetDir string) error {
|
||||
visitor := func(file *zip.File) error {
|
||||
joinedPath, err := safeJoin(targetDir, file.Name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -161,11 +164,11 @@ func UnzipToDir(ctx context.Context, archivePath, targetDir string) error {
|
||||
return extractSingleFile(file, joinedPath, archivePath)
|
||||
}
|
||||
|
||||
return TraverseFilesInZip(ctx, archivePath, visitor)
|
||||
return TraverseFilesInZip(archivePath, visitor)
|
||||
}
|
||||
|
||||
// SafeJoin ensures that any destinations do not resolve to a path above the prefix path.
|
||||
func SafeJoin(prefix string, dest ...string) (string, error) {
|
||||
// safeJoin ensures that any destinations do not resolve to a path above the prefix path.
|
||||
func safeJoin(prefix string, dest ...string) (string, error) {
|
||||
joinResult := filepath.Join(append([]string{prefix}, dest...)...)
|
||||
cleanJoinResult := filepath.Clean(joinResult)
|
||||
if !strings.HasPrefix(cleanJoinResult, filepath.Clean(prefix)) {
|
||||
@ -178,18 +181,13 @@ func SafeJoin(prefix string, dest ...string) (string, error) {
|
||||
return joinResult, nil
|
||||
}
|
||||
|
||||
func extractSingleFile(file archives.FileInfo, expandedFilePath, archivePath string) error {
|
||||
func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) error {
|
||||
zippedFile, err := file.Open()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
|
||||
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
|
||||
}
|
||||
defer func() {
|
||||
if err := zippedFile.Close(); err != nil {
|
||||
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
|
||||
}
|
||||
}()
|
||||
|
||||
if file.IsDir() {
|
||||
if file.FileInfo().IsDir() {
|
||||
err = os.MkdirAll(expandedFilePath, file.Mode())
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to create dir=%q from zip=%q: %w", expandedFilePath, archivePath, err)
|
||||
@ -204,16 +202,20 @@ func extractSingleFile(file archives.FileInfo, expandedFilePath, archivePath str
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err)
|
||||
}
|
||||
defer func() {
|
||||
if err := outputFile.Close(); err != nil {
|
||||
log.Errorf("unable to close dest file=%q from zip=%q: %+v", outputFile.Name(), archivePath, err)
|
||||
}
|
||||
}()
|
||||
|
||||
if err := safeCopy(outputFile, zippedFile); err != nil {
|
||||
return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.NameInArchive, outputFile.Name(), archivePath, err)
|
||||
return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.Name, outputFile.Name(), archivePath, err)
|
||||
}
|
||||
|
||||
err = outputFile.Close()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to close dest file=%q from zip=%q: %w", outputFile.Name(), archivePath, err)
|
||||
}
|
||||
}
|
||||
|
||||
err = zippedFile.Close()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -4,8 +4,6 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
@ -19,7 +17,6 @@ import (
|
||||
|
||||
"github.com/go-test/deep"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func equal(r1, r2 io.Reader) (bool, error) {
|
||||
@ -58,7 +55,7 @@ func TestUnzipToDir(t *testing.T) {
|
||||
expectedPaths := len(expectedZipArchiveEntries)
|
||||
observedPaths := 0
|
||||
|
||||
err = UnzipToDir(context.Background(), archiveFilePath, unzipDestinationDir)
|
||||
err = UnzipToDir(archiveFilePath, unzipDestinationDir)
|
||||
if err != nil {
|
||||
t.Fatalf("unable to unzip archive: %+v", err)
|
||||
}
|
||||
@ -148,7 +145,7 @@ func TestContentsFromZip(t *testing.T) {
|
||||
paths = append(paths, p)
|
||||
}
|
||||
|
||||
actual, err := ContentsFromZip(context.Background(), archivePath, paths...)
|
||||
actual, err := ContentsFromZip(archivePath, paths...)
|
||||
if err != nil {
|
||||
t.Fatalf("unable to extract from unzip archive: %+v", err)
|
||||
}
|
||||
@ -310,528 +307,9 @@ func TestSafeJoin(t *testing.T) {
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(fmt.Sprintf("%+v:%+v", test.prefix, test.args), func(t *testing.T) {
|
||||
actual, err := SafeJoin(test.prefix, test.args...)
|
||||
actual, err := safeJoin(test.prefix, test.args...)
|
||||
test.errAssertion(t, err)
|
||||
assert.Equal(t, test.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestSymlinkProtection demonstrates that SafeJoin protects against symlink-based
|
||||
// directory traversal attacks by validating that archive entry paths cannot escape
|
||||
// the extraction directory.
|
||||
func TestSafeJoin_SymlinkProtection(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
archivePath string // Path as it would appear in the archive
|
||||
expectError bool
|
||||
description string
|
||||
}{
|
||||
{
|
||||
name: "path traversal via ../",
|
||||
archivePath: "../../../outside/file.txt",
|
||||
expectError: true,
|
||||
description: "Archive entry with ../ trying to escape extraction dir",
|
||||
},
|
||||
{
|
||||
name: "absolute path symlink target",
|
||||
archivePath: "../../../sensitive.txt",
|
||||
expectError: true,
|
||||
description: "Simulates symlink pointing outside via relative path",
|
||||
},
|
||||
{
|
||||
name: "safe relative path within extraction dir",
|
||||
archivePath: "subdir/safe.txt",
|
||||
expectError: false,
|
||||
description: "Normal file path that stays within extraction directory",
|
||||
},
|
||||
{
|
||||
name: "safe path with internal ../",
|
||||
archivePath: "dir1/../dir2/file.txt",
|
||||
expectError: false,
|
||||
description: "Path with ../ that still resolves within extraction dir",
|
||||
},
|
||||
{
|
||||
name: "deeply nested traversal",
|
||||
archivePath: "../../../../../../tmp/evil.txt",
|
||||
expectError: true,
|
||||
description: "Multiple levels of ../ trying to escape",
|
||||
},
|
||||
{
|
||||
name: "single parent directory escape",
|
||||
archivePath: "../",
|
||||
expectError: true,
|
||||
description: "Simple one-level escape attempt",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Create temp directories to simulate extraction scenario
|
||||
tmpDir := t.TempDir()
|
||||
extractDir := filepath.Join(tmpDir, "extract")
|
||||
outsideDir := filepath.Join(tmpDir, "outside")
|
||||
|
||||
require.NoError(t, os.MkdirAll(extractDir, 0755))
|
||||
require.NoError(t, os.MkdirAll(outsideDir, 0755))
|
||||
|
||||
// Create a file outside extraction dir that an attacker might target
|
||||
outsideFile := filepath.Join(outsideDir, "sensitive.txt")
|
||||
require.NoError(t, os.WriteFile(outsideFile, []byte("sensitive data"), 0644))
|
||||
|
||||
// Test SafeJoin - this is what happens when processing archive entries
|
||||
result, err := SafeJoin(extractDir, tt.archivePath)
|
||||
|
||||
if tt.expectError {
|
||||
// Should block malicious paths
|
||||
require.Error(t, err, "Expected SafeJoin to reject malicious path")
|
||||
var zipSlipErr *errZipSlipDetected
|
||||
assert.ErrorAs(t, err, &zipSlipErr, "Error should be errZipSlipDetected type")
|
||||
assert.Empty(t, result, "Result should be empty for blocked paths")
|
||||
} else {
|
||||
// Should allow safe paths
|
||||
require.NoError(t, err, "Expected SafeJoin to allow safe path")
|
||||
assert.NotEmpty(t, result, "Result should not be empty for safe paths")
|
||||
assert.True(t, strings.HasPrefix(filepath.Clean(result), filepath.Clean(extractDir)),
|
||||
"Safe path should resolve within extraction directory")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestUnzipToDir_SymlinkAttacks tests UnzipToDir function with malicious ZIP archives
|
||||
// containing symlink entries that attempt path traversal attacks.
|
||||
//
|
||||
// EXPECTED BEHAVIOR: UnzipToDir should either:
|
||||
// 1. Detect and reject symlinks explicitly with a security error, OR
|
||||
// 2. Extract them safely (library converts symlinks to regular files)
|
||||
func TestUnzipToDir_SymlinkAttacks(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
symlinkName string
|
||||
fileName string
|
||||
errContains string
|
||||
}{
|
||||
{
|
||||
name: "direct symlink to outside directory",
|
||||
symlinkName: "evil_link",
|
||||
fileName: "evil_link/payload.txt",
|
||||
errContains: "not a directory", // attempt to write through symlink leaf (which is not a directory)
|
||||
},
|
||||
{
|
||||
name: "directory symlink attack",
|
||||
symlinkName: "safe_dir/link",
|
||||
fileName: "safe_dir/link/payload.txt",
|
||||
errContains: "not a directory", // attempt to write through symlink (which is not a directory)
|
||||
},
|
||||
{
|
||||
name: "symlink without payload file",
|
||||
symlinkName: "standalone_link",
|
||||
fileName: "", // no payload file
|
||||
errContains: "", // no error expected, symlink without payload is safe
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
// create outside target directory
|
||||
outsideDir := filepath.Join(tempDir, "outside_target")
|
||||
require.NoError(t, os.MkdirAll(outsideDir, 0755))
|
||||
|
||||
// create extraction directory
|
||||
extractDir := filepath.Join(tempDir, "extract")
|
||||
require.NoError(t, os.MkdirAll(extractDir, 0755))
|
||||
|
||||
maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, outsideDir, tt.fileName)
|
||||
|
||||
err := UnzipToDir(context.Background(), maliciousZip, extractDir)
|
||||
|
||||
// check error expectations
|
||||
if tt.errContains != "" {
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), tt.errContains)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
analyzeExtractionDirectory(t, extractDir)
|
||||
|
||||
// check if payload file escaped extraction directory
|
||||
if tt.fileName != "" {
|
||||
maliciousFile := filepath.Join(outsideDir, filepath.Base(tt.fileName))
|
||||
checkFileOutsideExtraction(t, maliciousFile)
|
||||
}
|
||||
|
||||
// check if symlink was created pointing outside
|
||||
symlinkPath := filepath.Join(extractDir, tt.symlinkName)
|
||||
checkSymlinkCreation(t, symlinkPath, extractDir, outsideDir)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestContentsFromZip_SymlinkAttacks tests the ContentsFromZip function with malicious
|
||||
// ZIP archives containing symlink entries.
|
||||
//
|
||||
// EXPECTED BEHAVIOR: ContentsFromZip should either:
|
||||
// 1. Reject symlinks explicitly, OR
|
||||
// 2. Return empty content for symlinks (library behavior)
|
||||
//
|
||||
// Though ContentsFromZip doesn't write to disk, but if symlinks are followed, it could read sensitive
|
||||
// files from outside the archive.
|
||||
func TestContentsFromZip_SymlinkAttacks(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
symlinkName string
|
||||
symlinkTarget string
|
||||
requestPath string
|
||||
errContains string
|
||||
}{
|
||||
{
|
||||
name: "request symlink entry directly",
|
||||
symlinkName: "evil_link",
|
||||
symlinkTarget: "/etc/hosts", // attempt to read sensitive file
|
||||
requestPath: "evil_link",
|
||||
errContains: "", // no error expected - library returns symlink metadata
|
||||
},
|
||||
{
|
||||
name: "symlink in nested directory",
|
||||
symlinkName: "nested/link",
|
||||
symlinkTarget: "/etc/hosts",
|
||||
requestPath: "nested/link",
|
||||
errContains: "", // no error expected - library returns symlink metadata
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
// create malicious ZIP with symlink entry (no payload file needed)
|
||||
maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, tt.symlinkTarget, "")
|
||||
|
||||
contents, err := ContentsFromZip(context.Background(), maliciousZip, tt.requestPath)
|
||||
|
||||
// check error expectations
|
||||
if tt.errContains != "" {
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), tt.errContains)
|
||||
return
|
||||
}
|
||||
require.NoError(t, err)
|
||||
|
||||
// verify symlink handling - library should return symlink target as content (metadata)
|
||||
content, found := contents[tt.requestPath]
|
||||
require.True(t, found, "symlink entry should be found in results")
|
||||
|
||||
// verify symlink was NOT followed (content should be target path or empty)
|
||||
if content != "" && content != tt.symlinkTarget {
|
||||
// content is not empty and not the symlink target - check if actual file was read
|
||||
if _, statErr := os.Stat(tt.symlinkTarget); statErr == nil {
|
||||
targetContent, readErr := os.ReadFile(tt.symlinkTarget)
|
||||
if readErr == nil && string(targetContent) == content {
|
||||
t.Errorf("critical issue!... symlink was FOLLOWED and external file content was read!")
|
||||
t.Logf(" symlink: %s → %s", tt.requestPath, tt.symlinkTarget)
|
||||
t.Logf(" content length: %d bytes", len(content))
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestExtractFromZipToUniqueTempFile_SymlinkAttacks tests the ExtractFromZipToUniqueTempFile
// function with malicious ZIP archives containing symlink entries.
//
// EXPECTED BEHAVIOR: ExtractFromZipToUniqueTempFile should either:
// 1. Reject symlinks explicitly, OR
// 2. Extract them safely (library converts to empty files, filepath.Base sanitizes names)
//
// This function uses filepath.Base() on the archive entry name for the temp file prefix and
// os.CreateTemp(), which creates files only in the specified directory, so it should be protected
// (a small sketch of the filepath.Base sanitization follows this test).
func TestExtractFromZipToUniqueTempFile_SymlinkAttacks(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
symlinkName string
|
||||
symlinkTarget string
|
||||
requestPath string
|
||||
errContains string
|
||||
}{
|
||||
{
|
||||
name: "extract symlink entry to temp file",
|
||||
symlinkName: "evil_link",
|
||||
symlinkTarget: "/etc/passwd",
|
||||
requestPath: "evil_link",
|
||||
errContains: "", // no error expected - library extracts symlink metadata
|
||||
},
|
||||
{
|
||||
name: "extract nested symlink",
|
||||
symlinkName: "nested/dir/link",
|
||||
symlinkTarget: "/tmp/outside",
|
||||
requestPath: "nested/dir/link",
|
||||
errContains: "", // no error expected
|
||||
},
|
||||
{
|
||||
name: "extract path traversal symlink name",
|
||||
symlinkName: "../../escape",
|
||||
symlinkTarget: "/tmp/outside",
|
||||
requestPath: "../../escape",
|
||||
errContains: "", // no error expected - filepath.Base sanitizes name
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, tt.symlinkTarget, "")
|
||||
|
||||
// create temp directory for extraction
|
||||
extractTempDir := filepath.Join(tempDir, "temp_extract")
|
||||
require.NoError(t, os.MkdirAll(extractTempDir, 0755))
|
||||
|
||||
openers, err := ExtractFromZipToUniqueTempFile(context.Background(), maliciousZip, extractTempDir, tt.requestPath)
|
||||
|
||||
// check error expectations
|
||||
if tt.errContains != "" {
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), tt.errContains)
|
||||
return
|
||||
}
|
||||
require.NoError(t, err)
|
||||
|
||||
// verify symlink was extracted
|
||||
opener, found := openers[tt.requestPath]
|
||||
require.True(t, found, "symlink entry should be extracted")
|
||||
|
||||
// verify temp file is within temp directory
|
||||
tempFilePath := opener.path
|
||||
cleanTempDir := filepath.Clean(extractTempDir)
|
||||
cleanTempFile := filepath.Clean(tempFilePath)
|
||||
require.True(t, strings.HasPrefix(cleanTempFile, cleanTempDir),
|
||||
"temp file must be within temp directory: %s not in %s", cleanTempFile, cleanTempDir)
|
||||
|
||||
// verify symlink was NOT followed (content should be target path or empty)
|
||||
f, openErr := opener.Open()
|
||||
require.NoError(t, openErr)
|
||||
defer f.Close()
|
||||
|
||||
content, readErr := io.ReadAll(f)
|
||||
require.NoError(t, readErr)
|
||||
|
||||
// check if symlink was followed (content matches actual file)
|
||||
if len(content) > 0 && string(content) != tt.symlinkTarget {
|
||||
if _, statErr := os.Stat(tt.symlinkTarget); statErr == nil {
|
||||
targetContent, readErr := os.ReadFile(tt.symlinkTarget)
|
||||
if readErr == nil && string(targetContent) == string(content) {
|
||||
t.Errorf("critical issue!... symlink was FOLLOWED and external file content was copied!")
|
||||
t.Logf(" symlink: %s → %s", tt.requestPath, tt.symlinkTarget)
|
||||
t.Logf(" content length: %d bytes", len(content))
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
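// Minimal illustrative sketch (not part of the production code; the function name
// exampleShowBaseSanitization is hypothetical and it relies only on the os and path/filepath
// imports already used in this file). It shows why the "../../escape" case above is expected to be
// harmless: filepath.Base drops the directory components from the attacker-controlled entry name,
// and os.CreateTemp only ever creates the file inside the directory it is given.
func exampleShowBaseSanitization() (string, error) {
	prefix := filepath.Base("../../escape") // yields just "escape"

	// os.CreateTemp(dir, pattern) creates the file within dir; a pattern containing a path
	// separator would be rejected outright, so sanitizing with filepath.Base first is what keeps
	// the temp file inside the intended directory.
	f, err := os.CreateTemp(os.TempDir(), prefix+"-*")
	if err != nil {
		return "", err
	}
	defer os.Remove(f.Name())
	defer f.Close()

	return f.Name(), nil // e.g. /tmp/escape-123456789, still inside the temp directory
}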
|
||||
|
||||
// forensicFindings contains the results of analyzing an extraction directory
|
||||
type forensicFindings struct {
|
||||
symlinksFound []forensicSymlink
|
||||
regularFiles []string
|
||||
directories []string
|
||||
symlinkVulnerabilities []string
|
||||
}
|
||||
|
||||
type forensicSymlink struct {
|
||||
path string
|
||||
target string
|
||||
escapesExtraction bool
|
||||
resolvedPath string
|
||||
}
|
||||
|
||||
// analyzeExtractionDirectory walks the extraction directory and detects symlinks that point
|
||||
// outside the extraction directory. It is silent unless vulnerabilities are found.
|
||||
func analyzeExtractionDirectory(t *testing.T, extractDir string) forensicFindings {
|
||||
t.Helper()
|
||||
|
||||
findings := forensicFindings{}
|
||||
|
||||
filepath.Walk(extractDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
// only log if there's an error walking the directory
|
||||
t.Logf("Error walking %s: %v", path, err)
|
||||
return nil
|
||||
}
|
||||
|
||||
relPath := strings.TrimPrefix(path, extractDir+"/")
|
||||
if relPath == "" {
|
||||
relPath = "."
|
||||
}
|
||||
|
||||
// use Lstat to detect symlinks without following them
|
||||
linfo, lerr := os.Lstat(path)
|
||||
if lerr == nil && linfo.Mode()&os.ModeSymlink != 0 {
|
||||
target, _ := os.Readlink(path)
|
||||
|
||||
// resolve to see where it actually points
|
||||
var resolvedPath string
|
||||
var escapesExtraction bool
|
||||
|
||||
if filepath.IsAbs(target) {
|
||||
// absolute symlink
|
||||
resolvedPath = target
|
||||
cleanExtractDir := filepath.Clean(extractDir)
|
||||
escapesExtraction = !strings.HasPrefix(filepath.Clean(target), cleanExtractDir)
|
||||
|
||||
if escapesExtraction {
|
||||
t.Errorf("critical issue!... absolute symlink created: %s → %s", relPath, target)
|
||||
t.Logf(" this symlink points outside the extraction directory")
|
||||
findings.symlinkVulnerabilities = append(findings.symlinkVulnerabilities,
|
||||
fmt.Sprintf("absolute symlink: %s → %s", relPath, target))
|
||||
}
|
||||
} else {
|
||||
// relative symlink - resolve it
|
||||
resolvedPath = filepath.Join(filepath.Dir(path), target)
|
||||
cleanResolved := filepath.Clean(resolvedPath)
|
||||
cleanExtractDir := filepath.Clean(extractDir)
|
||||
|
||||
escapesExtraction = !strings.HasPrefix(cleanResolved, cleanExtractDir)
|
||||
|
||||
if escapesExtraction {
|
||||
t.Errorf("critical issue!... symlink escapes extraction dir: %s → %s", relPath, target)
|
||||
t.Logf(" symlink resolves to: %s (outside extraction directory)", cleanResolved)
|
||||
findings.symlinkVulnerabilities = append(findings.symlinkVulnerabilities,
|
||||
fmt.Sprintf("relative symlink escape: %s → %s (resolves to %s)", relPath, target, cleanResolved))
|
||||
}
|
||||
}
|
||||
|
||||
findings.symlinksFound = append(findings.symlinksFound, forensicSymlink{
|
||||
path: relPath,
|
||||
target: target,
|
||||
escapesExtraction: escapesExtraction,
|
||||
resolvedPath: resolvedPath,
|
||||
})
|
||||
} else {
|
||||
// regular file or directory - collect silently
|
||||
if info.IsDir() {
|
||||
findings.directories = append(findings.directories, relPath)
|
||||
} else {
|
||||
findings.regularFiles = append(findings.regularFiles, relPath)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
return findings
|
||||
}
|
||||
|
||||
// checkFileOutsideExtraction checks if a file was written outside the extraction directory.
|
||||
// Returns true if the file exists (vulnerability), false otherwise. Silent on success.
|
||||
func checkFileOutsideExtraction(t *testing.T, filePath string) bool {
|
||||
t.Helper()
|
||||
|
||||
if stat, err := os.Stat(filePath); err == nil {
|
||||
content, _ := os.ReadFile(filePath)
|
||||
t.Errorf("critical issue!... file written OUTSIDE extraction directory!")
|
||||
t.Logf(" location: %s", filePath)
|
||||
t.Logf(" size: %d bytes", stat.Size())
|
||||
t.Logf(" content: %s", string(content))
|
||||
t.Logf(" ...this means an attacker can write files to arbitrary locations on the filesystem")
|
||||
return true
|
||||
}
|
||||
// no file found outside extraction directory...
|
||||
return false
|
||||
}
|
||||
|
||||
// checkSymlinkCreation verifies if a symlink was created at the expected path and reports
|
||||
// whether it points outside the extraction directory. Silent unless a symlink is found.
|
||||
func checkSymlinkCreation(t *testing.T, symlinkPath, extractDir, expectedTarget string) bool {
|
||||
t.Helper()
|
||||
|
||||
if linfo, err := os.Lstat(symlinkPath); err == nil {
|
||||
if linfo.Mode()&os.ModeSymlink != 0 {
|
||||
target, _ := os.Readlink(symlinkPath)
|
||||
|
||||
if expectedTarget != "" && target == expectedTarget {
|
||||
t.Errorf("critical issue!... symlink pointing outside extraction dir was created!")
|
||||
t.Logf(" Symlink: %s → %s", symlinkPath, target)
|
||||
return true
|
||||
}
|
||||
|
||||
// Check if it escapes even if target doesn't match expected
|
||||
if filepath.IsAbs(target) {
|
||||
cleanExtractDir := filepath.Clean(extractDir)
|
||||
if !strings.HasPrefix(filepath.Clean(target), cleanExtractDir) {
|
||||
t.Errorf("critical issue!... absolute symlink escapes extraction dir!")
|
||||
t.Logf(" symlink: %s → %s", symlinkPath, target)
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
// if it exists but is not a symlink, that's good (attack was thwarted)...
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// createMaliciousZipWithSymlink creates a ZIP archive containing a symlink entry pointing to an arbitrary target,
|
||||
// optionally followed by a file entry that attempts to write through that symlink (when fileName is non-empty).
// It returns the path to the created ZIP archive.
|
||||
func createMaliciousZipWithSymlink(t *testing.T, tempDir, symlinkName, symlinkTarget, fileName string) string {
|
||||
t.Helper()
|
||||
|
||||
maliciousZip := filepath.Join(tempDir, "malicious.zip")
|
||||
zipFile, err := os.Create(maliciousZip)
|
||||
require.NoError(t, err)
|
||||
defer zipFile.Close()
|
||||
|
||||
zw := zip.NewWriter(zipFile)
|
||||
|
||||
// create parent directories if the symlink is nested
|
||||
if dir := filepath.Dir(symlinkName); dir != "." {
|
||||
dirHeader := &zip.FileHeader{
|
||||
Name: dir + "/",
|
||||
Method: zip.Store,
|
||||
}
|
||||
dirHeader.SetMode(os.ModeDir | 0755)
|
||||
_, err = zw.CreateHeader(dirHeader)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
// create symlink entry pointing outside extraction directory
|
||||
// note: ZIP format stores symlinks as regular files with the target path as content
|
||||
symlinkHeader := &zip.FileHeader{
|
||||
Name: symlinkName,
|
||||
Method: zip.Store,
|
||||
}
|
||||
symlinkHeader.SetMode(os.ModeSymlink | 0755)
|
||||
|
||||
symlinkWriter, err := zw.CreateHeader(symlinkHeader)
|
||||
require.NoError(t, err)
|
||||
|
||||
// write the symlink target as the file content (this is how ZIP stores symlinks)
|
||||
_, err = symlinkWriter.Write([]byte(symlinkTarget))
|
||||
require.NoError(t, err)
|
||||
|
||||
// create file entry that will be written through the symlink
|
||||
if fileName != "" {
|
||||
payloadContent := []byte("MALICIOUS PAYLOAD - This should NOT be written outside extraction dir!")
|
||||
payloadHeader := &zip.FileHeader{
|
||||
Name: fileName,
|
||||
Method: zip.Deflate,
|
||||
}
|
||||
payloadHeader.SetMode(0644)
|
||||
|
||||
payloadWriter, err := zw.CreateHeader(payloadHeader)
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = payloadWriter.Write(payloadContent)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
require.NoError(t, zw.Close())
|
||||
require.NoError(t, zipFile.Close())
|
||||
|
||||
return maliciousZip
|
||||
}
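// Minimal illustrative sketch (an assumption, not part of the production code; the function name
// exampleListSymlinkEntries is hypothetical). It shows how a consumer of archive/zip can detect the
// kind of symlink entry created above: the symlink-ness lives in the mode bits exposed via
// FileHeader.Mode(), while the entry's stored bytes are just the target path.
func exampleListSymlinkEntries(zipPath string) ([]string, error) {
	r, err := zip.OpenReader(zipPath)
	if err != nil {
		return nil, err
	}
	defer r.Close()

	var symlinks []string
	for _, f := range r.File {
		// a symlink entry advertises os.ModeSymlink in its mode bits
		if f.Mode()&os.ModeSymlink != 0 {
			symlinks = append(symlinks, f.Name)
		}
	}
	return symlinks, nil
}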
|
||||
|
||||
229
internal/file/zip_read_closer.go
Normal file
@ -0,0 +1,229 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
|
||||
"github.com/anchore/syft/internal/log"
|
||||
)
|
||||
|
||||
// directoryEndLen, readBuf, directoryEnd, and findSignatureInBlock were copied from the golang stdlib, specifically:
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/struct.go
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/reader.go
// findArchiveStartOffset is derived from the same stdlib utils, specifically the readDirectoryEnd function.
|
||||
|
||||
const (
|
||||
directoryEndLen = 22
|
||||
directory64LocLen = 20
|
||||
directory64EndLen = 56
|
||||
directory64LocSignature = 0x07064b50
|
||||
directory64EndSignature = 0x06064b50
|
||||
)
|
||||
|
||||
// ZipReadCloser is a drop-in replacement for zip.ReadCloser (from zip.OpenReader) that additionally considers zips
|
||||
// that have bytes prefixed to the front of the archive (common with self-extracting jars).
|
||||
type ZipReadCloser struct {
|
||||
*zip.Reader
|
||||
io.Closer
|
||||
}
|
||||
|
||||
// OpenZip provides a ZipReadCloser for the given filepath.
|
||||
func OpenZip(filepath string) (*ZipReadCloser, error) {
|
||||
f, err := os.Open(filepath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
f.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// some archives may have bytes prepended to the front of the archive, such as with self-executing JARs. We first
// need to find the start of the archive and keep track of this offset.
|
||||
offset, err := findArchiveStartOffset(f, fi.Size())
|
||||
if err != nil {
|
||||
log.Debugf("cannot find beginning of zip archive=%q : %v", filepath, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if _, err := f.Seek(0, io.SeekStart); err != nil {
|
||||
return nil, fmt.Errorf("unable to seek to beginning of archive: %w", err)
|
||||
}
|
||||
|
||||
if offset > math.MaxInt64 {
|
||||
return nil, fmt.Errorf("archive start offset too large: %v", offset)
|
||||
}
|
||||
offset64 := int64(offset)
|
||||
|
||||
size := fi.Size() - offset64
|
||||
|
||||
r, err := zip.NewReader(io.NewSectionReader(f, offset64, size), size)
|
||||
if err != nil {
|
||||
log.Debugf("unable to open ZipReadCloser @ %q: %v", filepath, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &ZipReadCloser{
|
||||
Reader: r,
|
||||
Closer: f,
|
||||
}, nil
|
||||
}
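// Minimal usage sketch (an assumption, illustrative only and not part of this file; the function
// name exampleOpenPrefixedZip is hypothetical). OpenZip behaves like zip.OpenReader but also
// tolerates archives with junk or an executable stub prepended (e.g. self-extracting jars),
// because it first locates the real start of the archive before handing off to archive/zip.
func exampleOpenPrefixedZip(path string) error {
	rc, err := OpenZip(path)
	if err != nil {
		return err
	}
	defer rc.Close()

	// rc embeds *zip.Reader, so the usual entry iteration applies even though the
	// underlying file may not start with the zip local-file header signature.
	for _, f := range rc.File {
		log.Debugf("entry: %s (%d bytes)", f.Name, f.UncompressedSize64)
	}
	return nil
}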
|
||||
|
||||
type readBuf []byte
|
||||
|
||||
func (b *readBuf) uint16() uint16 {
|
||||
v := binary.LittleEndian.Uint16(*b)
|
||||
*b = (*b)[2:]
|
||||
return v
|
||||
}
|
||||
|
||||
func (b *readBuf) uint32() uint32 {
|
||||
v := binary.LittleEndian.Uint32(*b)
|
||||
*b = (*b)[4:]
|
||||
return v
|
||||
}
|
||||
|
||||
func (b *readBuf) uint64() uint64 {
|
||||
v := binary.LittleEndian.Uint64(*b)
|
||||
*b = (*b)[8:]
|
||||
return v
|
||||
}
|
||||
|
||||
type directoryEnd struct {
|
||||
diskNbr uint32 // unused
|
||||
dirDiskNbr uint32 // unused
|
||||
dirRecordsThisDisk uint64 // unused
|
||||
directoryRecords uint64
|
||||
directorySize uint64
|
||||
directoryOffset uint64 // relative to file
|
||||
}
|
||||
|
||||
// note: this is derived from readDirectoryEnd within the archive/zip package
|
||||
func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, err error) {
|
||||
// look for directoryEndSignature in the last 1k, then in the last 65k
|
||||
var buf []byte
|
||||
var directoryEndOffset int64
|
||||
for i, bLen := range []int64{1024, 65 * 1024} {
|
||||
if bLen > size {
|
||||
bLen = size
|
||||
}
|
||||
buf = make([]byte, int(bLen))
|
||||
if _, err := r.ReadAt(buf, size-bLen); err != nil && !errors.Is(err, io.EOF) {
|
||||
return 0, err
|
||||
}
|
||||
if p := findSignatureInBlock(buf); p >= 0 {
|
||||
buf = buf[p:]
|
||||
directoryEndOffset = size - bLen + int64(p)
|
||||
break
|
||||
}
|
||||
if i == 1 || bLen == size {
|
||||
return 0, zip.ErrFormat
|
||||
}
|
||||
}
|
||||
|
||||
if buf == nil {
|
||||
// we were unable to find the directoryEndSignature block
|
||||
return 0, zip.ErrFormat
|
||||
}
|
||||
|
||||
// read header into struct
|
||||
b := readBuf(buf[4:]) // skip signature
|
||||
d := &directoryEnd{
|
||||
diskNbr: uint32(b.uint16()),
|
||||
dirDiskNbr: uint32(b.uint16()),
|
||||
dirRecordsThisDisk: uint64(b.uint16()),
|
||||
directoryRecords: uint64(b.uint16()),
|
||||
directorySize: uint64(b.uint32()),
|
||||
directoryOffset: uint64(b.uint32()),
|
||||
}
|
||||
// Calculate where the zip data actually begins
|
||||
|
||||
// These values mean that the file can be a zip64 file
|
||||
if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
|
||||
p, err := findDirectory64End(r, directoryEndOffset)
|
||||
if err == nil && p >= 0 {
|
||||
directoryEndOffset = p
|
||||
err = readDirectory64End(r, p, d)
|
||||
}
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
startOfArchive = uint64(directoryEndOffset) - d.directorySize - d.directoryOffset
|
||||
|
||||
// Make sure directoryOffset points to somewhere in our file.
|
||||
if d.directoryOffset >= uint64(size) {
|
||||
return 0, zip.ErrFormat
|
||||
}
|
||||
return startOfArchive, nil
|
||||
}
|
||||
|
||||
// findDirectory64End tries to read the zip64 locator just before the
|
||||
// directory end and returns the offset of the zip64 directory end if
|
||||
// found.
|
||||
func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
|
||||
locOffset := directoryEndOffset - directory64LocLen
|
||||
if locOffset < 0 {
|
||||
return -1, nil // no need to look for a header outside the file
|
||||
}
|
||||
buf := make([]byte, directory64LocLen)
|
||||
if _, err := r.ReadAt(buf, locOffset); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
b := readBuf(buf)
|
||||
if sig := b.uint32(); sig != directory64LocSignature {
|
||||
return -1, nil
|
||||
}
|
||||
if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory
|
||||
return -1, nil // the file is not a valid zip64-file
|
||||
}
|
||||
p := b.uint64() // relative offset of the zip64 end of central directory record
|
||||
if b.uint32() != 1 { // total number of disks
|
||||
return -1, nil // the file is not a valid zip64-file
|
||||
}
|
||||
return int64(p), nil
|
||||
}
|
||||
|
||||
// readDirectory64End reads the zip64 directory end and updates the
|
||||
// directory end with the zip64 directory end values.
|
||||
func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
|
||||
buf := make([]byte, directory64EndLen)
|
||||
if _, err := r.ReadAt(buf, offset); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
b := readBuf(buf)
|
||||
if sig := b.uint32(); sig != directory64EndSignature {
|
||||
return errors.New("could not read directory64End")
|
||||
}
|
||||
|
||||
b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16)
|
||||
d.diskNbr = b.uint32() // number of this disk
|
||||
d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory
|
||||
d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk
|
||||
d.directoryRecords = b.uint64() // total number of entries in the central directory
|
||||
d.directorySize = b.uint64() // size of the central directory
|
||||
d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func findSignatureInBlock(b []byte) int {
|
||||
for i := len(b) - directoryEndLen; i >= 0; i-- {
|
||||
// defined from directoryEndSignature
|
||||
if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
|
||||
// n is length of comment
|
||||
n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
|
||||
if n+directoryEndLen+i <= len(b) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
50
internal/file/zip_read_closer_test.go
Normal file
@ -0,0 +1,50 @@
|
||||
//go:build !windows
|
||||
// +build !windows
|
||||
|
||||
package file
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestFindArchiveStartOffset(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
archivePrep func(tb testing.TB) string
|
||||
expected uint64
|
||||
}{
|
||||
{
|
||||
name: "standard, non-nested zip",
|
||||
archivePrep: prepZipSourceFixture,
|
||||
expected: 0,
|
||||
},
|
||||
{
|
||||
name: "zip with prepended bytes",
|
||||
archivePrep: prependZipSourceFixtureWithString(t, "junk at the beginning of the file..."),
|
||||
expected: 36,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
archivePath := test.archivePrep(t)
|
||||
f, err := os.Open(archivePath)
|
||||
if err != nil {
|
||||
t.Fatalf("could not open archive %q: %+v", archivePath, err)
|
||||
}
|
||||
fi, err := os.Stat(f.Name())
|
||||
if err != nil {
|
||||
t.Fatalf("unable to stat archive: %+v", err)
|
||||
}
|
||||
|
||||
actual, err := findArchiveStartOffset(f, fi.Size())
|
||||
if err != nil {
|
||||
t.Fatalf("unable to find offset: %+v", err)
|
||||
}
|
||||
assert.Equal(t, test.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -27,7 +27,6 @@ func AllTypes() []any {
|
||||
pkg.ELFBinaryPackageNoteJSONPayload{},
|
||||
pkg.ElixirMixLockEntry{},
|
||||
pkg.ErlangRebarLockEntry{},
|
||||
pkg.GGUFFileHeader{},
|
||||
pkg.GitHubActionsUseStatement{},
|
||||
pkg.GolangBinaryBuildinfoEntry{},
|
||||
pkg.GolangModuleEntry{},
|
||||
|
||||
@ -124,7 +124,6 @@ var jsonTypes = makeJSONTypes(
|
||||
jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"),
|
||||
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
|
||||
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
|
||||
jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"),
|
||||
)
|
||||
|
||||
func expandLegacyNameVariants(names ...string) []string {
|
||||
|
||||
@ -3,7 +3,6 @@ package task
|
||||
import (
|
||||
"github.com/anchore/syft/syft/cataloging/pkgcataloging"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/ai"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/alpine"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/arch"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/binary"
|
||||
@ -179,7 +178,6 @@ func DefaultPackageTaskFactories() Factories {
|
||||
newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"),
|
||||
newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"),
|
||||
newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"),
|
||||
newSimplePackageTaskFactory(ai.NewGGUFCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "gguf", "ml"),
|
||||
|
||||
// deprecated catalogers ////////////////////////////////////////
|
||||
// these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible)
|
||||
|
||||
@ -4,8 +4,7 @@ import (
|
||||
"context"
|
||||
"strings"
|
||||
|
||||
"github.com/mholt/archives"
|
||||
|
||||
"github.com/anchore/archiver/v3"
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/internal/sbomsync"
|
||||
"github.com/anchore/syft/syft/cataloging"
|
||||
@ -58,10 +57,9 @@ func (c unknownsLabelerTask) finalize(resolver file.Resolver, s *sbom.SBOM) {
|
||||
}
|
||||
|
||||
if c.IncludeUnexpandedArchives {
|
||||
ctx := context.Background()
|
||||
for coords := range s.Artifacts.FileMetadata {
|
||||
format, _, notArchiveErr := archives.Identify(ctx, coords.RealPath, nil)
|
||||
if format != nil && notArchiveErr == nil && !hasPackageReference(coords) {
|
||||
unarchiver, notArchiveErr := archiver.ByExtension(coords.RealPath)
|
||||
if unarchiver != nil && notArchiveErr == nil && !hasPackageReference(coords) {
|
||||
s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged")
|
||||
}
|
||||
}
|
||||
|
||||
@ -130,8 +130,7 @@
|
||||
"description": "Digests contains file content hashes for integrity verification"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"description": "AlpmFileRecord represents a single file entry within an Arch Linux package with its associated metadata tracked by pacman."
|
||||
"type": "object"
|
||||
},
|
||||
"ApkDbEntry": {
|
||||
"properties": {
|
||||
@ -434,19 +433,16 @@
|
||||
"CPE": {
|
||||
"properties": {
|
||||
"cpe": {
|
||||
"type": "string",
|
||||
"description": "Value is the CPE string identifier."
|
||||
"type": "string"
|
||||
},
|
||||
"source": {
|
||||
"type": "string",
|
||||
"description": "Source is the source where this CPE was obtained or generated from."
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"required": [
|
||||
"cpe"
|
||||
],
|
||||
"description": "CPE represents a Common Platform Enumeration identifier used for matching packages to known vulnerabilities in security databases."
|
||||
]
|
||||
},
|
||||
"ClassifierMatch": {
|
||||
"properties": {
|
||||
@ -751,23 +747,19 @@
|
||||
"Descriptor": {
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Name is the name of the tool that generated this SBOM (e.g., \"syft\")."
|
||||
"type": "string"
|
||||
},
|
||||
"version": {
|
||||
"type": "string",
|
||||
"description": "Version is the version of the tool that generated this SBOM."
|
||||
"type": "string"
|
||||
},
|
||||
"configuration": {
|
||||
"description": "Configuration contains the tool configuration used during SBOM generation."
|
||||
}
|
||||
"configuration": true
|
||||
},
|
||||
"type": "object",
|
||||
"required": [
|
||||
"name",
|
||||
"version"
|
||||
],
|
||||
"description": "Descriptor identifies the tool that generated this SBOM document, including its name, version, and configuration used during catalog generation."
|
||||
"description": "Descriptor describes what created the document as well as surrounding metadata"
|
||||
},
|
||||
"Digest": {
|
||||
"properties": {
|
||||
@ -1293,71 +1285,58 @@
|
||||
"File": {
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "ID is a unique identifier for this file within the SBOM."
|
||||
"type": "string"
|
||||
},
|
||||
"location": {
|
||||
"$ref": "#/$defs/Coordinates",
|
||||
"description": "Location is the file path and layer information where this file was found."
|
||||
"$ref": "#/$defs/Coordinates"
|
||||
},
|
||||
"metadata": {
|
||||
"$ref": "#/$defs/FileMetadataEntry",
|
||||
"description": "Metadata contains filesystem metadata such as permissions, ownership, and file type."
|
||||
"$ref": "#/$defs/FileMetadataEntry"
|
||||
},
|
||||
"contents": {
|
||||
"type": "string",
|
||||
"description": "Contents is the file contents for small files."
|
||||
"type": "string"
|
||||
},
|
||||
"digests": {
|
||||
"items": {
|
||||
"$ref": "#/$defs/Digest"
|
||||
},
|
||||
"type": "array",
|
||||
"description": "Digests contains cryptographic hashes of the file contents."
|
||||
"type": "array"
|
||||
},
|
||||
"licenses": {
|
||||
"items": {
|
||||
"$ref": "#/$defs/FileLicense"
|
||||
},
|
||||
"type": "array",
|
||||
"description": "Licenses contains license information discovered within this file."
|
||||
"type": "array"
|
||||
},
|
||||
"executable": {
|
||||
"$ref": "#/$defs/Executable",
|
||||
"description": "Executable contains executable metadata if this file is a binary."
|
||||
"$ref": "#/$defs/Executable"
|
||||
},
|
||||
"unknowns": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array",
|
||||
"description": "Unknowns contains unknown fields for forward compatibility."
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"required": [
|
||||
"id",
|
||||
"location"
|
||||
],
|
||||
"description": "File represents a file discovered during cataloging with its metadata, content digests, licenses, and relationships to packages."
|
||||
]
|
||||
},
|
||||
"FileLicense": {
|
||||
"properties": {
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "Value is the raw license identifier or text as found in the file."
|
||||
"type": "string"
|
||||
},
|
||||
"spdxExpression": {
|
||||
"type": "string",
|
||||
"description": "SPDXExpression is the parsed SPDX license expression."
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Type is the license type classification (e.g., declared, concluded, discovered)."
|
||||
"type": "string"
|
||||
},
|
||||
"evidence": {
|
||||
"$ref": "#/$defs/FileLicenseEvidence",
|
||||
"description": "Evidence contains supporting evidence for this license detection."
|
||||
"$ref": "#/$defs/FileLicenseEvidence"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
@ -1365,22 +1344,18 @@
|
||||
"value",
|
||||
"spdxExpression",
|
||||
"type"
|
||||
],
|
||||
"description": "FileLicense represents license information discovered within a file's contents or metadata, including the matched license text and SPDX expression."
|
||||
]
|
||||
},
|
||||
"FileLicenseEvidence": {
|
||||
"properties": {
|
||||
"confidence": {
|
||||
"type": "integer",
|
||||
"description": "Confidence is the confidence score for this license detection (0-100)."
|
||||
"type": "integer"
|
||||
},
|
||||
"offset": {
|
||||
"type": "integer",
|
||||
"description": "Offset is the byte offset where the license text starts in the file."
|
||||
"type": "integer"
|
||||
},
|
||||
"extent": {
|
||||
"type": "integer",
|
||||
"description": "Extent is the length of the license text in bytes."
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
@ -1388,38 +1363,30 @@
|
||||
"confidence",
|
||||
"offset",
|
||||
"extent"
|
||||
],
|
||||
"description": "FileLicenseEvidence contains supporting evidence for a license detection in a file, including the byte offset, extent, and confidence level."
|
||||
]
|
||||
},
|
||||
"FileMetadataEntry": {
|
||||
"properties": {
|
||||
"mode": {
|
||||
"type": "integer",
|
||||
"description": "Mode is the Unix file permission mode in octal format."
|
||||
"type": "integer"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Type is the file type (e.g., \"RegularFile\", \"Directory\", \"SymbolicLink\")."
|
||||
"type": "string"
|
||||
},
|
||||
"linkDestination": {
|
||||
"type": "string",
|
||||
"description": "LinkDestination is the target path for symbolic links."
|
||||
"type": "string"
|
||||
},
|
||||
"userID": {
|
||||
"type": "integer",
|
||||
"description": "UserID is the file owner user ID."
|
||||
"type": "integer"
|
||||
},
|
||||
"groupID": {
|
||||
"type": "integer",
|
||||
"description": "GroupID is the file owner group ID."
|
||||
"type": "integer"
|
||||
},
|
||||
"mimeType": {
|
||||
"type": "string",
|
||||
"description": "MIMEType is the MIME type of the file contents."
|
||||
"type": "string"
|
||||
},
|
||||
"size": {
|
||||
"type": "integer",
|
||||
"description": "Size is the file size in bytes."
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
@ -1430,8 +1397,7 @@
|
||||
"groupID",
|
||||
"mimeType",
|
||||
"size"
|
||||
],
|
||||
"description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file."
|
||||
]
|
||||
},
|
||||
"GithubActionsUseStatement": {
|
||||
"properties": {
|
||||
@ -1579,8 +1545,7 @@
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array",
|
||||
"description": "IDLikes represents a list of distribution IDs that this Linux distribution is similar to or derived from, as defined in os-release ID_LIKE field."
|
||||
"type": "array"
|
||||
},
|
||||
"JavaArchive": {
|
||||
"properties": {
|
||||
@ -2009,34 +1974,28 @@
|
||||
"License": {
|
||||
"properties": {
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "Value is the raw license identifier or expression as found."
|
||||
"type": "string"
|
||||
},
|
||||
"spdxExpression": {
|
||||
"type": "string",
|
||||
"description": "SPDXExpression is the parsed SPDX license expression."
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Type is the license type classification (e.g., declared, concluded, discovered)."
|
||||
"type": "string"
|
||||
},
|
||||
"urls": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array",
|
||||
"description": "URLs are URLs where license text or information can be found."
|
||||
"type": "array"
|
||||
},
|
||||
"locations": {
|
||||
"items": {
|
||||
"$ref": "#/$defs/Location"
|
||||
},
|
||||
"type": "array",
|
||||
"description": "Locations are file locations where this license was discovered."
|
||||
"type": "array"
|
||||
},
|
||||
"contents": {
|
||||
"type": "string",
|
||||
"description": "Contents is the full license text content."
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
@ -2046,8 +2005,7 @@
|
||||
"type",
|
||||
"urls",
|
||||
"locations"
|
||||
],
|
||||
"description": "License represents software license information discovered for a package, including SPDX expressions and supporting evidence locations."
|
||||
]
|
||||
},
|
||||
"LinuxKernelArchive": {
|
||||
"properties": {
|
||||
@ -2172,84 +2130,64 @@
|
||||
"LinuxRelease": {
|
||||
"properties": {
|
||||
"prettyName": {
|
||||
"type": "string",
|
||||
"description": "PrettyName is a human-readable operating system name with version."
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Name is the operating system name without version information."
|
||||
"type": "string"
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "ID is the lower-case operating system identifier (e.g., \"ubuntu\", \"rhel\")."
|
||||
"type": "string"
|
||||
},
|
||||
"idLike": {
|
||||
"$ref": "#/$defs/IDLikes",
|
||||
"description": "IDLike is a list of operating system IDs this distribution is similar to or derived from."
|
||||
"$ref": "#/$defs/IDLikes"
|
||||
},
|
||||
"version": {
|
||||
"type": "string",
|
||||
"description": "Version is the operating system version including codename if available."
|
||||
"type": "string"
|
||||
},
|
||||
"versionID": {
|
||||
"type": "string",
|
||||
"description": "VersionID is the operating system version number or identifier."
|
||||
"type": "string"
|
||||
},
|
||||
"versionCodename": {
|
||||
"type": "string",
|
||||
"description": "VersionCodename is the operating system release codename (e.g., \"jammy\", \"bullseye\")."
|
||||
"type": "string"
|
||||
},
|
||||
"buildID": {
|
||||
"type": "string",
|
||||
"description": "BuildID is a build identifier for the operating system."
|
||||
"type": "string"
|
||||
},
|
||||
"imageID": {
|
||||
"type": "string",
|
||||
"description": "ImageID is an identifier for container or cloud images."
|
||||
"type": "string"
|
||||
},
|
||||
"imageVersion": {
|
||||
"type": "string",
|
||||
"description": "ImageVersion is the version for container or cloud images."
|
||||
"type": "string"
|
||||
},
|
||||
"variant": {
|
||||
"type": "string",
|
||||
"description": "Variant is the operating system variant name (e.g., \"Server\", \"Workstation\")."
|
||||
"type": "string"
|
||||
},
|
||||
"variantID": {
|
||||
"type": "string",
|
||||
"description": "VariantID is the lower-case operating system variant identifier."
|
||||
"type": "string"
|
||||
},
|
||||
"homeURL": {
|
||||
"type": "string",
|
||||
"description": "HomeURL is the homepage URL for the operating system."
|
||||
"type": "string"
|
||||
},
|
||||
"supportURL": {
|
||||
"type": "string",
|
||||
"description": "SupportURL is the support or help URL for the operating system."
|
||||
"type": "string"
|
||||
},
|
||||
"bugReportURL": {
|
||||
"type": "string",
|
||||
"description": "BugReportURL is the bug reporting URL for the operating system."
|
||||
"type": "string"
|
||||
},
|
||||
"privacyPolicyURL": {
|
||||
"type": "string",
|
||||
"description": "PrivacyPolicyURL is the privacy policy URL for the operating system."
|
||||
"type": "string"
|
||||
},
|
||||
"cpeName": {
|
||||
"type": "string",
|
||||
"description": "CPEName is the Common Platform Enumeration name for the operating system."
|
||||
"type": "string"
|
||||
},
|
||||
"supportEnd": {
|
||||
"type": "string",
|
||||
"description": "SupportEnd is the end of support date or version identifier."
|
||||
"type": "string"
|
||||
},
|
||||
"extendedSupport": {
|
||||
"type": "boolean",
|
||||
"description": "ExtendedSupport indicates whether extended security or support is available."
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"description": "LinuxRelease contains Linux distribution identification and version information extracted from /etc/os-release or similar system files."
|
||||
"type": "object"
|
||||
},
|
||||
"Location": {
|
||||
"properties": {
|
||||
@ -2345,7 +2283,7 @@
|
||||
"product_id",
|
||||
"kb"
|
||||
],
|
||||
"description": "MicrosoftKbPatch represents a Windows Knowledge Base patch identifier associated with a specific Microsoft product from the MSRC (Microsoft Security Response Center)."
|
||||
"description": "MicrosoftKbPatch is slightly odd in how it is expected to map onto data."
|
||||
},
|
||||
"NixDerivation": {
|
||||
"properties": {
|
||||
@ -3076,8 +3014,7 @@
|
||||
"type": "object",
|
||||
"required": [
|
||||
"integrity"
|
||||
],
|
||||
"description": "PnpmLockResolution contains package resolution metadata from pnpm lockfiles, including the integrity hash used for verification."
|
||||
]
|
||||
},
|
||||
"PortageDbEntry": {
|
||||
"properties": {
|
||||
@ -3564,28 +3501,22 @@
|
||||
"Relationship": {
|
||||
"properties": {
|
||||
"parent": {
|
||||
"type": "string",
|
||||
"description": "Parent is the ID of the parent artifact in this relationship."
|
||||
"type": "string"
|
||||
},
|
||||
"child": {
|
||||
"type": "string",
|
||||
"description": "Child is the ID of the child artifact in this relationship."
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Type is the relationship type (e.g., \"contains\", \"dependency-of\", \"ancestor-of\")."
|
||||
"type": "string"
|
||||
},
|
||||
"metadata": {
|
||||
"description": "Metadata contains additional relationship-specific metadata."
|
||||
}
|
||||
"metadata": true
|
||||
},
|
||||
"type": "object",
|
||||
"required": [
|
||||
"parent",
|
||||
"child",
|
||||
"type"
|
||||
],
|
||||
"description": "Relationship represents a directed relationship between two artifacts in the SBOM, such as package-contains-file or package-depends-on-package."
|
||||
]
|
||||
},
|
||||
"RpmArchive": {
|
||||
"properties": {
|
||||
@ -3932,20 +3863,17 @@
|
||||
"Schema": {
|
||||
"properties": {
|
||||
"version": {
|
||||
"type": "string",
|
||||
"description": "Version is the JSON schema version for this document format."
|
||||
"type": "string"
|
||||
},
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "URL is the URL to the JSON schema definition document."
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"required": [
|
||||
"version",
|
||||
"url"
|
||||
],
|
||||
"description": "Schema specifies the JSON schema version and URL reference that defines the structure and validation rules for this document format."
|
||||
]
|
||||
},
|
||||
"SnapEntry": {
|
||||
"properties": {
|
||||
@ -3983,28 +3911,21 @@
|
||||
"Source": {
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "ID is a unique identifier for the analyzed source artifact."
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Name is the name of the analyzed artifact (e.g., image name, directory path)."
|
||||
"type": "string"
|
||||
},
|
||||
"version": {
|
||||
"type": "string",
|
||||
"description": "Version is the version of the analyzed artifact (e.g., image tag)."
|
||||
"type": "string"
|
||||
},
|
||||
"supplier": {
|
||||
"type": "string",
|
||||
"description": "Supplier is supplier information, which can be user-provided for NTIA minimum elements compliance."
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Type is the source type (e.g., \"image\", \"directory\", \"file\")."
|
||||
"type": "string"
|
||||
},
|
||||
"metadata": {
|
||||
"description": "Metadata contains additional source-specific metadata."
|
||||
}
|
||||
"metadata": true
|
||||
},
|
||||
"type": "object",
|
||||
"required": [
|
||||
@ -4014,7 +3935,7 @@
|
||||
"type",
|
||||
"metadata"
|
||||
],
|
||||
"description": "Source represents the artifact that was analyzed to generate this SBOM, such as a container image, directory, or file archive."
|
||||
"description": "Instead, the Supplier can be determined by the user of syft and passed as a config or flag to help fulfill the NTIA minimum elements."
|
||||
},
|
||||
"SwiftPackageManagerLockEntry": {
|
||||
"properties": {
|
||||
|
||||
File diff suppressed because it is too large
@ -1,6 +1,6 @@
|
||||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "anchore.io/schema/syft/json/16.0.43/document",
|
||||
"$id": "anchore.io/schema/syft/json/16.0.42/document",
|
||||
"$ref": "#/$defs/Document",
|
||||
"$defs": {
|
||||
"AlpmDbEntry": {
|
||||
@ -130,8 +130,7 @@
|
||||
"description": "Digests contains file content hashes for integrity verification"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"description": "AlpmFileRecord represents a single file entry within an Arch Linux package with its associated metadata tracked by pacman."
|
||||
"type": "object"
|
||||
},
|
||||
"ApkDbEntry": {
|
||||
"properties": {
|
||||
@ -434,19 +433,16 @@
|
||||
"CPE": {
|
||||
"properties": {
|
||||
"cpe": {
|
||||
"type": "string",
|
||||
"description": "Value is the CPE string identifier."
|
||||
"type": "string"
|
||||
},
|
||||
"source": {
|
||||
"type": "string",
|
||||
"description": "Source is the source where this CPE was obtained or generated from."
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"required": [
|
||||
"cpe"
|
||||
],
|
||||
"description": "CPE represents a Common Platform Enumeration identifier used for matching packages to known vulnerabilities in security databases."
|
||||
]
|
||||
},
|
||||
"ClassifierMatch": {
|
||||
"properties": {
|
||||
@ -751,23 +747,19 @@
|
||||
"Descriptor": {
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Name is the name of the tool that generated this SBOM (e.g., \"syft\")."
|
||||
"type": "string"
|
||||
},
|
||||
"version": {
|
||||
"type": "string",
|
||||
"description": "Version is the version of the tool that generated this SBOM."
|
||||
"type": "string"
|
||||
},
|
||||
"configuration": {
|
||||
"description": "Configuration contains the tool configuration used during SBOM generation."
|
||||
}
|
||||
"configuration": true
|
||||
},
|
||||
"type": "object",
|
||||
"required": [
|
||||
"name",
|
||||
"version"
|
||||
],
|
||||
"description": "Descriptor identifies the tool that generated this SBOM document, including its name, version, and configuration used during catalog generation."
|
||||
"description": "Descriptor describes what created the document as well as surrounding metadata"
|
||||
},
|
||||
"Digest": {
|
||||
"properties": {
|
||||
@ -1293,71 +1285,58 @@
|
||||
"File": {
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "ID is a unique identifier for this file within the SBOM."
|
||||
"type": "string"
|
||||
},
|
||||
"location": {
|
||||
"$ref": "#/$defs/Coordinates",
|
||||
"description": "Location is the file path and layer information where this file was found."
|
||||
"$ref": "#/$defs/Coordinates"
|
||||
},
|
||||
"metadata": {
|
||||
"$ref": "#/$defs/FileMetadataEntry",
|
||||
"description": "Metadata contains filesystem metadata such as permissions, ownership, and file type."
|
||||
"$ref": "#/$defs/FileMetadataEntry"
|
||||
},
|
||||
"contents": {
|
||||
"type": "string",
|
||||
"description": "Contents is the file contents for small files."
|
||||
"type": "string"
|
||||
},
|
||||
"digests": {
|
||||
"items": {
|
||||
"$ref": "#/$defs/Digest"
|
||||
},
|
||||
"type": "array",
|
||||
"description": "Digests contains cryptographic hashes of the file contents."
|
||||
"type": "array"
|
||||
},
|
||||
"licenses": {
|
||||
"items": {
|
||||
"$ref": "#/$defs/FileLicense"
|
||||
},
|
||||
"type": "array",
|
||||
"description": "Licenses contains license information discovered within this file."
|
||||
"type": "array"
|
||||
},
|
||||
"executable": {
|
||||
"$ref": "#/$defs/Executable",
|
||||
"description": "Executable contains executable metadata if this file is a binary."
|
||||
"$ref": "#/$defs/Executable"
|
||||
},
|
||||
"unknowns": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array",
|
||||
"description": "Unknowns contains unknown fields for forward compatibility."
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"required": [
|
||||
"id",
|
||||
"location"
|
||||
],
|
||||
"description": "File represents a file discovered during cataloging with its metadata, content digests, licenses, and relationships to packages."
|
||||
]
|
||||
},
|
||||
"FileLicense": {
|
||||
"properties": {
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "Value is the raw license identifier or text as found in the file."
|
||||
"type": "string"
|
||||
},
|
||||
"spdxExpression": {
|
||||
"type": "string",
|
||||
"description": "SPDXExpression is the parsed SPDX license expression."
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Type is the license type classification (e.g., declared, concluded, discovered)."
|
||||
"type": "string"
|
||||
},
|
||||
"evidence": {
|
||||
"$ref": "#/$defs/FileLicenseEvidence",
|
||||
"description": "Evidence contains supporting evidence for this license detection."
|
||||
"$ref": "#/$defs/FileLicenseEvidence"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
@ -1365,22 +1344,18 @@
|
||||
"value",
|
||||
"spdxExpression",
|
||||
"type"
|
||||
],
|
||||
"description": "FileLicense represents license information discovered within a file's contents or metadata, including the matched license text and SPDX expression."
|
||||
]
|
||||
},
|
||||
"FileLicenseEvidence": {
|
||||
"properties": {
|
||||
"confidence": {
|
||||
"type": "integer",
|
||||
"description": "Confidence is the confidence score for this license detection (0-100)."
|
||||
"type": "integer"
|
||||
},
|
||||
"offset": {
|
||||
"type": "integer",
|
||||
"description": "Offset is the byte offset where the license text starts in the file."
|
||||
"type": "integer"
|
||||
},
|
||||
"extent": {
|
||||
"type": "integer",
|
||||
"description": "Extent is the length of the license text in bytes."
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
@ -1388,38 +1363,30 @@
|
||||
"confidence",
|
||||
"offset",
|
||||
"extent"
|
||||
],
|
||||
"description": "FileLicenseEvidence contains supporting evidence for a license detection in a file, including the byte offset, extent, and confidence level."
|
||||
]
|
||||
},
|
||||
"FileMetadataEntry": {
|
||||
"properties": {
|
||||
"mode": {
|
||||
"type": "integer",
|
||||
"description": "Mode is the Unix file permission mode in octal format."
|
||||
"type": "integer"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Type is the file type (e.g., \"RegularFile\", \"Directory\", \"SymbolicLink\")."
|
||||
"type": "string"
|
||||
},
|
||||
"linkDestination": {
|
||||
"type": "string",
|
||||
"description": "LinkDestination is the target path for symbolic links."
|
||||
"type": "string"
|
||||
},
|
||||
"userID": {
|
||||
"type": "integer",
|
||||
"description": "UserID is the file owner user ID."
|
||||
"type": "integer"
|
||||
},
|
||||
"groupID": {
|
||||
"type": "integer",
|
||||
"description": "GroupID is the file owner group ID."
|
||||
"type": "integer"
|
||||
},
|
||||
"mimeType": {
|
||||
"type": "string",
|
||||
"description": "MIMEType is the MIME type of the file contents."
|
||||
"type": "string"
|
||||
},
|
||||
"size": {
|
||||
"type": "integer",
|
||||
"description": "Size is the file size in bytes."
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
@ -1430,50 +1397,7 @@
|
||||
"groupID",
|
||||
"mimeType",
|
||||
"size"
|
||||
],
|
||||
"description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file."
|
||||
},
|
||||
"GgufFileHeader": {
|
||||
"properties": {
|
||||
"ggufVersion": {
|
||||
"type": "integer",
|
||||
"description": "GGUFVersion is the GGUF format version (e.g., 3)"
|
||||
},
|
||||
"fileSize": {
|
||||
"type": "integer",
|
||||
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
|
||||
},
|
||||
"architecture": {
|
||||
"type": "string",
|
||||
"description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")"
|
||||
},
|
||||
"quantization": {
|
||||
"type": "string",
|
||||
"description": "Quantization is the quantization type (e.g., \"IQ4_NL\", \"Q4_K_M\")"
|
||||
},
|
||||
"parameters": {
|
||||
"type": "integer",
|
||||
"description": "Parameters is the number of model parameters (if present in header)"
|
||||
},
|
||||
"tensorCount": {
|
||||
"type": "integer",
|
||||
"description": "TensorCount is the number of tensors in the model"
|
||||
},
|
||||
"header": {
|
||||
"type": "object",
|
||||
"description": "RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
|
||||
},
|
||||
"metadataHash": {
|
||||
"type": "string",
|
||||
"description": "MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"required": [
|
||||
"ggufVersion",
|
||||
"tensorCount"
|
||||
],
|
||||
"description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file."
|
||||
]
|
||||
},
|
||||
"GithubActionsUseStatement": {
|
||||
"properties": {
|
||||
@ -1621,8 +1545,7 @@
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array",
|
||||
"description": "IDLikes represents a list of distribution IDs that this Linux distribution is similar to or derived from, as defined in os-release ID_LIKE field."
|
||||
"type": "array"
|
||||
},
|
||||
"JavaArchive": {
|
||||
"properties": {
|
||||
@ -2051,34 +1974,28 @@
|
||||
"License": {
|
||||
"properties": {
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "Value is the raw license identifier or expression as found."
|
||||
"type": "string"
|
||||
},
|
||||
"spdxExpression": {
|
||||
"type": "string",
|
||||
"description": "SPDXExpression is the parsed SPDX license expression."
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Type is the license type classification (e.g., declared, concluded, discovered)."
|
||||
"type": "string"
|
||||
},
|
||||
"urls": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array",
|
||||
"description": "URLs are URLs where license text or information can be found."
|
||||
"type": "array"
|
||||
},
|
||||
"locations": {
|
||||
"items": {
|
||||
"$ref": "#/$defs/Location"
|
||||
},
|
||||
"type": "array",
|
||||
"description": "Locations are file locations where this license was discovered."
|
||||
"type": "array"
|
||||
},
|
||||
"contents": {
|
||||
"type": "string",
|
||||
"description": "Contents is the full license text content."
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
@ -2088,8 +2005,7 @@
|
||||
"type",
|
||||
"urls",
|
||||
"locations"
|
||||
],
|
||||
"description": "License represents software license information discovered for a package, including SPDX expressions and supporting evidence locations."
|
||||
]
|
||||
},
|
||||
"LinuxKernelArchive": {
|
||||
"properties": {
|
||||
@ -2214,84 +2130,64 @@
|
||||
"LinuxRelease": {
|
||||
"properties": {
|
||||
"prettyName": {
|
||||
"type": "string",
|
||||
"description": "PrettyName is a human-readable operating system name with version."
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Name is the operating system name without version information."
|
||||
"type": "string"
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "ID is the lower-case operating system identifier (e.g., \"ubuntu\", \"rhel\")."
|
||||
"type": "string"
|
||||
},
|
||||
"idLike": {
|
||||
"$ref": "#/$defs/IDLikes",
|
||||
"description": "IDLike is a list of operating system IDs this distribution is similar to or derived from."
|
||||
"$ref": "#/$defs/IDLikes"
|
||||
},
|
||||
"version": {
|
||||
"type": "string",
|
||||
"description": "Version is the operating system version including codename if available."
|
||||
"type": "string"
|
||||
},
|
||||
"versionID": {
|
||||
"type": "string",
|
||||
"description": "VersionID is the operating system version number or identifier."
|
||||
"type": "string"
|
||||
},
|
||||
"versionCodename": {
|
||||
"type": "string",
|
||||
"description": "VersionCodename is the operating system release codename (e.g., \"jammy\", \"bullseye\")."
|
||||
"type": "string"
|
||||
},
|
||||
"buildID": {
|
||||
"type": "string",
|
||||
"description": "BuildID is a build identifier for the operating system."
|
||||
"type": "string"
|
||||
},
|
||||
"imageID": {
|
||||
"type": "string",
|
||||
"description": "ImageID is an identifier for container or cloud images."
|
||||
"type": "string"
|
||||
},
|
||||
"imageVersion": {
|
||||
"type": "string",
|
||||
"description": "ImageVersion is the version for container or cloud images."
|
||||
"type": "string"
|
||||
},
|
||||
"variant": {
|
||||
"type": "string",
|
||||
"description": "Variant is the operating system variant name (e.g., \"Server\", \"Workstation\")."
|
||||
"type": "string"
|
||||
},
|
||||
"variantID": {
|
||||
"type": "string",
|
||||
"description": "VariantID is the lower-case operating system variant identifier."
|
||||
"type": "string"
|
||||
},
|
||||
"homeURL": {
|
||||
"type": "string",
|
||||
"description": "HomeURL is the homepage URL for the operating system."
|
||||
"type": "string"
|
||||
},
|
||||
"supportURL": {
|
||||
"type": "string",
|
||||
"description": "SupportURL is the support or help URL for the operating system."
|
||||
"type": "string"
|
||||
},
|
||||
"bugReportURL": {
|
||||
"type": "string",
|
||||
"description": "BugReportURL is the bug reporting URL for the operating system."
|
||||
"type": "string"
|
||||
},
|
||||
"privacyPolicyURL": {
|
||||
"type": "string",
|
||||
"description": "PrivacyPolicyURL is the privacy policy URL for the operating system."
|
||||
"type": "string"
|
||||
},
|
||||
"cpeName": {
|
||||
"type": "string",
|
||||
"description": "CPEName is the Common Platform Enumeration name for the operating system."
|
||||
"type": "string"
|
||||
},
|
||||
"supportEnd": {
|
||||
"type": "string",
|
||||
"description": "SupportEnd is the end of support date or version identifier."
|
||||
"type": "string"
|
||||
},
|
||||
"extendedSupport": {
|
||||
"type": "boolean",
|
||||
"description": "ExtendedSupport indicates whether extended security or support is available."
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"description": "LinuxRelease contains Linux distribution identification and version information extracted from /etc/os-release or similar system files."
|
||||
"type": "object"
|
||||
},
|
||||
"Location": {
|
||||
"properties": {
|
||||
@ -2387,7 +2283,7 @@
|
||||
"product_id",
|
||||
"kb"
|
||||
],
|
||||
"description": "MicrosoftKbPatch represents a Windows Knowledge Base patch identifier associated with a specific Microsoft product from the MSRC (Microsoft Security Response Center)."
|
||||
"description": "MicrosoftKbPatch is slightly odd in how it is expected to map onto data."
|
||||
},
|
||||
"NixDerivation": {
|
||||
"properties": {
|
||||
@ -2621,9 +2517,6 @@
|
||||
{
|
||||
"$ref": "#/$defs/ErlangRebarLockEntry"
|
||||
},
|
||||
{
|
||||
"$ref": "#/$defs/GgufFileHeader"
|
||||
},
|
||||
{
|
||||
"$ref": "#/$defs/GithubActionsUseStatement"
|
||||
},
|
||||
@ -3121,8 +3014,7 @@
|
||||
"type": "object",
|
||||
"required": [
|
||||
"integrity"
|
||||
],
|
||||
"description": "PnpmLockResolution contains package resolution metadata from pnpm lockfiles, including the integrity hash used for verification."
|
||||
]
|
||||
},
|
||||
"PortageDbEntry": {
|
||||
"properties": {
|
||||
@ -3609,28 +3501,22 @@
|
||||
"Relationship": {
|
||||
"properties": {
|
||||
"parent": {
|
||||
"type": "string",
|
||||
"description": "Parent is the ID of the parent artifact in this relationship."
|
||||
"type": "string"
|
||||
},
|
||||
"child": {
|
||||
"type": "string",
|
||||
"description": "Child is the ID of the child artifact in this relationship."
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Type is the relationship type (e.g., \"contains\", \"dependency-of\", \"ancestor-of\")."
|
||||
"type": "string"
|
||||
},
|
||||
"metadata": {
|
||||
"description": "Metadata contains additional relationship-specific metadata."
|
||||
}
|
||||
"metadata": true
|
||||
},
|
||||
"type": "object",
|
||||
"required": [
|
||||
"parent",
|
||||
"child",
|
||||
"type"
|
||||
],
|
||||
"description": "Relationship represents a directed relationship between two artifacts in the SBOM, such as package-contains-file or package-depends-on-package."
|
||||
]
|
||||
},
|
||||
"RpmArchive": {
|
||||
"properties": {
|
||||
@ -3977,20 +3863,17 @@
|
||||
"Schema": {
|
||||
"properties": {
|
||||
"version": {
|
||||
"type": "string",
|
||||
"description": "Version is the JSON schema version for this document format."
|
||||
"type": "string"
|
||||
},
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "URL is the URL to the JSON schema definition document."
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"required": [
|
||||
"version",
|
||||
"url"
|
||||
],
|
||||
"description": "Schema specifies the JSON schema version and URL reference that defines the structure and validation rules for this document format."
|
||||
]
|
||||
},
|
||||
"SnapEntry": {
|
||||
"properties": {
|
||||
@ -4028,28 +3911,21 @@
|
||||
"Source": {
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "ID is a unique identifier for the analyzed source artifact."
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Name is the name of the analyzed artifact (e.g., image name, directory path)."
|
||||
"type": "string"
|
||||
},
|
||||
"version": {
|
||||
"type": "string",
|
||||
"description": "Version is the version of the analyzed artifact (e.g., image tag)."
|
||||
"type": "string"
|
||||
},
|
||||
"supplier": {
|
||||
"type": "string",
|
||||
"description": "Supplier is supplier information, which can be user-provided for NTIA minimum elements compliance."
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Type is the source type (e.g., \"image\", \"directory\", \"file\")."
|
||||
"type": "string"
|
||||
},
|
||||
"metadata": {
|
||||
"description": "Metadata contains additional source-specific metadata."
|
||||
}
|
||||
"metadata": true
|
||||
},
|
||||
"type": "object",
|
||||
"required": [
|
||||
@ -4059,7 +3935,7 @@
|
||||
"type",
|
||||
"metadata"
|
||||
],
|
||||
"description": "Source represents the artifact that was analyzed to generate this SBOM, such as a container image, directory, or file archive."
|
||||
"description": "Instead, the Supplier can be determined by the user of syft and passed as a config or flag to help fulfill the NTIA minimum elements."
|
||||
},
|
||||
"SwiftPackageManagerLockEntry": {
|
||||
"properties": {
|
||||
|
||||
@ -1,95 +0,0 @@
package cpes

import (
"bufio"
"errors"
"fmt"
"io"
"strings"

"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/format/internal"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)

const ID sbom.FormatID = "cpes"
const version = "1"

var _ sbom.FormatDecoder = (*decoder)(nil)

type decoder struct{}

func NewFormatDecoder() sbom.FormatDecoder {
return decoder{}
}

func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) {
if r == nil {
return nil, "", "", fmt.Errorf("no reader provided")
}
s, err := toSyftModel(r)
return s, ID, version, err
}

func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) {
if r == nil {
return "", ""
}

scanner := bufio.NewScanner(r)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" {
// skip whitespace only lines
continue
}

err := cpe.ValidateString(line)
if err != nil {
return "", ""
}

return ID, version
}

return "", ""
}

func toSyftModel(r io.Reader) (*sbom.SBOM, error) {
var errs []error
pkgs := pkg.NewCollection()

scanner := bufio.NewScanner(r)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())

if line == "" {
continue
}

// skip invalid CPEs
c, err := cpe.New(line, "")
if err != nil {
log.WithFields("error", err, "line", line).Debug("unable to parse cpe")
continue
}

p := pkg.Package{
Name: c.Attributes.Product,
Version: c.Attributes.Version,
CPEs: []cpe.CPE{c},
}

internal.Backfill(&p)
p.SetID()
pkgs.Add(p)
}

return &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkgs,
},
}, errors.Join(errs...)
}
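A minimal usage sketch of the cpes decoder shown above (illustrative only; the cpes.txt input path and counting via Sorted() are assumptions, not part of the diff):

package main

import (
	"fmt"
	"os"

	"github.com/anchore/syft/syft/format/cpes"
)

func main() {
	// hypothetical input: a text file with one CPE string per line
	f, err := os.Open("cpes.txt")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// invalid lines are skipped by the decoder; valid CPEs become packages
	dec := cpes.NewFormatDecoder()
	s, id, version, err := dec.Decode(f)
	if err != nil {
		panic(err)
	}
	fmt.Println(id, version, len(s.Artifacts.Packages.Sorted()))
}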
@ -1,171 +0,0 @@
package cpes

import (
"strings"
"testing"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/require"

"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)

func Test_CPEProvider(t *testing.T) {
tests := []struct {
name string
userInput string
sbom *sbom.SBOM
}{
{
name: "takes a single cpe",
userInput: "cpe:/a:apache:log4j:2.14.1",
sbom: &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkg.NewCollection(pkg.Package{
Name: "log4j",
Version: "2.14.1",
CPEs: []cpe.CPE{
cpe.Must("cpe:/a:apache:log4j:2.14.1", ""),
},
}),
},
},
},
{
name: "takes multiple cpes",
userInput: `cpe:/a:apache:log4j:2.14.1
cpe:2.3:a:f5:nginx:*:*:*:*:*:*:*:*;
cpe:2.3:a:f5:nginx:0.5.2:*:*:*:*:*:*:*;
cpe:2.3:a:f5:nginx:0.5.3:*:*:*:*:*:*:*;`,
sbom: &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkg.NewCollection(
pkg.Package{
Name: "log4j",
Version: "2.14.1",
CPEs: []cpe.CPE{
cpe.Must("cpe:/a:apache:log4j:2.14.1", ""),
},
},
pkg.Package{
Name: "nginx",
Version: "",
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:f5:nginx:*:*:*:*:*:*:*:*;", ""),
},
},
pkg.Package{
Name: "nginx",
Version: "0.5.2",
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:f5:nginx:0.5.2:*:*:*:*:*:*:*;", ""),
},
},
pkg.Package{
Name: "nginx",
Version: "0.5.3",
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:f5:nginx:0.5.3:*:*:*:*:*:*:*;", ""),
},
},
),
},
},
},
{
name: "takes cpe with no version",
userInput: "cpe:/a:apache:log4j",
sbom: &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkg.NewCollection(pkg.Package{
Name: "log4j",
CPEs: []cpe.CPE{
cpe.Must("cpe:/a:apache:log4j", ""),
},
}),
},
},
},
{
name: "takes CPE 2.3 format",
userInput: "cpe:2.3:a:apache:log4j:2.14.1:*:*:*:*:*:*:*",
sbom: &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkg.NewCollection(pkg.Package{
Name: "log4j",
Version: "2.14.1",
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:apache:log4j:2.14.1:*:*:*:*:*:*:*", ""),
},
}),
},
},
},
{
name: "deduces target SW from CPE - known target_sw",
userInput: "cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*",
sbom: &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkg.NewCollection(pkg.Package{
Name: "opensearch",
Type: pkg.GemPkg,
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", ""),
},
}),
},
},
},
{
name: "handles unknown target_sw CPE field",
userInput: "cpe:2.3:a:amazon:opensearch:*:*:*:*:*:loremipsum:*:*",
sbom: &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkg.NewCollection(pkg.Package{
Name: "opensearch",
Type: "",
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:loremipsum:*:*", ""),
},
}),
},
},
},
{
name: "invalid prefix",
userInput: "dir:test-fixtures/cpe",
sbom: &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: pkg.NewCollection(),
},
},
},
}

syftPkgOpts := []cmp.Option{
cmpopts.IgnoreFields(pkg.Package{}, "id", "Language"),
cmpopts.IgnoreUnexported(pkg.Package{}, file.LocationSet{}, pkg.LicenseSet{}),
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
dec := NewFormatDecoder()

decodedSBOM, _, _, err := dec.Decode(strings.NewReader(tc.userInput))
require.NoError(t, err)

gotSyftPkgs := decodedSBOM.Artifacts.Packages.Sorted()
wantSyftPkgs := tc.sbom.Artifacts.Packages.Sorted()
require.Equal(t, len(gotSyftPkgs), len(wantSyftPkgs))
for idx, wantPkg := range wantSyftPkgs {
if d := cmp.Diff(wantPkg, gotSyftPkgs[idx], syftPkgOpts...); d != "" {
t.Errorf("unexpected Syft Package (-want +got):\n%s", d)
}
}
})
}
}
@ -3,7 +3,6 @@ package format
import (
"io"

"github.com/anchore/syft/syft/format/cpes"
"github.com/anchore/syft/syft/format/cyclonedxjson"
"github.com/anchore/syft/syft/format/cyclonedxxml"
"github.com/anchore/syft/syft/format/purls"
@ -27,7 +26,6 @@ func Decoders() []sbom.FormatDecoder {
spdxtagvalue.NewFormatDecoder(),
spdxjson.NewFormatDecoder(),
purls.NewFormatDecoder(),
cpes.NewFormatDecoder(),
}
}


@ -1,13 +1,11 @@
package model

import (
"context"
"fmt"
"strings"
"time"

"github.com/mholt/archives"

"github.com/anchore/archiver/v3"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg"
@ -155,8 +153,8 @@ func trimRelative(s string) string {

// isArchive returns true if the path appears to be an archive
func isArchive(path string) bool {
format, _, err := archives.Identify(context.Background(), path, nil)
return err == nil && format != nil
_, err := archiver.ByExtension(path)
return err == nil
}

func toDependencies(s *sbom.SBOM, p pkg.Package) (out []string) {

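A self-contained sketch of the archives.Identify check used in the newer isArchive variant above (illustrative; the example file name is an assumption):

package main

import (
	"context"
	"fmt"

	"github.com/mholt/archives"
)

func main() {
	// Identify inspects the file name (and optionally a stream) and returns a
	// non-nil format when the path matches a known archive type.
	format, _, err := archives.Identify(context.Background(), "example.tar.gz", nil)
	fmt.Println(err == nil && format != nil)
}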
@ -10,31 +10,13 @@ import (
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/pkg"
cataloger "github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
)

// Backfill takes all information present in the package and attempts to fill in any missing information
// from any available sources, such as the Metadata, PURL, or CPEs.
// from any available sources, such as the Metadata and PURL.
//
// Backfill does not call p.SetID(), but this needs to be called later to ensure it's up to date
func Backfill(p *pkg.Package) {
backfillFromPurl(p)
backfillFromCPE(p)
}

func backfillFromCPE(p *pkg.Package) {
if len(p.CPEs) == 0 {
return
}

c := p.CPEs[0]

if p.Type == "" {
p.Type = cataloger.TargetSoftwareToPackageType(c.Attributes.TargetSW)
}
}

func backfillFromPurl(p *pkg.Package) {
if p.PURL == "" {
return
}

@ -121,20 +121,6 @@ func Test_Backfill(t *testing.T) {
Metadata: pkg.JavaArchive{},
},
},
{
name: "target-sw from CPE",
in: pkg.Package{
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", ""),
},
},
expected: pkg.Package{
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", ""),
},
Type: pkg.GemPkg,
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {

@ -40,11 +40,8 @@ func EncodeComponent(p pkg.Package, supplier string, locationSorter func(a, b fi
}

componentType := cyclonedx.ComponentTypeLibrary
switch p.Type {
case pkg.BinaryPkg:
if p.Type == pkg.BinaryPkg {
componentType = cyclonedx.ComponentTypeApplication
case pkg.ModelPkg:
componentType = cyclonedx.ComponentTypeMachineLearningModel
}

return cyclonedx.Component{

@ -62,7 +62,7 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str
switch component.Type {
case cyclonedx.ComponentTypeOS:
case cyclonedx.ComponentTypeContainer:
case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary, cyclonedx.ComponentTypeMachineLearningModel:
case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary:
p := decodeComponent(component)
idMap[component.BOMRef] = p
if component.BOMRef != "" {

@ -55,7 +55,6 @@ func Test_OriginatorSupplier(t *testing.T) {
pkg.OpamPackage{},
pkg.YarnLockEntry{},
pkg.TerraformLockProviderEntry{},
pkg.GGUFFileHeader{},
)
tests := []struct {
name string

@ -82,8 +82,6 @@ func SourceInfo(p pkg.Package) string {
answer = "acquired package info from Homebrew formula"
case pkg.TerraformPkg:
answer = "acquired package info from Terraform dependency lock file"
case pkg.ModelPkg:
answer = "acquired package info from AI artifact (e.g. GGUF File"
default:
answer = "acquired package info from the following paths"
}

@ -351,14 +351,6 @@ func Test_SourceInfo(t *testing.T) {
"acquired package info from Terraform dependency lock file",
},
},
{
input: pkg.Package{
Type: pkg.ModelPkg,
},
expected: []string{
"",
},
},
}
var pkgTypes []pkg.Type
for _, test := range tests {

@ -35,23 +35,14 @@ func (d *Document) UnmarshalJSON(data []byte) error {
return nil
}

// Descriptor identifies the tool that generated this SBOM document, including its name, version, and configuration used during catalog generation.
// Descriptor describes what created the document as well as surrounding metadata
type Descriptor struct {
// Name is the name of the tool that generated this SBOM (e.g., "syft").
Name string `json:"name"`

// Version is the version of the tool that generated this SBOM.
Version string `json:"version"`

// Configuration contains the tool configuration used during SBOM generation.
Name string `json:"name"`
Version string `json:"version"`
Configuration interface{} `json:"configuration,omitempty"`
}

// Schema specifies the JSON schema version and URL reference that defines the structure and validation rules for this document format.
type Schema struct {
// Version is the JSON schema version for this document format.
Version string `json:"version"`

// URL is the URL to the JSON schema definition document.
URL string `json:"url"`
URL string `json:"url"`
}

@ -10,55 +10,25 @@ import (
"github.com/anchore/syft/syft/license"
)

// File represents a file discovered during cataloging with its metadata, content digests, licenses, and relationships to packages.
type File struct {
// ID is a unique identifier for this file within the SBOM.
ID string `json:"id"`

// Location is the file path and layer information where this file was found.
Location file.Coordinates `json:"location"`

// Metadata contains filesystem metadata such as permissions, ownership, and file type.
Metadata *FileMetadataEntry `json:"metadata,omitempty"`

// Contents is the file contents for small files.
Contents string `json:"contents,omitempty"`

// Digests contains cryptographic hashes of the file contents.
Digests []file.Digest `json:"digests,omitempty"`

// Licenses contains license information discovered within this file.
Licenses []FileLicense `json:"licenses,omitempty"`

// Executable contains executable metadata if this file is a binary.
Executable *file.Executable `json:"executable,omitempty"`

// Unknowns contains unknown fields for forward compatibility.
Unknowns []string `json:"unknowns,omitempty"`
ID string `json:"id"`
Location file.Coordinates `json:"location"`
Metadata *FileMetadataEntry `json:"metadata,omitempty"`
Contents string `json:"contents,omitempty"`
Digests []file.Digest `json:"digests,omitempty"`
Licenses []FileLicense `json:"licenses,omitempty"`
Executable *file.Executable `json:"executable,omitempty"`
Unknowns []string `json:"unknowns,omitempty"`
}

// FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file.
type FileMetadataEntry struct {
// Mode is the Unix file permission mode in octal format.
Mode int `json:"mode"`

// Type is the file type (e.g., "RegularFile", "Directory", "SymbolicLink").
Type string `json:"type"`

// LinkDestination is the target path for symbolic links.
Mode int `json:"mode"`
Type string `json:"type"`
LinkDestination string `json:"linkDestination,omitempty"`

// UserID is the file owner user ID.
UserID int `json:"userID"`

// GroupID is the file owner group ID.
GroupID int `json:"groupID"`

// MIMEType is the MIME type of the file contents.
MIMEType string `json:"mimeType"`

// Size is the file size in bytes.
Size int64 `json:"size"`
UserID int `json:"userID"`
GroupID int `json:"groupID"`
MIMEType string `json:"mimeType"`
Size int64 `json:"size"`
}

type auxFileMetadataEntry FileMetadataEntry
@ -112,31 +82,17 @@ type sbomImportLegacyFileMetadataEntry struct {
Size int64 `json:"Size"`
}

// FileLicense represents license information discovered within a file's contents or metadata, including the matched license text and SPDX expression.
type FileLicense struct {
// Value is the raw license identifier or text as found in the file.
Value string `json:"value"`

// SPDXExpression is the parsed SPDX license expression.
SPDXExpression string `json:"spdxExpression"`

// Type is the license type classification (e.g., declared, concluded, discovered).
Type license.Type `json:"type"`

// Evidence contains supporting evidence for this license detection.
Evidence *FileLicenseEvidence `json:"evidence,omitempty"`
Value string `json:"value"`
SPDXExpression string `json:"spdxExpression"`
Type license.Type `json:"type"`
Evidence *FileLicenseEvidence `json:"evidence,omitempty"`
}

// FileLicenseEvidence contains supporting evidence for a license detection in a file, including the byte offset, extent, and confidence level.
type FileLicenseEvidence struct {
// Confidence is the confidence score for this license detection (0-100).
Confidence int `json:"confidence"`

// Offset is the byte offset where the license text starts in the file.
Offset int `json:"offset"`

// Extent is the length of the license text in bytes.
Extent int `json:"extent"`
Offset int `json:"offset"`
Extent int `json:"extent"`
}

type intOrStringFileType struct {

@ -4,67 +4,28 @@ import (
"encoding/json"
)

// IDLikes represents a list of distribution IDs that this Linux distribution is similar to or derived from, as defined in os-release ID_LIKE field.
type IDLikes []string

// LinuxRelease contains Linux distribution identification and version information extracted from /etc/os-release or similar system files.
type LinuxRelease struct {
// PrettyName is a human-readable operating system name with version.
PrettyName string `json:"prettyName,omitempty"`

// Name is the operating system name without version information.
Name string `json:"name,omitempty"`

// ID is the lower-case operating system identifier (e.g., "ubuntu", "rhel").
ID string `json:"id,omitempty"`

// IDLike is a list of operating system IDs this distribution is similar to or derived from.
IDLike IDLikes `json:"idLike,omitempty"`

// Version is the operating system version including codename if available.
Version string `json:"version,omitempty"`

// VersionID is the operating system version number or identifier.
VersionID string `json:"versionID,omitempty"`

// VersionCodename is the operating system release codename (e.g., "jammy", "bullseye").
VersionCodename string `json:"versionCodename,omitempty"`

// BuildID is a build identifier for the operating system.
BuildID string `json:"buildID,omitempty"`

// ImageID is an identifier for container or cloud images.
ImageID string `json:"imageID,omitempty"`

// ImageVersion is the version for container or cloud images.
ImageVersion string `json:"imageVersion,omitempty"`

// Variant is the operating system variant name (e.g., "Server", "Workstation").
Variant string `json:"variant,omitempty"`

// VariantID is the lower-case operating system variant identifier.
VariantID string `json:"variantID,omitempty"`

// HomeURL is the homepage URL for the operating system.
HomeURL string `json:"homeURL,omitempty"`

// SupportURL is the support or help URL for the operating system.
SupportURL string `json:"supportURL,omitempty"`

// BugReportURL is the bug reporting URL for the operating system.
BugReportURL string `json:"bugReportURL,omitempty"`

// PrivacyPolicyURL is the privacy policy URL for the operating system.
PrivacyPolicyURL string `json:"privacyPolicyURL,omitempty"`

// CPEName is the Common Platform Enumeration name for the operating system.
CPEName string `json:"cpeName,omitempty"`

// SupportEnd is the end of support date or version identifier.
SupportEnd string `json:"supportEnd,omitempty"`

// ExtendedSupport indicates whether extended security or support is available.
ExtendedSupport bool `json:"extendedSupport,omitempty"`
PrettyName string `json:"prettyName,omitempty"`
Name string `json:"name,omitempty"`
ID string `json:"id,omitempty"`
IDLike IDLikes `json:"idLike,omitempty"`
Version string `json:"version,omitempty"`
VersionID string `json:"versionID,omitempty"`
VersionCodename string `json:"versionCodename,omitempty"`
BuildID string `json:"buildID,omitempty"`
ImageID string `json:"imageID,omitempty"`
ImageVersion string `json:"imageVersion,omitempty"`
Variant string `json:"variant,omitempty"`
VariantID string `json:"variantID,omitempty"`
HomeURL string `json:"homeURL,omitempty"`
SupportURL string `json:"supportURL,omitempty"`
BugReportURL string `json:"bugReportURL,omitempty"`
PrivacyPolicyURL string `json:"privacyPolicyURL,omitempty"`
CPEName string `json:"cpeName,omitempty"`
SupportEnd string `json:"supportEnd,omitempty"`
ExtendedSupport bool `json:"extendedSupport,omitempty"`
}

func (s *IDLikes) UnmarshalJSON(data []byte) error {

@ -36,40 +36,22 @@ type PackageBasicData struct {
PURL string `json:"purl"`
}

// cpes is a collection of Common Platform Enumeration identifiers for a package.
type cpes []CPE

// CPE represents a Common Platform Enumeration identifier used for matching packages to known vulnerabilities in security databases.
type CPE struct {
// Value is the CPE string identifier.
Value string `json:"cpe"`

// Source is the source where this CPE was obtained or generated from.
Value string `json:"cpe"`
Source string `json:"source,omitempty"`
}

// licenses is a collection of license findings associated with a package.
type licenses []License

// License represents software license information discovered for a package, including SPDX expressions and supporting evidence locations.
type License struct {
// Value is the raw license identifier or expression as found.
Value string `json:"value"`

// SPDXExpression is the parsed SPDX license expression.
SPDXExpression string `json:"spdxExpression"`

// Type is the license type classification (e.g., declared, concluded, discovered).
Type license.Type `json:"type"`

// URLs are URLs where license text or information can be found.
URLs []string `json:"urls"`

// Locations are file locations where this license was discovered.
Locations []file.Location `json:"locations"`

// Contents is the full license text content.
Contents string `json:"contents,omitempty"`
Value string `json:"value"`
SPDXExpression string `json:"spdxExpression"`
Type license.Type `json:"type"`
URLs []string `json:"urls"`
Locations []file.Location `json:"locations"`
Contents string `json:"contents,omitempty"`
}

func newModelLicensesFromValues(licenses []string) (ml []License) {

@ -1,16 +1,8 @@
package model

// Relationship represents a directed relationship between two artifacts in the SBOM, such as package-contains-file or package-depends-on-package.
type Relationship struct {
// Parent is the ID of the parent artifact in this relationship.
Parent string `json:"parent"`

// Child is the ID of the child artifact in this relationship.
Child string `json:"child"`

// Type is the relationship type (e.g., "contains", "dependency-of", "ancestor-of").
Type string `json:"type"`

// Metadata contains additional relationship-specific metadata.
Parent string `json:"parent"`
Child string `json:"child"`
Type string `json:"type"`
Metadata interface{} `json:"metadata,omitempty"`
}

@ -11,25 +11,18 @@ import (
"github.com/anchore/syft/syft/source"
)

// Source represents the artifact that was analyzed to generate this SBOM, such as a container image, directory, or file archive.
// The Supplier field can be provided by users to fulfill NTIA minimum elements requirements.
// Source object represents the thing that was cataloged
// Note: syft currently makes no claims or runs any logic to determine the Supplier field below

// Instead, the Supplier can be determined by the user of syft and passed as a config or flag to help fulfill
// the NTIA minimum elements. For mor information see the NTIA framing document below
// https://www.ntia.gov/files/ntia/publications/framingsbom_20191112.pdf
type Source struct {
// ID is a unique identifier for the analyzed source artifact.
ID string `json:"id"`

// Name is the name of the analyzed artifact (e.g., image name, directory path).
Name string `json:"name"`

// Version is the version of the analyzed artifact (e.g., image tag).
Version string `json:"version"`

// Supplier is supplier information, which can be user-provided for NTIA minimum elements compliance.
Supplier string `json:"supplier,omitempty"`

// Type is the source type (e.g., "image", "directory", "file").
Type string `json:"type"`

// Metadata contains additional source-specific metadata.
ID string `json:"id"`
Name string `json:"name"`
Version string `json:"version"`
Supplier string `json:"supplier,omitempty"`
Type string `json:"type"`
Metadata interface{} `json:"metadata"`
}

@ -58,7 +58,6 @@ type AlpmDBEntry struct {
Depends []string `mapstructure:"depends" json:"depends,omitempty"`
}

// AlpmFileRecord represents a single file entry within an Arch Linux package with its associated metadata tracked by pacman.
type AlpmFileRecord struct {
// Path is the file path relative to the filesystem root
Path string `mapstruture:"path" json:"path,omitempty"`

@ -1,16 +0,0 @@
/*
Package ai provides concrete Cataloger implementations for AI artifacts and machine learning models,
including support for GGUF (GPT-Generated Unified Format) model files.
*/
package ai

import (
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

// NewGGUFCataloger returns a new cataloger instance for GGUF model files.
func NewGGUFCataloger() pkg.Cataloger {
return generic.NewCataloger("gguf-cataloger").
WithParserByGlobs(parseGGUFModel, "**/*.gguf")
}
@ -1,140 +0,0 @@
package ai

import (
"os"
"path/filepath"
"testing"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
)

func TestGGUFCataloger_Globs(t *testing.T) {
tests := []struct {
name string
fixture string
expected []string
}{
{
name: "obtain gguf files",
fixture: "test-fixtures/glob-paths",
expected: []string{
"models/model.gguf",
},
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
pkgtest.NewCatalogTester().
FromDirectory(t, test.fixture).
ExpectsResolverContentQueries(test.expected).
TestCataloger(t, NewGGUFCataloger())
})
}
}

func TestGGUFCataloger(t *testing.T) {
tests := []struct {
name string
setup func(t *testing.T) string
expectedPackages []pkg.Package
expectedRelationships []artifact.Relationship
}{
{
name: "catalog single GGUF file",
setup: func(t *testing.T) string {
dir := t.TempDir()
data := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "llama3-8b").
withStringKV("general.version", "3.0").
withStringKV("general.license", "Apache-2.0").
withStringKV("general.quantization", "Q4_K_M").
withUint64KV("general.parameter_count", 8030000000).
withStringKV("general.some_random_kv", "foobar").
build()

path := filepath.Join(dir, "llama3-8b.gguf")
os.WriteFile(path, data, 0644)
return dir
},
expectedPackages: []pkg.Package{
{
Name: "llama3-8b",
Version: "3.0",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
),
Metadata: pkg.GGUFFileHeader{
Architecture: "llama",
Quantization: "Unknown",
Parameters: 0,
GGUFVersion: 3,
TensorCount: 0,
MetadataKeyValuesHash: "6e3d368066455ce4",
RemainingKeyValues: map[string]interface{}{
"general.some_random_kv": "foobar",
},
},
},
},
expectedRelationships: nil,
},
{
name: "catalog GGUF file with minimal metadata",
setup: func(t *testing.T) string {
dir := t.TempDir()
data := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "gpt2").
withStringKV("general.name", "gpt2-small").
withStringKV("gpt2.context_length", "1024").
withUint32KV("gpt2.embedding_length", 768).
build()

path := filepath.Join(dir, "gpt2-small.gguf")
os.WriteFile(path, data, 0644)
return dir
},
expectedPackages: []pkg.Package{
{
Name: "gpt2-small",
Version: "",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(),
Metadata: pkg.GGUFFileHeader{
Architecture: "gpt2",
Quantization: "Unknown",
Parameters: 0,
GGUFVersion: 3,
TensorCount: 0,
MetadataKeyValuesHash: "9dc6f23591062a27",
RemainingKeyValues: map[string]interface{}{
"gpt2.context_length": "1024",
"gpt2.embedding_length": uint32(768),
},
},
},
},
expectedRelationships: nil,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
fixtureDir := tt.setup(t)

// Use pkgtest to catalog and compare
pkgtest.NewCatalogTester().
FromDirectory(t, fixtureDir).
Expects(tt.expectedPackages, tt.expectedRelationships).
IgnoreLocationLayer().
IgnorePackageFields("FoundBy", "Locations").
TestCataloger(t, NewGGUFCataloger())
})
}
}
@ -1,22 +0,0 @@
package ai

import (
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)

func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license string, locations ...file.Location) pkg.Package {
p := pkg.Package{
Name: modelName,
Version: version,
Locations: file.NewLocationSet(locations...),
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(pkg.NewLicensesFromValues(license)...),
Metadata: *metadata,
// NOTE: PURL is intentionally not set as the package-url spec
// has not yet finalized support for ML model packages
}
p.SetID()

return p
}
@ -1,121 +0,0 @@
package ai

import (
"testing"

"github.com/stretchr/testify/require"

"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
)

func TestNewGGUFPackage(t *testing.T) {
tests := []struct {
name string
metadata *pkg.GGUFFileHeader
input struct {
modelName string
version string
license string
locations []file.Location
}
expected pkg.Package
}{
{
name: "complete GGUF package with all fields",
input: struct {
modelName string
version string
license string
locations []file.Location
}{
modelName: "llama3-8b",
version: "3.0",
license: "Apache-2.0",
locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
},
metadata: &pkg.GGUFFileHeader{
Architecture: "llama",
Quantization: "Q4_K_M",
Parameters: 8030000000,
GGUFVersion: 3,
TensorCount: 291,
RemainingKeyValues: map[string]any{
"general.random_kv": "foobar",
},
},
expected: pkg.Package{
Name: "llama3-8b",
Version: "3.0",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
),
Metadata: pkg.GGUFFileHeader{
Architecture: "llama",
Quantization: "Q4_K_M",
Parameters: 8030000000,
GGUFVersion: 3,
TensorCount: 291,
RemainingKeyValues: map[string]any{
"general.random_kv": "foobar",
},
},
Locations: file.NewLocationSet(file.NewLocation("/models/llama3-8b.gguf")),
},
},
{
name: "minimal GGUF package",
input: struct {
modelName string
version string
license string
locations []file.Location
}{
modelName: "gpt2-small",
version: "1.0",
license: "MIT",
locations: []file.Location{file.NewLocation("/models/simple.gguf")},
},
metadata: &pkg.GGUFFileHeader{
Architecture: "gpt2",
GGUFVersion: 3,
TensorCount: 50,
},
expected: pkg.Package{
Name: "gpt2-small",
Version: "1.0",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(
pkg.NewLicenseFromFields("MIT", "", nil),
),
Metadata: pkg.GGUFFileHeader{
Architecture: "gpt2",
GGUFVersion: 3,
TensorCount: 50,
},
Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")),
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
actual := newGGUFPackage(
tt.metadata,
tt.input.modelName,
tt.input.version,
tt.input.license,
tt.input.locations...,
)

// Verify metadata type
_, ok := actual.Metadata.(pkg.GGUFFileHeader)
require.True(t, ok, "metadata should be GGUFFileHeader")

// Use AssertPackagesEqual for comprehensive comparison
pkgtest.AssertPackagesEqual(t, tt.expected, actual)
})
}
}
@ -1,63 +0,0 @@
package ai

import (
"encoding/binary"
"fmt"
"io"

gguf_parser "github.com/gpustack/gguf-parser-go"
)

// GGUF file format constants
const (
ggufMagicNumber = 0x46554747 // "GGUF" in little-endian
maxHeaderSize = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies
)

// copyHeader copies the GGUF header from the reader to the writer.
// It validates the magic number first, then copies the rest of the data.
// The reader should be wrapped with io.LimitedReader to prevent OOM issues.
func copyHeader(w io.Writer, r io.Reader) error {
// Read initial chunk to validate magic number
// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
initialBuf := make([]byte, 24) // Enough for magic, version, tensor count, and kv count
if _, err := io.ReadFull(r, initialBuf); err != nil {
return fmt.Errorf("failed to read GGUF header prefix: %w", err)
}

// Verify magic number
magic := binary.LittleEndian.Uint32(initialBuf[0:4])
if magic != ggufMagicNumber {
return fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
}

// Write the initial buffer to the writer
if _, err := w.Write(initialBuf); err != nil {
return fmt.Errorf("failed to write GGUF header prefix: %w", err)
}

// Copy the rest of the header from reader to writer
// The LimitedReader will return EOF once maxHeaderSize is reached
if _, err := io.Copy(w, r); err != nil {
return fmt.Errorf("failed to copy GGUF header: %w", err)
}

return nil
}

// Helper to convert gguf_parser metadata to simpler types
func convertGGUFMetadataKVs(kvs gguf_parser.GGUFMetadataKVs) map[string]interface{} {
result := make(map[string]interface{})

for _, kv := range kvs {
// Skip standard fields that are extracted separately
switch kv.Key {
case "general.architecture", "general.name", "general.license",
"general.version", "general.parameter_count", "general.quantization":
continue
}
result[kv.Key] = kv.Value
}

return result
}
@ -1,135 +0,0 @@
package ai

import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strings"

"github.com/cespare/xxhash/v2"
gguf_parser "github.com/gpustack/gguf-parser-go"

"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

// parseGGUFModel parses a GGUF model file and returns the discovered package.
// This implementation only reads the header portion of the file, not the entire model.
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
defer internal.CloseAndLogError(reader, reader.Path())

// Create a temporary file for the library to parse
// The library requires a file path, so we create a temp file
tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf")
if err != nil {
return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
}
tempPath := tempFile.Name()
defer os.Remove(tempPath)

// Copy and validate the GGUF file header using LimitedReader to prevent OOM
// We use LimitedReader to cap reads at maxHeaderSize (50MB)
limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
if err := copyHeader(tempFile, limitedReader); err != nil {
tempFile.Close()
return nil, nil, fmt.Errorf("failed to copy GGUF header: %w", err)
}
tempFile.Close()

// Parse using gguf-parser-go with options to skip unnecessary data
ggufFile, err := gguf_parser.ParseGGUFFile(tempPath,
gguf_parser.SkipLargeMetadata(),
)
if err != nil {
return nil, nil, fmt.Errorf("failed to parse GGUF file: %w", err)
}

// Extract metadata
metadata := ggufFile.Metadata()

// Extract version separately (will be set on Package.Version)
modelVersion := extractVersion(ggufFile.Header.MetadataKV)

// Convert to syft metadata structure
syftMetadata := &pkg.GGUFFileHeader{
Architecture: metadata.Architecture,
Quantization: metadata.FileTypeDescriptor,
Parameters: uint64(metadata.Parameters),
GGUFVersion: uint32(ggufFile.Header.Version),
TensorCount: ggufFile.Header.TensorCount,
RemainingKeyValues: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
}

// If model name is not in metadata, use filename
if metadata.Name == "" {
metadata.Name = extractModelNameFromPath(reader.Path())
}

// Create package from metadata
p := newGGUFPackage(
syftMetadata,
metadata.Name,
modelVersion,
metadata.License,
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
)

return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse GGUF file")
}

// computeKVMetadataHash computes a stable hash of the KV metadata for use as a global identifier
func computeKVMetadataHash(metadata gguf_parser.GGUFMetadataKVs) string {
// Sort the KV pairs by key for stable hashing
sortedKVs := make([]gguf_parser.GGUFMetadataKV, len(metadata))
copy(sortedKVs, metadata)
sort.Slice(sortedKVs, func(i, j int) bool {
return sortedKVs[i].Key < sortedKVs[j].Key
})

// Marshal sorted KVs to JSON for stable hashing
jsonBytes, err := json.Marshal(sortedKVs)
if err != nil {
log.Debugf("failed to marshal metadata for hashing: %v", err)
return ""
}

// Compute xxhash
hash := xxhash.Sum64(jsonBytes)
return fmt.Sprintf("%016x", hash) // 16 hex chars (64 bits)
}

// extractVersion attempts to extract version from metadata KV pairs
func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string {
for _, kv := range kvs {
if kv.Key == "general.version" {
if v, ok := kv.Value.(string); ok && v != "" {
return v
}
}
}
return ""
}

// extractModelNameFromPath extracts the model name from the file path
func extractModelNameFromPath(path string) string {
// Get the base filename
base := filepath.Base(path)

// Remove .gguf extension
name := strings.TrimSuffix(base, ".gguf")

return name
}

// integrity check
var _ generic.Parser = parseGGUFModel
@ -1,128 +0,0 @@
package ai

import (
"bytes"
"encoding/binary"
)

// GGUF type constants for test builder
// https://github.com/ggml-org/ggml/blob/master/docs/gguf.md
const (
ggufMagic = 0x46554747 // "GGUF" in little-endian
ggufTypeUint8 = 0
ggufTypeInt8 = 1
ggufTypeUint16 = 2
ggufTypeInt16 = 3
ggufTypeUint32 = 4
ggufTypeInt32 = 5
ggufTypeFloat32 = 6
ggufTypeBool = 7
ggufTypeString = 8
ggufTypeArray = 9
ggufTypeUint64 = 10
ggufTypeInt64 = 11
ggufTypeFloat64 = 12
)

// testGGUFBuilder helps build GGUF files for testing
type testGGUFBuilder struct {
buf *bytes.Buffer
version uint32
tensorCount uint64
kvPairs []testKVPair
}

type testKVPair struct {
key string
valueType uint32
value interface{}
}

func newTestGGUFBuilder() *testGGUFBuilder {
return &testGGUFBuilder{
buf: new(bytes.Buffer),
version: 3,
tensorCount: 0,
kvPairs: []testKVPair{},
}
}

func (b *testGGUFBuilder) withVersion(v uint32) *testGGUFBuilder {
b.version = v
return b
}

func (b *testGGUFBuilder) withTensorCount(count uint64) *testGGUFBuilder {
b.tensorCount = count
return b
}

func (b *testGGUFBuilder) withStringKV(key, value string) *testGGUFBuilder {
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeString, value: value})
return b
}

func (b *testGGUFBuilder) withUint64KV(key string, value uint64) *testGGUFBuilder {
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint64, value: value})
return b
}

func (b *testGGUFBuilder) withUint32KV(key string, value uint32) *testGGUFBuilder {
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint32, value: value})
return b
}

func (b *testGGUFBuilder) writeString(s string) {
binary.Write(b.buf, binary.LittleEndian, uint64(len(s)))
b.buf.WriteString(s)
}

func (b *testGGUFBuilder) build() []byte {
// Write magic number "GGUF"
binary.Write(b.buf, binary.LittleEndian, uint32(ggufMagic))

// Write version
binary.Write(b.buf, binary.LittleEndian, b.version)

// Write tensor count
binary.Write(b.buf, binary.LittleEndian, b.tensorCount)

// Write KV count
binary.Write(b.buf, binary.LittleEndian, uint64(len(b.kvPairs)))

// Write KV pairs
for _, kv := range b.kvPairs {
// Write key
b.writeString(kv.key)
// Write value type
binary.Write(b.buf, binary.LittleEndian, kv.valueType)
// Write value based on type
switch kv.valueType {
case ggufTypeString:
b.writeString(kv.value.(string))
case ggufTypeUint32:
binary.Write(b.buf, binary.LittleEndian, kv.value.(uint32))
case ggufTypeUint64:
binary.Write(b.buf, binary.LittleEndian, kv.value.(uint64))
case ggufTypeUint8:
binary.Write(b.buf, binary.LittleEndian, kv.value.(uint8))
case ggufTypeInt32:
binary.Write(b.buf, binary.LittleEndian, kv.value.(int32))
case ggufTypeBool:
var v uint8
if kv.value.(bool) {
v = 1
}
binary.Write(b.buf, binary.LittleEndian, v)
}
}

return b.buf.Bytes()
}

// buildInvalidMagic creates a file with invalid magic number
func (b *testGGUFBuilder) buildInvalidMagic() []byte {
buf := new(bytes.Buffer)
binary.Write(buf, binary.LittleEndian, uint32(0x12345678))
return buf.Bytes()
}
@ -1,58 +0,0 @@
package cpe

import (
"strings"

"github.com/anchore/syft/syft/pkg"
)

// TargetSoftwareToPackageType is derived from looking at target_software attributes in the NVD dataset
// TODO: ideally this would be driven from the store, where we can resolve ecosystem aliases directly
func TargetSoftwareToPackageType(tsw string) pkg.Type {
tsw = strings.NewReplacer("-", "_", " ", "_").Replace(strings.ToLower(tsw))
switch tsw {
case "alpine", "apk":
return pkg.ApkPkg
case "debian", "dpkg":
return pkg.DebPkg
case "java", "maven", "ant", "gradle", "jenkins", "jenkins_ci", "kafka", "logstash", "mule", "nifi", "solr", "spark", "storm", "struts", "tomcat", "zookeeper", "log4j":
return pkg.JavaPkg
case "javascript", "node", "nodejs", "node.js", "npm", "yarn", "apache", "jquery", "next.js", "prismjs":
return pkg.NpmPkg
case "c", "c++", "c/c++", "conan", "gnu_c++", "qt":
return pkg.ConanPkg
case "dart":
return pkg.DartPubPkg
case "redhat", "rpm", "redhat_enterprise_linux", "rhel", "suse", "suse_linux", "opensuse", "opensuse_linux", "fedora", "centos", "oracle_linux", "ol":
return pkg.RpmPkg
case "elixir", "hex":
return pkg.HexPkg
case "erlang":
return pkg.ErlangOTPPkg
case ".net", ".net_framework", "asp", "asp.net", "dotnet", "dotnet_framework", "c#", "csharp", "nuget":
return pkg.DotnetPkg
case "ruby", "gem", "nokogiri", "ruby_on_rails":
return pkg.GemPkg
case "rust", "cargo", "crates":
return pkg.RustPkg
case "python", "pip", "pypi", "flask":
return pkg.PythonPkg
case "kb", "knowledgebase", "msrc", "mskb", "microsoft":
return pkg.KbPkg
case "portage", "gentoo":
return pkg.PortagePkg
case "go", "golang", "gomodule":
return pkg.GoModulePkg
case "linux_kernel", "linux", "z/linux":
return pkg.LinuxKernelPkg
case "php":
return pkg.PhpComposerPkg
case "swift":
return pkg.SwiftPkg
case "wordpress", "wordpress_plugin", "wordpress_":
return pkg.WordpressPluginPkg
case "lua", "luarocks":
return pkg.LuaRocksPkg
}
return ""
}
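An illustrative sketch of how the target_sw mapping above is meant to be used when backfilling a package type from a CPE (the sample CPE string is an assumption, not taken from the diff):

package main

import (
	"fmt"

	"github.com/anchore/syft/syft/cpe"
	cataloger "github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
)

func main() {
	c := cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", "")
	// a "ruby" target_sw is expected to map to the gem package type
	fmt.Println(cataloger.TargetSoftwareToPackageType(c.Attributes.TargetSW))
}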
@ -80,7 +80,7 @@ func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, _ f

// processJavaArchive processes an archive for java contents, returning all Java libraries and nested archives
func (gap genericArchiveParserAdapter) processJavaArchive(ctx context.Context, reader file.LocationReadCloser, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {
parser, cleanupFn, err := newJavaArchiveParser(ctx, reader, true, gap.cfg)
parser, cleanupFn, err := newJavaArchiveParser(reader, true, gap.cfg)
// note: even on error, we should always run cleanup functions
defer cleanupFn()
if err != nil {
@ -99,7 +99,7 @@ func uniquePkgKey(groupID string, p *pkg.Package) string {

// newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover
// and parse nested archives or ignore them.
func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) {
func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) {
// fetch the last element of the virtual path
virtualElements := strings.Split(reader.Path(), ":")
currentFilepath := virtualElements[len(virtualElements)-1]
@ -109,7 +109,7 @@ func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, d
return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err)
}

fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath)
fileManifest, err := intFile.NewZipFileManifest(archivePath)
if err != nil {
return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err)
}
@ -226,7 +226,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
}

// fetch the manifest file
contents, err := intFile.ContentsFromZip(ctx, j.archivePath, manifestMatches...)
contents, err := intFile.ContentsFromZip(j.archivePath, manifestMatches...)
if err != nil {
return nil, fmt.Errorf("unable to extract java manifests (%s): %w", j.location, err)
}
@ -387,9 +387,8 @@ type parsedPomProject struct {

// discoverMainPackageFromPomInfo attempts to resolve maven groupId, artifactId, version and other info from found pom information
func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (group, name, version string, parsedPom *parsedPomProject) {
// Find the pom.properties/pom.xml if the names seem like a plausible match
properties, _ := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
projects, _ := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
properties, _ := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
projects, _ := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))

artifactsMap := j.buildArtifactsMap(properties)
pomProperties, parsedPom := j.findBestPomMatch(properties, projects, artifactsMap)
@ -520,13 +519,13 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(ctx context.Context, paren
var pkgs []pkg.Package

// pom.properties
properties, err := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
properties, err := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
if err != nil {
return nil, err
}

// pom.xml
projects, err := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
projects, err := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
if err != nil {
return nil, err
}
@ -576,7 +575,7 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.
}

if len(licenseMatches) > 0 {
contents, err := intFile.ContentsFromZip(ctx, j.archivePath, licenseMatches...)
contents, err := intFile.ContentsFromZip(j.archivePath, licenseMatches...)
if err != nil {
return nil, fmt.Errorf("unable to extract java license (%s): %w", j.location, err)
}
@ -617,7 +616,7 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(ctx context.Context, pare
// associating each discovered package to the given parent package.
func discoverPkgsFromZip(ctx context.Context, location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
// search and parse pom.properties files & fetch the contents
|
||||
openers, err := intFile.ExtractFromZipToUniqueTempFile(ctx, archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...)
|
||||
openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err)
|
||||
}
|
||||
@ -681,8 +680,8 @@ func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWit
|
||||
return nestedPkgs, nestedRelationships, nil
|
||||
}
|
||||
|
||||
func pomPropertiesByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) {
|
||||
contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...)
|
||||
func pomPropertiesByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) {
|
||||
contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to extract maven files: %w", err)
|
||||
}
|
||||
@ -710,8 +709,8 @@ func pomPropertiesByParentPath(ctx context.Context, archivePath string, location
|
||||
return propertiesByParentPath, nil
|
||||
}
|
||||
|
||||
func pomProjectByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) {
|
||||
contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...)
|
||||
func pomProjectByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) {
|
||||
contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to extract maven files: %w", err)
|
||||
}
|
||||
|
||||
@ -72,7 +72,7 @@ func TestSearchMavenForLicenses(t *testing.T) {
require.NoError(t, err)

// setup parser
ap, cleanupFn, err := newJavaArchiveParser(context.Background(),
ap, cleanupFn, err := newJavaArchiveParser(
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
@ -372,7 +372,7 @@ func TestParseJar(t *testing.T) {
UseNetwork: false,
UseMavenLocalRepository: false,
}
parser, cleanupFn, err := newJavaArchiveParser(context.Background(),
parser, cleanupFn, err := newJavaArchiveParser(
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
@ -1499,7 +1499,7 @@ func Test_deterministicMatchingPomProperties(t *testing.T) {
fixture, err := os.Open(fixturePath)
require.NoError(t, err)

parser, cleanupFn, err := newJavaArchiveParser(context.Background(),
parser, cleanupFn, err := newJavaArchiveParser(
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
@ -1636,7 +1636,7 @@ func Test_jarPomPropertyResolutionDoesNotPanic(t *testing.T) {

ctx := context.TODO()
// setup parser
ap, cleanupFn, err := newJavaArchiveParser(context.Background(),
ap, cleanupFn, err := newJavaArchiveParser(
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,

@ -70,7 +70,7 @@ func (gtp genericTarWrappedJavaArchiveParser) parseTarWrappedJavaArchive(ctx con
}

func discoverPkgsFromTar(ctx context.Context, location file.Location, archivePath, contentPath string, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(ctx, archivePath, contentPath, archiveFormatGlobs...)
openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...)
if err != nil {
return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err)
}

@ -41,7 +41,7 @@ func (gzp genericZipWrappedJavaArchiveParser) parseZipWrappedJavaArchive(ctx con
// functions support zips with shell scripts prepended to the file. Specifically, the helpers use the central
// header at the end of the file to determine where the beginning of the zip payload is (unlike the standard lib
// or archiver).
fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath)
fileManifest, err := intFile.NewZipFileManifest(archivePath)
if err != nil {
return nil, nil, fmt.Errorf("unable to read files from java archive: %w", err)
}

@ -1,37 +0,0 @@
package pkg

// GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file.
// GGUF is a binary file format used for storing model weights for the GGML library, designed for fast
// loading and saving of models, particularly quantized large language models.
// The Model Name, License, and Version fields have all been lifted up to be on the syft Package.
type GGUFFileHeader struct {
// GGUFVersion is the GGUF format version (e.g., 3)
GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"`

// FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)
FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`

// Architecture is the model architecture (from general.architecture, e.g., "qwen3moe", "llama")
Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`

// Quantization is the quantization type (e.g., "IQ4_NL", "Q4_K_M")
Quantization string `json:"quantization,omitempty" cyclonedx:"quantization"`

// Parameters is the number of model parameters (if present in header)
Parameters uint64 `json:"parameters,omitempty" cyclonedx:"parameters"`

// TensorCount is the number of tensors in the model
TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`

// RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already
// represented as typed fields above. This preserves additional metadata fields for reference
// (namespaced with general.*, llama.*, etc.) while avoiding duplication.
RemainingKeyValues map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`

// MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.
// This hash is computed over the complete header metadata (including the fields extracted
// into typed fields above) and provides a stable identifier for the model configuration
// across different file locations or remotes. It allows matching identical models even
// when stored in different repositories or with different filenames.
MetadataKeyValuesHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"`
}
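
To make the field layout above concrete, here is a rough sketch of populating such a header after parsing GGUF metadata; the struct is a trimmed local copy for illustration only (not the syft type), and all values are hypothetical:

package main

import "fmt"

// trimmed, local stand-in for the header type documented above (illustrative only)
type ggufFileHeader struct {
	GGUFVersion  uint32
	Architecture string
	Quantization string
	TensorCount  uint64
}

func main() {
	// hypothetical values for a quantized llama-style model
	h := ggufFileHeader{
		GGUFVersion:  3,
		Architecture: "llama",
		Quantization: "Q4_K_M",
		TensorCount:  291,
	}
	fmt.Printf("%+v\n", h)
}
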
@ -1,7 +1,10 @@
package pkg

// MicrosoftKbPatch represents a Windows Knowledge Base patch identifier associated with a specific Microsoft product from the MSRC (Microsoft Security Response Center).
// This type captures both the product being patched and the KB article number for the update.
// MicrosoftKbPatch is slightly odd in how it is expected to map onto data.
// This is critical to grasp because there is no MSRC cataloger. The `ProductID`
// field is expected to be the MSRC Product ID, for example:
// "Windows 10 Version 1703 for 32-bit Systems".
// `Kb` is expected to be the actual KB number, for example "5001028"
type MicrosoftKbPatch struct {
// ProductID is MSRC Product ID (e.g. "Windows 10 Version 1703 for 32-bit Systems")
ProductID string `toml:"product_id" json:"product_id"`

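As a concrete reading of the comment above: a record for KB5001028 on "Windows 10 Version 1703 for 32-bit Systems" carries the full MSRC product name in ProductID and only the bare article number in the KB field. A tiny sketch with a local stand-in type (the real Kb field declaration is not shown in this hunk, so its exact type and tags are assumed):

package main

import "fmt"

// local stand-in mirroring the shape described above (illustrative only, not the syft type)
type microsoftKbPatch struct {
	ProductID string
	Kb        string
}

func main() {
	p := microsoftKbPatch{
		ProductID: "Windows 10 Version 1703 for 32-bit Systems",
		Kb:        "5001028",
	}
	fmt.Printf("%s -> KB%s\n", p.ProductID, p.Kb)
}
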
@ -48,7 +48,6 @@ type YarnLockEntry struct {
Dependencies map[string]string `mapstructure:"dependencies" json:"dependencies"`
}

// PnpmLockResolution contains package resolution metadata from pnpm lockfiles, including the integrity hash used for verification.
type PnpmLockResolution struct {
// Integrity is Subresource Integrity hash for verification (SRI format)
Integrity string `mapstructure:"integrity" json:"integrity"`

@ -54,7 +54,6 @@ const (
TerraformPkg Type = "terraform"
WordpressPluginPkg Type = "wordpress-plugin"
HomebrewPkg Type = "homebrew"
ModelPkg Type = "model"
)

// AllPkgs represents all supported package types
@ -99,7 +98,6 @@ var AllPkgs = []Type{
TerraformPkg,
WordpressPluginPkg,
HomebrewPkg,
ModelPkg,
}

// PackageURLType returns the PURL package type for the current package.

@ -155,7 +155,6 @@ func TestTypeFromPURL(t *testing.T) {
expectedTypes.Remove(string(HomebrewPkg))
expectedTypes.Remove(string(TerraformPkg))
expectedTypes.Remove(string(GraalVMNativeImagePkg))
expectedTypes.Remove(string(ModelPkg)) // no valid purl for ai artifacts currently
expectedTypes.Remove(string(PhpPeclPkg)) // we should always consider this a pear package

for _, test := range tests {

@ -4,15 +4,13 @@ import (
"context"
"crypto"
"fmt"
"io"
"os"
"path"
"path/filepath"
"sync"

"github.com/mholt/archives"
"github.com/opencontainers/go-digest"

"github.com/anchore/archiver/v3"
stereoFile "github.com/anchore/stereoscope/pkg/file"
intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log"
@ -210,8 +208,18 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro
// if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and
// use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is
// unarchived.
envelopedUnarchiver, _, err := archives.Identify(context.Background(), path, nil)
if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok {
envelopedUnarchiver, err := archiver.ByExtension(path)
if unarchiver, ok := envelopedUnarchiver.(archiver.Unarchiver); err == nil && ok {
// when tar/zip files are extracted, if there are multiple entries at the same
// location, the last entry wins
// NOTE: this currently does not display any messages if an overwrite happens
switch v := unarchiver.(type) {
case *archiver.Tar:
v.OverwriteExisting = true
case *archiver.Zip:
v.OverwriteExisting = true
}

analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver)
if err != nil {
return "", nil, fmt.Errorf("unable to unarchive source file: %w", err)
@ -238,58 +246,15 @@ func digestOfFileContents(path string) string {
|
||||
return di.String()
|
||||
}
|
||||
|
||||
func unarchiveToTmp(path string, unarchiver archives.Extractor) (string, func() error, error) {
|
||||
var cleanupFn = func() error { return nil }
|
||||
archive, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", cleanupFn, fmt.Errorf("unable to open archive: %v", err)
|
||||
}
|
||||
defer archive.Close()
|
||||
|
||||
func unarchiveToTmp(path string, unarchiver archiver.Unarchiver) (string, func() error, error) {
|
||||
tempDir, err := os.MkdirTemp("", "syft-archive-contents-")
|
||||
if err != nil {
|
||||
return "", cleanupFn, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
|
||||
return "", func() error { return nil }, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
|
||||
}
|
||||
|
||||
visitor := func(_ context.Context, file archives.FileInfo) error {
|
||||
// Protect against symlink attacks by ensuring path doesn't escape tempDir
|
||||
destPath, err := intFile.SafeJoin(tempDir, file.NameInArchive)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if file.IsDir() {
|
||||
return os.MkdirAll(destPath, file.Mode())
|
||||
}
|
||||
|
||||
if err = os.MkdirAll(filepath.Dir(destPath), os.ModeDir|0755); err != nil {
|
||||
return fmt.Errorf("failed to create parent directory: %w", err)
|
||||
}
|
||||
|
||||
rc, err := file.Open()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open file in archive: %w", err)
|
||||
}
|
||||
defer rc.Close()
|
||||
|
||||
destFile, err := os.Create(destPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create file in destination: %w", err)
|
||||
}
|
||||
defer destFile.Close()
|
||||
|
||||
if err := destFile.Chmod(file.Mode()); err != nil {
|
||||
return fmt.Errorf("failed to change mode of destination file: %w", err)
|
||||
}
|
||||
|
||||
if _, err := io.Copy(destFile, rc); err != nil {
|
||||
return fmt.Errorf("failed to copy file contents: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
return tempDir, func() error {
|
||||
cleanupFn := func() error {
|
||||
return os.RemoveAll(tempDir)
|
||||
}, unarchiver.Extract(context.Background(), archive, visitor)
|
||||
}
|
||||
|
||||
return tempDir, cleanupFn, unarchiver.Unarchive(path, tempDir)
|
||||
}
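
One detail worth calling out in the archives.Extractor-based variant of unarchiveToTmp above is the zip-slip guard: every entry name is joined under the temp dir and rejected if the cleaned path escapes it. A minimal, self-contained sketch of that idea (safeJoin here is an illustrative stand-in for the intFile.SafeJoin helper referenced in the hunk; the real implementation may differ):

package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// safeJoin joins name under root and rejects any entry whose cleaned path escapes root (zip-slip guard).
func safeJoin(root, name string) (string, error) {
	p := filepath.Join(root, name)
	if p != root && !strings.HasPrefix(p, root+string(filepath.Separator)) {
		return "", fmt.Errorf("illegal path %q escapes %q", name, root)
	}
	return p, nil
}

func main() {
	root := "/tmp/syft-archive-contents-example"
	for _, name := range []string{"ok/file.txt", "../../etc/passwd"} {
		if p, err := safeJoin(root, name); err != nil {
			fmt.Println("rejected:", err)
		} else {
			fmt.Println("accepted:", p)
		}
	}
}
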