diff --git a/internal/constants.go b/internal/constants.go index a6f0bdae7..ec348c187 100644 --- a/internal/constants.go +++ b/internal/constants.go @@ -3,12 +3,13 @@ package internal const ( // JSONSchemaVersion is the current schema version output by the JSON encoder // This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment. - JSONSchemaVersion = "16.1.3" + JSONSchemaVersion = "16.1.4" // Changelog // 16.1.0 - reformulated the python pdm fields (added "URL" and removed the unused "path" field). // 16.1.1 - correct elf package osCpe field according to the document of systemd (also add appCpe field) // 16.1.2 - placeholder for 16.1.2 changelog // 16.1.3 - add GGUFFileParts to GGUFFileHeader metadata + // 16.1.4 - add SafeTensorsMetadata ) diff --git a/internal/packagemetadata/discover_type_names.go b/internal/packagemetadata/discover_type_names.go index 704c98c0b..4787d415a 100644 --- a/internal/packagemetadata/discover_type_names.go +++ b/internal/packagemetadata/discover_type_names.go @@ -27,6 +27,7 @@ var knownNonMetadataTypeNames = strset.New( var knownMetadataTypeNames = strset.New( "DotnetPortableExecutableEntry", "GGUFFileHeader", + "SafeTensorsMetadata", ) func DiscoverTypeNames() ([]string, error) { diff --git a/internal/packagemetadata/generated.go b/internal/packagemetadata/generated.go index 7178662f7..25b4f3fd1 100644 --- a/internal/packagemetadata/generated.go +++ b/internal/packagemetadata/generated.go @@ -64,6 +64,7 @@ func AllTypes() []any { pkg.RubyGemspec{}, pkg.RustBinaryAuditEntry{}, pkg.RustCargoLockEntry{}, + pkg.SafeTensorsMetadata{}, pkg.SnapEntry{}, pkg.SwiftPackageManagerResolvedEntry{}, pkg.SwiplPackEntry{}, diff --git a/internal/packagemetadata/names.go b/internal/packagemetadata/names.go index 5b91d8911..64b1ee76c 100644 --- a/internal/packagemetadata/names.go +++ b/internal/packagemetadata/names.go @@ -126,6 +126,7 @@ var jsonTypes = 
makeJSONTypes( jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"), jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"), jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"), + jsonNames(pkg.SafeTensorsMetadata{}, "safetensors-metadata"), ) func expandLegacyNameVariants(names ...string) []string { diff --git a/internal/task/package_tasks.go b/internal/task/package_tasks.go index 12e12422b..e9dceaa4f 100644 --- a/internal/task/package_tasks.go +++ b/internal/task/package_tasks.go @@ -180,6 +180,7 @@ func DefaultPackageTaskFactories() Factories { newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"), newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"), newSimplePackageTaskFactory(ai.NewGGUFCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "gguf", "ml"), + newSimplePackageTaskFactory(ai.NewSafeTensorsCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "safetensors", "ml"), // deprecated catalogers //////////////////////////////////////// // these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible) diff --git a/schema/json/schema-16.1.4.json b/schema/json/schema-16.1.4.json new file mode 100644 index 000000000..be64d29c7 --- /dev/null +++ b/schema/json/schema-16.1.4.json @@ -0,0 +1,4311 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "anchore.io/schema/syft/json/16.1.4/document", + "$ref": "#/$defs/Document", + "$defs": { + "AlpmDbEntry": { + "properties": { + "basepackage": { + "type": "string", + "description": "BasePackage is the base package name this package was built from (source package in Arch build system)" + }, + "package": { + "type": "string", + "description": "Package is 
the package name as found in the desc file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the desc file" + }, + "description": { + "type": "string", + "description": "Description is a human-readable package description" + }, + "architecture": { + "type": "string", + "description": "Architecture is the target CPU architecture as defined in Arch architecture spec (e.g. x86_64, aarch64, or \"any\" for arch-independent packages)" + }, + "size": { + "type": "integer", + "description": "Size is the installed size in bytes" + }, + "packager": { + "type": "string", + "description": "Packager is the name and email of the person who packaged this (RFC822 format)" + }, + "url": { + "type": "string", + "description": "URL is the upstream project URL" + }, + "validation": { + "type": "string", + "description": "Validation is the validation method used for package integrity (e.g. pgp signature, sha256 checksum)" + }, + "reason": { + "type": "integer", + "description": "Reason is the installation reason tracked by pacman (0=explicitly installed by user, 1=installed as dependency)" + }, + "files": { + "items": { + "$ref": "#/$defs/AlpmFileRecord" + }, + "type": "array", + "description": "Files are the files installed by this package" + }, + "backup": { + "items": { + "$ref": "#/$defs/AlpmFileRecord" + }, + "type": "array", + "description": "Backup is the list of configuration files that pacman backs up before upgrades" + }, + "provides": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Provides are virtual packages provided by this package (allows other packages to depend on capabilities rather than specific packages)" + }, + "depends": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Depends are the runtime dependencies required by this package" + } + }, + "type": "object", + "required": [ + "basepackage", + "package", + "version", + "description", + "architecture", + 
"size", + "packager", + "url", + "validation", + "reason", + "files", + "backup" + ], + "description": "AlpmDBEntry is a struct that represents the package data stored in the pacman flat-file stores for arch linux." + }, + "AlpmFileRecord": { + "properties": { + "path": { + "type": "string", + "description": "Path is the file path relative to the filesystem root" + }, + "type": { + "type": "string", + "description": "Type is the file type (e.g. regular file, directory, symlink)" + }, + "uid": { + "type": "string", + "description": "UID is the file owner user ID as recorded by pacman" + }, + "gid": { + "type": "string", + "description": "GID is the file owner group ID as recorded by pacman" + }, + "time": { + "type": "string", + "format": "date-time", + "description": "Time is the file modification timestamp" + }, + "size": { + "type": "string", + "description": "Size is the file size in bytes" + }, + "link": { + "type": "string", + "description": "Link is the symlink target path if this is a symlink" + }, + "digest": { + "items": { + "$ref": "#/$defs/Digest" + }, + "type": "array", + "description": "Digests contains file content hashes for integrity verification" + } + }, + "type": "object", + "description": "AlpmFileRecord represents a single file entry within an Arch Linux package with its associated metadata tracked by pacman." 
+ }, + "ApkDbEntry": { + "properties": { + "package": { + "type": "string", + "description": "Package is the package name as found in the installed file" + }, + "originPackage": { + "type": "string", + "description": "OriginPackage is the original source package name this binary was built from (used to track which aport/source built this)" + }, + "maintainer": { + "type": "string", + "description": "Maintainer is the package maintainer name and email" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the installed file" + }, + "architecture": { + "type": "string", + "description": "Architecture is the target CPU architecture" + }, + "url": { + "type": "string", + "description": "URL is the upstream project URL" + }, + "description": { + "type": "string", + "description": "Description is a human-readable package description" + }, + "size": { + "type": "integer", + "description": "Size is the package archive size in bytes (.apk file size)" + }, + "installedSize": { + "type": "integer", + "description": "InstalledSize is the total size of installed files in bytes" + }, + "pullDependencies": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Dependencies are the runtime dependencies required by this package" + }, + "provides": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Provides are virtual packages provided by this package (for capability-based dependencies)" + }, + "pullChecksum": { + "type": "string", + "description": "Checksum is the package content checksum for integrity verification" + }, + "gitCommitOfApkPort": { + "type": "string", + "description": "GitCommit is the git commit hash of the APK port definition in Alpine's aports repository" + }, + "files": { + "items": { + "$ref": "#/$defs/ApkFileRecord" + }, + "type": "array", + "description": "Files are the files installed by this package" + } + }, + "type": "object", + "required": [ + "package", + 
"originPackage", + "maintainer", + "version", + "architecture", + "url", + "description", + "size", + "installedSize", + "pullDependencies", + "provides", + "pullChecksum", + "gitCommitOfApkPort", + "files" + ], + "description": "ApkDBEntry represents all captured data for the alpine linux package manager flat-file store." + }, + "ApkFileRecord": { + "properties": { + "path": { + "type": "string", + "description": "Path is the file path relative to the filesystem root" + }, + "ownerUid": { + "type": "string", + "description": "OwnerUID is the file owner user ID" + }, + "ownerGid": { + "type": "string", + "description": "OwnerGID is the file owner group ID" + }, + "permissions": { + "type": "string", + "description": "Permissions is the file permission mode string (e.g. \"0755\", \"0644\")" + }, + "digest": { + "$ref": "#/$defs/Digest", + "description": "Digest is the file content hash for integrity verification" + } + }, + "type": "object", + "required": [ + "path" + ], + "description": "ApkFileRecord represents a single file listing and metadata from a APK DB entry (which may have many of these file records)." + }, + "BinarySignature": { + "properties": { + "matches": { + "items": { + "$ref": "#/$defs/ClassifierMatch" + }, + "type": "array" + } + }, + "type": "object", + "required": [ + "matches" + ], + "description": "BinarySignature represents a set of matched values within a binary file." 
+ }, + "BitnamiSbomEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the Bitnami SPDX file" + }, + "arch": { + "type": "string", + "description": "Architecture is the target CPU architecture (amd64 or arm64 in Bitnami images)" + }, + "distro": { + "type": "string", + "description": "Distro is the distribution name this package is for (base OS like debian, ubuntu, etc.)" + }, + "revision": { + "type": "string", + "description": "Revision is the Bitnami-specific package revision number (incremented for Bitnami rebuilds of same upstream version)" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the Bitnami SPDX file" + }, + "path": { + "type": "string", + "description": "Path is the installation path in the filesystem where the package is located" + }, + "files": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Files are the file paths owned by this package (tracked via SPDX relationships)" + } + }, + "type": "object", + "required": [ + "name", + "arch", + "distro", + "revision", + "version", + "path", + "files" + ], + "description": "BitnamiSBOMEntry represents all captured data from Bitnami packages described in Bitnami' SPDX files." + }, + "CConanFileEntry": { + "properties": { + "ref": { + "type": "string", + "description": "Ref is the package reference string in format name/version@user/channel" + } + }, + "type": "object", + "required": [ + "ref" + ], + "description": "ConanfileEntry represents a single \"Requires\" entry from a conanfile.txt." 
+ }, + "CConanInfoEntry": { + "properties": { + "ref": { + "type": "string", + "description": "Ref is the package reference string in format name/version@user/channel" + }, + "package_id": { + "type": "string", + "description": "PackageID is a unique package variant identifier" + } + }, + "type": "object", + "required": [ + "ref" + ], + "description": "ConaninfoEntry represents a single \"full_requires\" entry from a conaninfo.txt." + }, + "CConanLockEntry": { + "properties": { + "ref": { + "type": "string", + "description": "Ref is the package reference string in format name/version@user/channel" + }, + "package_id": { + "type": "string", + "description": "PackageID is a unique package variant identifier computed from settings/options (static hash in Conan 1.x, can have collisions with complex dependency graphs)" + }, + "prev": { + "type": "string", + "description": "Prev is the previous lock entry reference for versioning" + }, + "requires": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Requires are the runtime package dependencies" + }, + "build_requires": { + "items": { + "type": "string" + }, + "type": "array", + "description": "BuildRequires are the build-time dependencies (e.g. cmake, compilers)" + }, + "py_requires": { + "items": { + "type": "string" + }, + "type": "array", + "description": "PythonRequires are the Python dependencies needed for Conan recipes" + }, + "options": { + "$ref": "#/$defs/KeyValues", + "description": "Options are package configuration options as key-value pairs (e.g. shared=True, fPIC=True)" + }, + "path": { + "type": "string", + "description": "Path is the filesystem path to the package in Conan cache" + }, + "context": { + "type": "string", + "description": "Context is the build context information" + } + }, + "type": "object", + "required": [ + "ref" + ], + "description": "ConanV1LockEntry represents a single \"node\" entry from a conan.lock V1 file." 
+ }, + "CConanLockV2Entry": { + "properties": { + "ref": { + "type": "string", + "description": "Ref is the package reference string in format name/version@user/channel" + }, + "packageID": { + "type": "string", + "description": "PackageID is a unique package variant identifier (dynamic in Conan 2.0, more accurate than V1)" + }, + "username": { + "type": "string", + "description": "Username is the Conan user/organization name" + }, + "channel": { + "type": "string", + "description": "Channel is the Conan channel name indicating stability/purpose (e.g. stable, testing, experimental)" + }, + "recipeRevision": { + "type": "string", + "description": "RecipeRevision is a git-like revision hash (RREV) of the recipe" + }, + "packageRevision": { + "type": "string", + "description": "PackageRevision is a git-like revision hash of the built binary package" + }, + "timestamp": { + "type": "string", + "description": "TimeStamp is when this package was built/locked" + } + }, + "type": "object", + "required": [ + "ref" + ], + "description": "ConanV2LockEntry represents a single \"node\" entry from a conan.lock V2 file." + }, + "CPE": { + "properties": { + "cpe": { + "type": "string", + "description": "Value is the CPE string identifier." + }, + "source": { + "type": "string", + "description": "Source is the source where this CPE was obtained or generated from." + } + }, + "type": "object", + "required": [ + "cpe" + ], + "description": "CPE represents a Common Platform Enumeration identifier used for matching packages to known vulnerabilities in security databases." + }, + "ClassifierMatch": { + "properties": { + "classifier": { + "type": "string" + }, + "location": { + "$ref": "#/$defs/Location" + } + }, + "type": "object", + "required": [ + "classifier", + "location" + ], + "description": "ClassifierMatch represents a single matched value within a binary file and the \"class\" name the search pattern represents." 
+ }, + "CocoaPodfileLockEntry": { + "properties": { + "checksum": { + "type": "string", + "description": "Checksum is the SHA-1 hash of the podspec file for integrity verification (generated via `pod ipc spec ... | openssl sha1`), ensuring all team members use the same pod specification version" + } + }, + "type": "object", + "required": [ + "checksum" + ], + "description": "CocoaPodfileLockEntry represents a single entry from the \"Pods\" section of a Podfile.lock file." + }, + "CondaLink": { + "properties": { + "source": { + "type": "string", + "description": "Source is the original path where the package was extracted from cache." + }, + "type": { + "type": "integer", + "description": "Type indicates the link type (1 for hard link, 2 for soft link, 3 for copy)." + } + }, + "type": "object", + "required": [ + "source", + "type" + ], + "description": "CondaLink represents link metadata from a Conda package's link.json file describing package installation source." + }, + "CondaMetadataEntry": { + "properties": { + "arch": { + "type": "string", + "description": "Arch is the target CPU architecture for the package (e.g., \"arm64\", \"x86_64\")." + }, + "name": { + "type": "string", + "description": "Name is the package name as found in the conda-meta JSON file." + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the conda-meta JSON file." + }, + "build": { + "type": "string", + "description": "Build is the build string identifier (e.g., \"h90dfc92_1014\")." + }, + "build_number": { + "type": "integer", + "description": "BuildNumber is the sequential build number for this version." + }, + "channel": { + "type": "string", + "description": "Channel is the Conda channel URL where the package was retrieved from." + }, + "subdir": { + "type": "string", + "description": "Subdir is the subdirectory within the channel (e.g., \"osx-arm64\", \"linux-64\")." 
+ }, + "noarch": { + "type": "string", + "description": "Noarch indicates if the package is platform-independent (e.g., \"python\", \"generic\")." + }, + "license": { + "type": "string", + "description": "License is the package license identifier." + }, + "license_family": { + "type": "string", + "description": "LicenseFamily is the general license category (e.g., \"MIT\", \"Apache\", \"GPL\")." + }, + "md5": { + "type": "string", + "description": "MD5 is the MD5 hash of the package archive." + }, + "sha256": { + "type": "string", + "description": "SHA256 is the SHA-256 hash of the package archive." + }, + "size": { + "type": "integer", + "description": "Size is the package archive size in bytes." + }, + "timestamp": { + "type": "integer", + "description": "Timestamp is the Unix timestamp when the package was built." + }, + "fn": { + "type": "string", + "description": "Filename is the original package archive filename (e.g., \"zlib-1.2.11-h90dfc92_1014.tar.bz2\")." + }, + "url": { + "type": "string", + "description": "URL is the full download URL for the package archive." + }, + "extracted_package_dir": { + "type": "string", + "description": "ExtractedPackageDir is the local cache directory where the package was extracted." + }, + "depends": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Depends is the list of runtime dependencies with version constraints." + }, + "files": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Files is the list of files installed by this package." + }, + "paths_data": { + "$ref": "#/$defs/CondaPathsData", + "description": "PathsData contains detailed file metadata from the paths.json file." + }, + "link": { + "$ref": "#/$defs/CondaLink", + "description": "Link contains installation source metadata from the link.json file." 
+ } + }, + "type": "object", + "required": [ + "name", + "version", + "build", + "build_number" + ], + "description": "CondaMetaPackage represents metadata for a Conda package extracted from the conda-meta/*.json files." + }, + "CondaPathData": { + "properties": { + "_path": { + "type": "string", + "description": "Path is the file path relative to the Conda environment root." + }, + "path_type": { + "type": "string", + "description": "PathType indicates the link type for the file (e.g., \"hardlink\", \"softlink\", \"directory\")." + }, + "sha256": { + "type": "string", + "description": "SHA256 is the SHA-256 hash of the file contents." + }, + "sha256_in_prefix": { + "type": "string", + "description": "SHA256InPrefix is the SHA-256 hash of the file after prefix replacement during installation." + }, + "size_in_bytes": { + "type": "integer", + "description": "SizeInBytes is the file size in bytes." + } + }, + "type": "object", + "required": [ + "_path", + "path_type", + "sha256", + "sha256_in_prefix", + "size_in_bytes" + ], + "description": "CondaPathData represents metadata for a single file within a Conda package from the paths.json file." + }, + "CondaPathsData": { + "properties": { + "paths_version": { + "type": "integer", + "description": "PathsVersion is the schema version of the paths data format." + }, + "paths": { + "items": { + "$ref": "#/$defs/CondaPathData" + }, + "type": "array", + "description": "Paths is the list of file metadata entries for all files in the package." + } + }, + "type": "object", + "required": [ + "paths_version", + "paths" + ], + "description": "CondaPathsData represents the paths.json file structure from a Conda package containing file metadata." + }, + "Coordinates": { + "properties": { + "path": { + "type": "string", + "description": "RealPath is the canonical absolute form of the path accessed (all symbolic links have been followed and relative path components like '.' and '..' have been removed)." 
+ }, + "layerID": { + "type": "string", + "description": "FileSystemID is an ID representing and entire filesystem. For container images, this is a layer digest. For directories or a root filesystem, this is blank." + } + }, + "type": "object", + "required": [ + "path" + ], + "description": "Coordinates contains the minimal information needed to describe how to find a file within any possible source object (e.g." + }, + "DartPubspec": { + "properties": { + "homepage": { + "type": "string", + "description": "Homepage is the package homepage URL" + }, + "repository": { + "type": "string", + "description": "Repository is the source code repository URL" + }, + "documentation": { + "type": "string", + "description": "Documentation is the documentation site URL" + }, + "publish_to": { + "type": "string", + "description": "PublishTo is the package repository to publish to, or \"none\" to prevent accidental publishing" + }, + "environment": { + "$ref": "#/$defs/DartPubspecEnvironment", + "description": "Environment is SDK version constraints for Dart and Flutter" + }, + "platforms": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Platforms are the supported platforms (Android, iOS, web, etc.)" + }, + "ignored_advisories": { + "items": { + "type": "string" + }, + "type": "array", + "description": "IgnoredAdvisories are the security advisories to explicitly ignore for this package" + } + }, + "type": "object", + "description": "DartPubspec is a struct that represents a package described in a pubspec.yaml file" + }, + "DartPubspecEnvironment": { + "properties": { + "sdk": { + "type": "string", + "description": "SDK is the Dart SDK version constraint (e.g. 
\"\u003e=2.12.0 \u003c3.0.0\")" + }, + "flutter": { + "type": "string", + "description": "Flutter is the Flutter SDK version constraint if this is a Flutter package" + } + }, + "type": "object", + "description": "DartPubspecEnvironment represents SDK version constraints from the environment section of pubspec.yaml." + }, + "DartPubspecLockEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the pubspec.lock file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the pubspec.lock file" + }, + "hosted_url": { + "type": "string", + "description": "HostedURL is the URL of the package repository for hosted packages (typically pub.dev, but can be custom repository identified by hosted-url). When PUB_HOSTED_URL environment variable changes, lockfile tracks the source." + }, + "vcs_url": { + "type": "string", + "description": "VcsURL is the URL of the VCS repository for git/path dependencies (for packages fetched from version control systems like Git)" + } + }, + "type": "object", + "required": [ + "name", + "version" + ], + "description": "DartPubspecLockEntry is a struct that represents a single entry found in the \"packages\" section in a Dart pubspec.lock file." + }, + "Descriptor": { + "properties": { + "name": { + "type": "string", + "description": "Name is the name of the tool that generated this SBOM (e.g., \"syft\")." + }, + "version": { + "type": "string", + "description": "Version is the version of the tool that generated this SBOM." + }, + "configuration": { + "description": "Configuration contains the tool configuration used during SBOM generation." + } + }, + "type": "object", + "required": [ + "name", + "version" + ], + "description": "Descriptor identifies the tool that generated this SBOM document, including its name, version, and configuration used during catalog generation." 
+ }, + "Digest": { + "properties": { + "algorithm": { + "type": "string", + "description": "Algorithm specifies the hash algorithm used (e.g., \"sha256\", \"md5\")." + }, + "value": { + "type": "string", + "description": "Value is the hexadecimal string representation of the hash." + } + }, + "type": "object", + "required": [ + "algorithm", + "value" + ], + "description": "Digest represents a cryptographic hash of file contents." + }, + "Document": { + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/Package" + }, + "type": "array", + "description": "Artifacts is the list of packages discovered and placed into the catalog" + }, + "artifactRelationships": { + "items": { + "$ref": "#/$defs/Relationship" + }, + "type": "array" + }, + "files": { + "items": { + "$ref": "#/$defs/File" + }, + "type": "array", + "description": "note: must have omitempty" + }, + "source": { + "$ref": "#/$defs/Source", + "description": "Source represents the original object that was cataloged" + }, + "distro": { + "$ref": "#/$defs/LinuxRelease", + "description": "Distro represents the Linux distribution that was detected from the source" + }, + "descriptor": { + "$ref": "#/$defs/Descriptor", + "description": "Descriptor is a block containing self-describing information about syft" + }, + "schema": { + "$ref": "#/$defs/Schema", + "description": "Schema is a block reserved for defining the version for the shape of this JSON document and where to find the schema document to validate the shape" + } + }, + "type": "object", + "required": [ + "artifacts", + "artifactRelationships", + "source", + "distro", + "descriptor", + "schema" + ], + "description": "Document represents the syft cataloging findings as a JSON document" + }, + "DotnetDepsEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the deps.json file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the 
deps.json file" + }, + "path": { + "type": "string", + "description": "Path is the relative path to the package within the deps structure (e.g. \"app.metrics/3.0.0\")" + }, + "sha512": { + "type": "string", + "description": "Sha512 is the SHA-512 hash of the NuGet package content WITHOUT the signed content for verification (won't match hash from NuGet API or manual calculation of .nupkg file)" + }, + "hashPath": { + "type": "string", + "description": "HashPath is the relative path to the .nupkg.sha512 hash file (e.g. \"app.metrics.3.0.0.nupkg.sha512\")" + }, + "type": { + "type": "string", + "description": "Type is type of entry could be package or project for internal refs" + }, + "executables": { + "additionalProperties": { + "$ref": "#/$defs/DotnetPortableExecutableEntry" + }, + "type": "object", + "description": "Executables are the map of .NET Portable Executable files within this package with their version resources" + } + }, + "type": "object", + "required": [ + "name", + "version", + "path", + "sha512", + "hashPath" + ], + "description": "DotnetDepsEntry is a struct that represents a single entry found in the \"libraries\" section in a .NET [*.]deps.json file." 
+ }, + "DotnetPackagesLockEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the packages.lock.json file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the packages.lock.json file" + }, + "contentHash": { + "type": "string", + "description": "ContentHash is the hash of the package content for verification" + }, + "type": { + "type": "string", + "description": "Type is the dependency type indicating how this dependency was added (Direct=explicit in project file, Transitive=pulled in by another package, Project=project reference)" + } + }, + "type": "object", + "required": [ + "name", + "version", + "contentHash", + "type" + ], + "description": "DotnetPackagesLockEntry is a struct that represents a single entry found in the \"dependencies\" section in a .NET packages.lock.json file." + }, + "DotnetPortableExecutableEntry": { + "properties": { + "assemblyVersion": { + "type": "string", + "description": "AssemblyVersion is the .NET assembly version number (strong-named version)" + }, + "legalCopyright": { + "type": "string", + "description": "LegalCopyright is the copyright notice string" + }, + "comments": { + "type": "string", + "description": "Comments are additional comments or description embedded in PE resources" + }, + "internalName": { + "type": "string", + "description": "InternalName is the internal name of the file" + }, + "companyName": { + "type": "string", + "description": "CompanyName is the company that produced the file" + }, + "productName": { + "type": "string", + "description": "ProductName is the name of the product this file is part of" + }, + "productVersion": { + "type": "string", + "description": "ProductVersion is the version of the product (may differ from AssemblyVersion)" + } + }, + "type": "object", + "required": [ + "assemblyVersion", + "legalCopyright", + "companyName", + "productName", + "productVersion" + ], + 
"description": "DotnetPortableExecutableEntry is a struct that represents a single entry found within \"VersionResources\" section of a .NET Portable Executable binary file." + }, + "DpkgArchiveEntry": { + "properties": { + "package": { + "type": "string", + "description": "Package is the package name as found in the status file" + }, + "source": { + "type": "string", + "description": "Source is the source package name this binary was built from (one source can produce multiple binary packages)" + }, + "version": { + "type": "string", + "description": "Version is the binary package version as found in the status file" + }, + "sourceVersion": { + "type": "string", + "description": "SourceVersion is the source package version (may differ from binary version when binNMU rebuilds occur)" + }, + "architecture": { + "type": "string", + "description": "Architecture is the target architecture per Debian spec (specific arch like amd64/arm64, wildcard like any, architecture-independent \"all\", or \"source\" for source packages)" + }, + "maintainer": { + "type": "string", + "description": "Maintainer is the package maintainer's name and email in RFC822 format (name must come first, then email in angle brackets)" + }, + "installedSize": { + "type": "integer", + "description": "InstalledSize is the total size of installed files in kilobytes" + }, + "provides": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Provides are the virtual packages provided by this package (allows other packages to depend on capabilities. 
Can include versioned provides like \"libdigest-md5-perl (= 2.55.01)\")" + }, + "depends": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Depends are the packages required for this package to function (will not be installed unless these requirements are met, creates strict ordering constraint)" + }, + "preDepends": { + "items": { + "type": "string" + }, + "type": "array", + "description": "PreDepends are the packages that must be installed and configured BEFORE even starting installation of this package (stronger than Depends, discouraged unless absolutely necessary as it adds strict constraints for apt)" + }, + "files": { + "items": { + "$ref": "#/$defs/DpkgFileRecord" + }, + "type": "array", + "description": "Files are the files installed by this package" + } + }, + "type": "object", + "required": [ + "package", + "source", + "version", + "sourceVersion", + "architecture", + "maintainer", + "installedSize", + "files" + ], + "description": "DpkgArchiveEntry represents package metadata extracted from a .deb archive file." 
+ }, + "DpkgDbEntry": { + "properties": { + "package": { + "type": "string", + "description": "Package is the package name as found in the status file" + }, + "source": { + "type": "string", + "description": "Source is the source package name this binary was built from (one source can produce multiple binary packages)" + }, + "version": { + "type": "string", + "description": "Version is the binary package version as found in the status file" + }, + "sourceVersion": { + "type": "string", + "description": "SourceVersion is the source package version (may differ from binary version when binNMU rebuilds occur)" + }, + "architecture": { + "type": "string", + "description": "Architecture is the target architecture per Debian spec (specific arch like amd64/arm64, wildcard like any, architecture-independent \"all\", or \"source\" for source packages)" + }, + "maintainer": { + "type": "string", + "description": "Maintainer is the package maintainer's name and email in RFC822 format (name must come first, then email in angle brackets)" + }, + "installedSize": { + "type": "integer", + "description": "InstalledSize is the total size of installed files in kilobytes" + }, + "provides": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Provides are the virtual packages provided by this package (allows other packages to depend on capabilities. 
Can include versioned provides like \"libdigest-md5-perl (= 2.55.01)\")" + }, + "depends": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Depends are the packages required for this package to function (will not be installed unless these requirements are met, creates strict ordering constraint)" + }, + "preDepends": { + "items": { + "type": "string" + }, + "type": "array", + "description": "PreDepends are the packages that must be installed and configured BEFORE even starting installation of this package (stronger than Depends, discouraged unless absolutely necessary as it adds strict constraints for apt)" + }, + "files": { + "items": { + "$ref": "#/$defs/DpkgFileRecord" + }, + "type": "array", + "description": "Files are the files installed by this package" + } + }, + "type": "object", + "required": [ + "package", + "source", + "version", + "sourceVersion", + "architecture", + "maintainer", + "installedSize", + "files" + ], + "description": "DpkgDBEntry represents all captured data for a Debian package DB entry; available fields are described at http://manpages.ubuntu.com/manpages/xenial/man1/dpkg-query.1.html in the --showformat section." + }, + "DpkgFileRecord": { + "properties": { + "path": { + "type": "string", + "description": "Path is the file path relative to the filesystem root" + }, + "digest": { + "$ref": "#/$defs/Digest", + "description": "Digest is the file content hash (typically MD5 for dpkg compatibility with legacy systems)" + }, + "isConfigFile": { + "type": "boolean", + "description": "IsConfigFile is whether this file is marked as a configuration file (dpkg will preserve user modifications during upgrades)" + } + }, + "type": "object", + "required": [ + "path", + "isConfigFile" + ], + "description": "DpkgFileRecord represents a single file attributed to a debian package." 
+ }, + "ELFSecurityFeatures": { + "properties": { + "symbolTableStripped": { + "type": "boolean", + "description": "SymbolTableStripped indicates whether debugging symbols have been removed." + }, + "stackCanary": { + "type": "boolean", + "description": "StackCanary indicates whether stack smashing protection is enabled." + }, + "nx": { + "type": "boolean", + "description": "NoExecutable indicates whether NX (no-execute) protection is enabled for the stack." + }, + "relRO": { + "type": "string", + "description": "RelocationReadOnly indicates the RELRO protection level." + }, + "pie": { + "type": "boolean", + "description": "PositionIndependentExecutable indicates whether the binary is compiled as PIE." + }, + "dso": { + "type": "boolean", + "description": "DynamicSharedObject indicates whether the binary is a shared library." + }, + "safeStack": { + "type": "boolean", + "description": "LlvmSafeStack represents a compiler-based security mechanism that separates the stack into a safe stack for storing return addresses and other critical data, and an unsafe stack for everything else, to mitigate stack-based memory corruption errors\nsee https://clang.llvm.org/docs/SafeStack.html" + }, + "cfi": { + "type": "boolean", + "description": "ControlFlowIntegrity represents runtime checks to ensure a program's control flow adheres to the legal paths determined at compile time, thus protecting against various types of control-flow hijacking attacks\nsee https://clang.llvm.org/docs/ControlFlowIntegrity.html" + }, + "fortify": { + "type": "boolean", + "description": "ClangFortifySource is a broad suite of extensions to libc aimed at catching misuses of common library functions\nsee https://android.googlesource.com/platform//bionic/+/d192dbecf0b2a371eb127c0871f77a9caf81c4d2/docs/clang_fortify_anatomy.md" + } + }, + "type": "object", + "required": [ + "symbolTableStripped", + "nx", + "relRO", + "pie", + "dso" + ], + "description": "ELFSecurityFeatures captures security hardening 
and protection mechanisms in ELF binaries." + }, + "ElfBinaryPackageNoteJsonPayload": { + "properties": { + "type": { + "type": "string", + "description": "Type is the type of the package (e.g. \"rpm\", \"deb\", \"apk\", etc.)" + }, + "architecture": { + "type": "string", + "description": "Architecture of the binary package (e.g. \"amd64\", \"arm\", etc.)" + }, + "osCPE": { + "type": "string", + "description": "OSCPE is a CPE name for the OS, typically corresponding to CPE_NAME in os-release (e.g. cpe:/o:fedoraproject:fedora:33)\n\nDeprecated: in Syft 2.0 the struct tag will be corrected to `osCpe` to match the systemd spec casing." + }, + "appCpe": { + "type": "string", + "description": "AppCpe is a CPE name for the upstream Application, as found in NVD CPE search (e.g. cpe:2.3:a:gnu:coreutils:5.0)" + }, + "os": { + "type": "string", + "description": "OS is the OS name, typically corresponding to ID in os-release (e.g. \"fedora\")" + }, + "osVersion": { + "type": "string", + "description": "osVersion is the version of the OS, typically corresponding to VERSION_ID in os-release (e.g. 
\"33\")" + }, + "system": { + "type": "string", + "description": "System is a context-specific name for the system that the binary package is intended to run on or a part of" + }, + "vendor": { + "type": "string", + "description": "Vendor is the individual or organization that produced the source code for the binary" + }, + "sourceRepo": { + "type": "string", + "description": "SourceRepo is the URL to the source repository for which the binary was built from" + }, + "commit": { + "type": "string", + "description": "Commit is the commit hash of the source repository for which the binary was built from" + } + }, + "type": "object", + "description": "ELFBinaryPackageNoteJSONPayload Represents metadata captured from the .note.package section of an ELF-formatted binary" + }, + "ElixirMixLockEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the mix.lock file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the mix.lock file" + }, + "pkgHash": { + "type": "string", + "description": "PkgHash is the outer checksum (SHA-256) of the entire Hex package tarball for integrity verification (preferred method, replaces deprecated inner checksum)" + }, + "pkgHashExt": { + "type": "string", + "description": "PkgHashExt is the extended package hash format (inner checksum is deprecated - SHA-256 of concatenated file contents excluding CHECKSUM file, now replaced by outer checksum)" + } + }, + "type": "object", + "required": [ + "name", + "version", + "pkgHash", + "pkgHashExt" + ], + "description": "ElixirMixLockEntry is a struct that represents a single entry in a mix.lock file" + }, + "ErlangRebarLockEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the rebar.lock file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the rebar.lock file" + }, + 
"pkgHash": { + "type": "string", + "description": "PkgHash is the outer checksum (SHA-256) of the entire Hex package tarball for integrity verification (preferred method over deprecated inner checksum)" + }, + "pkgHashExt": { + "type": "string", + "description": "PkgHashExt is the extended package hash format (inner checksum deprecated - was SHA-256 of concatenated file contents)" + } + }, + "type": "object", + "required": [ + "name", + "version", + "pkgHash", + "pkgHashExt" + ], + "description": "ErlangRebarLockEntry represents a single package entry from the \"deps\" section within an Erlang rebar.lock file." + }, + "Executable": { + "properties": { + "format": { + "type": "string", + "description": "Format denotes either ELF, Mach-O, or PE" + }, + "hasExports": { + "type": "boolean", + "description": "HasExports indicates whether the binary exports symbols." + }, + "hasEntrypoint": { + "type": "boolean", + "description": "HasEntrypoint indicates whether the binary has an entry point function." + }, + "importedLibraries": { + "items": { + "type": "string" + }, + "type": "array", + "description": "ImportedLibraries lists the shared libraries required by this executable." + }, + "elfSecurityFeatures": { + "$ref": "#/$defs/ELFSecurityFeatures", + "description": "ELFSecurityFeatures contains ELF-specific security hardening information when Format is ELF." + } + }, + "type": "object", + "required": [ + "format", + "hasExports", + "hasEntrypoint", + "importedLibraries" + ], + "description": "Executable contains metadata about binary files and their security features." + }, + "File": { + "properties": { + "id": { + "type": "string", + "description": "ID is a unique identifier for this file within the SBOM." + }, + "location": { + "$ref": "#/$defs/Coordinates", + "description": "Location is the file path and layer information where this file was found." 
+ }, + "metadata": { + "$ref": "#/$defs/FileMetadataEntry", + "description": "Metadata contains filesystem metadata such as permissions, ownership, and file type." + }, + "contents": { + "type": "string", + "description": "Contents is the file contents for small files." + }, + "digests": { + "items": { + "$ref": "#/$defs/Digest" + }, + "type": "array", + "description": "Digests contains cryptographic hashes of the file contents." + }, + "licenses": { + "items": { + "$ref": "#/$defs/FileLicense" + }, + "type": "array", + "description": "Licenses contains license information discovered within this file." + }, + "executable": { + "$ref": "#/$defs/Executable", + "description": "Executable contains executable metadata if this file is a binary." + }, + "unknowns": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Unknowns contains unknown fields for forward compatibility." + } + }, + "type": "object", + "required": [ + "id", + "location" + ], + "description": "File represents a file discovered during cataloging with its metadata, content digests, licenses, and relationships to packages." + }, + "FileLicense": { + "properties": { + "value": { + "type": "string", + "description": "Value is the raw license identifier or text as found in the file." + }, + "spdxExpression": { + "type": "string", + "description": "SPDXExpression is the parsed SPDX license expression." + }, + "type": { + "type": "string", + "description": "Type is the license type classification (e.g., declared, concluded, discovered)." + }, + "evidence": { + "$ref": "#/$defs/FileLicenseEvidence", + "description": "Evidence contains supporting evidence for this license detection." + } + }, + "type": "object", + "required": [ + "value", + "spdxExpression", + "type" + ], + "description": "FileLicense represents license information discovered within a file's contents or metadata, including the matched license text and SPDX expression." 
+ }, + "FileLicenseEvidence": { + "properties": { + "confidence": { + "type": "integer", + "description": "Confidence is the confidence score for this license detection (0-100)." + }, + "offset": { + "type": "integer", + "description": "Offset is the byte offset where the license text starts in the file." + }, + "extent": { + "type": "integer", + "description": "Extent is the length of the license text in bytes." + } + }, + "type": "object", + "required": [ + "confidence", + "offset", + "extent" + ], + "description": "FileLicenseEvidence contains supporting evidence for a license detection in a file, including the byte offset, extent, and confidence level." + }, + "FileMetadataEntry": { + "properties": { + "mode": { + "type": "integer", + "description": "Mode is the Unix file permission mode in octal format." + }, + "type": { + "type": "string", + "description": "Type is the file type (e.g., \"RegularFile\", \"Directory\", \"SymbolicLink\")." + }, + "linkDestination": { + "type": "string", + "description": "LinkDestination is the target path for symbolic links." + }, + "userID": { + "type": "integer", + "description": "UserID is the file owner user ID." + }, + "groupID": { + "type": "integer", + "description": "GroupID is the file owner group ID." + }, + "mimeType": { + "type": "string", + "description": "MIMEType is the MIME type of the file contents." + }, + "size": { + "type": "integer", + "description": "Size is the file size in bytes." + } + }, + "type": "object", + "required": [ + "mode", + "type", + "userID", + "groupID", + "mimeType", + "size" + ], + "description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file." 
+ }, + "GgufFileHeader": { + "properties": { + "ggufVersion": { + "type": "integer", + "description": "GGUFVersion is the GGUF format version (e.g., 3)" + }, + "fileSize": { + "type": "integer", + "description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)" + }, + "architecture": { + "type": "string", + "description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")" + }, + "quantization": { + "type": "string", + "description": "Quantization is the quantization type (e.g., \"IQ4_NL\", \"Q4_K_M\")" + }, + "parameters": { + "type": "integer", + "description": "Parameters is the number of model parameters (if present in header)" + }, + "tensorCount": { + "type": "integer", + "description": "TensorCount is the number of tensors in the model" + }, + "header": { + "type": "object", + "description": "RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication." + }, + "metadataHash": { + "type": "string", + "description": "MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames." + }, + "parts": { + "items": { + "$ref": "#/$defs/GgufFileHeader" + }, + "type": "array", + "description": "Parts contains headers from additional GGUF files that were merged\ninto this package during post-processing (e.g., from OCI layers without model names)." 
+ } + }, + "type": "object", + "required": [ + "ggufVersion", + "tensorCount" + ], + "description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file." + }, + "GithubActionsUseStatement": { + "properties": { + "value": { + "type": "string", + "description": "Value is the action reference (e.g. \"actions/checkout@v3\")" + }, + "comment": { + "type": "string", + "description": "Comment is the inline comment associated with this uses statement" + } + }, + "type": "object", + "required": [ + "value" + ], + "description": "GitHubActionsUseStatement represents a single 'uses' statement in a GitHub Actions workflow file referencing an action or reusable workflow." + }, + "GoModuleBuildinfoEntry": { + "properties": { + "goBuildSettings": { + "$ref": "#/$defs/KeyValues", + "description": "BuildSettings contains the Go build settings and flags used to compile the binary (e.g., GOARCH, GOOS, CGO_ENABLED)." + }, + "goCompiledVersion": { + "type": "string", + "description": "GoCompiledVersion is the version of Go used to compile the binary." + }, + "architecture": { + "type": "string", + "description": "Architecture is the target CPU architecture for the binary (extracted from GOARCH build setting)." + }, + "h1Digest": { + "type": "string", + "description": "H1Digest is the Go module hash in h1: format for the main module from go.sum." + }, + "mainModule": { + "type": "string", + "description": "MainModule is the main module path for the binary (e.g., \"github.com/anchore/syft\")." + }, + "goCryptoSettings": { + "items": { + "type": "string" + }, + "type": "array", + "description": "GoCryptoSettings contains FIPS and cryptographic configuration settings if present." + }, + "goExperiments": { + "items": { + "type": "string" + }, + "type": "array", + "description": "GoExperiments lists experimental Go features enabled during compilation (e.g., \"arenas\", \"cgocheck2\")." 
+ } + }, + "type": "object", + "required": [ + "goCompiledVersion", + "architecture" + ], + "description": "GolangBinaryBuildinfoEntry represents all captured data for a Golang binary" + }, + "GoModuleEntry": { + "properties": { + "h1Digest": { + "type": "string", + "description": "H1Digest is the Go module hash in h1: format from go.sum for verifying module contents." + } + }, + "type": "object", + "description": "GolangModuleEntry represents all captured data for a Golang source scan with go.mod/go.sum" + }, + "GoSourceEntry": { + "properties": { + "h1Digest": { + "type": "string", + "description": "H1Digest is the Go module hash in h1: format from go.sum for verifying module contents." + }, + "os": { + "type": "string", + "description": "OperatingSystem is the target OS for build constraints (e.g., \"linux\", \"darwin\", \"windows\")." + }, + "architecture": { + "type": "string", + "description": "Architecture is the target CPU architecture for build constraints (e.g., \"amd64\", \"arm64\")." + }, + "buildTags": { + "type": "string", + "description": "BuildTags are the build tags used to conditionally compile code (e.g., \"integration,debug\")." + }, + "cgoEnabled": { + "type": "boolean", + "description": "CgoEnabled indicates whether CGO was enabled for this package." + } + }, + "type": "object", + "required": [ + "cgoEnabled" + ], + "description": "GolangSourceEntry represents all captured data for a Golang package found through source analysis" + }, + "HaskellHackageStackEntry": { + "properties": { + "pkgHash": { + "type": "string", + "description": "PkgHash is the package content hash for verification" + } + }, + "type": "object", + "description": "HackageStackYamlEntry represents a single entry from the \"extra-deps\" section of a stack.yaml file." 
+ }, + "HaskellHackageStackLockEntry": { + "properties": { + "pkgHash": { + "type": "string", + "description": "PkgHash is the package content hash for verification" + }, + "snapshotURL": { + "type": "string", + "description": "SnapshotURL is the URL to the Stack snapshot this package came from" + } + }, + "type": "object", + "description": "HackageStackYamlLockEntry represents a single entry from the \"packages\" section of a stack.yaml.lock file." + }, + "HomebrewFormula": { + "properties": { + "tap": { + "type": "string", + "description": "Tap is the Homebrew tap this formula belongs to (e.g. \"homebrew/core\")" + }, + "homepage": { + "type": "string", + "description": "Homepage is the upstream project homepage URL" + }, + "description": { + "type": "string", + "description": "Description is a human-readable formula description" + } + }, + "type": "object", + "description": "HomebrewFormula represents metadata about a Homebrew formula package extracted from formula JSON files." + }, + "IDLikes": { + "items": { + "type": "string" + }, + "type": "array", + "description": "IDLikes represents a list of distribution IDs that this Linux distribution is similar to or derived from, as defined in os-release ID_LIKE field." 
+ }, + "JavaArchive": { + "properties": { + "virtualPath": { + "type": "string", + "description": "VirtualPath is path within the archive hierarchy, where nested entries are delimited with ':' (for nested JARs)" + }, + "manifest": { + "$ref": "#/$defs/JavaManifest", + "description": "Manifest is parsed META-INF/MANIFEST.MF contents" + }, + "pomProperties": { + "$ref": "#/$defs/JavaPomProperties", + "description": "PomProperties is parsed pom.properties file contents" + }, + "pomProject": { + "$ref": "#/$defs/JavaPomProject", + "description": "PomProject is parsed pom.xml file contents" + }, + "digest": { + "items": { + "$ref": "#/$defs/Digest" + }, + "type": "array", + "description": "ArchiveDigests is cryptographic hashes of the archive file" + } + }, + "type": "object", + "required": [ + "virtualPath" + ], + "description": "JavaArchive encapsulates all Java ecosystem metadata for a package as well as an (optional) parent relationship." + }, + "JavaJvmInstallation": { + "properties": { + "release": { + "$ref": "#/$defs/JavaVMRelease", + "description": "Release is JVM release information and version details" + }, + "files": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Files are the list of files that are part of this JVM installation" + } + }, + "type": "object", + "required": [ + "release", + "files" + ], + "description": "JavaVMInstallation represents a Java Virtual Machine installation discovered on the system with its release information and file list." + }, + "JavaManifest": { + "properties": { + "main": { + "$ref": "#/$defs/KeyValues", + "description": "Main is main manifest attributes as key-value pairs" + }, + "sections": { + "items": { + "$ref": "#/$defs/KeyValues" + }, + "type": "array", + "description": "Sections are the named sections from the manifest (e.g. 
per-entry attributes)" + } + }, + "type": "object", + "description": "JavaManifest represents the fields of interest extracted from a Java archive's META-INF/MANIFEST.MF file." + }, + "JavaPomParent": { + "properties": { + "groupId": { + "type": "string", + "description": "GroupID is the parent Maven group identifier" + }, + "artifactId": { + "type": "string", + "description": "ArtifactID is the parent Maven artifact identifier" + }, + "version": { + "type": "string", + "description": "Version is the parent version (child inherits configuration from this specific version of parent POM)" + } + }, + "type": "object", + "required": [ + "groupId", + "artifactId", + "version" + ], + "description": "JavaPomParent contains the fields within the \u003cparent\u003e tag in a pom.xml file" + }, + "JavaPomProject": { + "properties": { + "path": { + "type": "string", + "description": "Path is path to the pom.xml file within the archive" + }, + "parent": { + "$ref": "#/$defs/JavaPomParent", + "description": "Parent is the parent POM reference for inheritance (child POMs inherit configuration from parent)" + }, + "groupId": { + "type": "string", + "description": "GroupID is Maven group identifier (reversed domain name like org.apache.maven)" + }, + "artifactId": { + "type": "string", + "description": "ArtifactID is Maven artifact identifier (project name)" + }, + "version": { + "type": "string", + "description": "Version is project version (together with groupId and artifactId forms Maven coordinates groupId:artifactId:version)" + }, + "name": { + "type": "string", + "description": "Name is a human-readable project name (displayed in Maven-generated documentation)" + }, + "description": { + "type": "string", + "description": "Description is detailed project description" + }, + "url": { + "type": "string", + "description": "URL is the project URL (typically project website or repository)" + } + }, + "type": "object", + "required": [ + "path", + "groupId", + "artifactId", + 
"version", + "name" + ], + "description": "JavaPomProject represents fields of interest extracted from a Java archive's pom.xml file." + }, + "JavaPomProperties": { + "properties": { + "path": { + "type": "string", + "description": "Path is path to the pom.properties file within the archive" + }, + "name": { + "type": "string", + "description": "Name is the project name" + }, + "groupId": { + "type": "string", + "description": "GroupID is Maven group identifier uniquely identifying the project across all projects (follows reversed domain name convention like com.company.project)" + }, + "artifactId": { + "type": "string", + "description": "ArtifactID is Maven artifact identifier, the name of the jar/artifact (unique within the groupId scope)" + }, + "version": { + "type": "string", + "description": "Version is artifact version" + }, + "scope": { + "type": "string", + "description": "Scope is dependency scope determining when dependency is available (compile=default all phases, test=test compilation/execution only, runtime=runtime and test not compile, provided=expected from JDK or container)" + }, + "extraFields": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "Extra is additional custom properties not in standard Maven coordinates" + } + }, + "type": "object", + "required": [ + "path", + "name", + "groupId", + "artifactId", + "version" + ], + "description": "JavaPomProperties represents the fields of interest extracted from a Java archive's pom.properties file." 
+ }, + "JavaVMRelease": { + "properties": { + "implementor": { + "type": "string", + "description": "Implementor is extracted with the `java.vendor` JVM property" + }, + "implementorVersion": { + "type": "string", + "description": "ImplementorVersion is extracted with the `java.vendor.version` JVM property" + }, + "javaRuntimeVersion": { + "type": "string", + "description": "JavaRuntimeVersion is extracted from the 'java.runtime.version' JVM property" + }, + "javaVersion": { + "type": "string", + "description": "JavaVersion matches that from `java -version` command output" + }, + "javaVersionDate": { + "type": "string", + "description": "JavaVersionDate is extracted from the 'java.version.date' JVM property" + }, + "libc": { + "type": "string", + "description": "Libc can either be 'glibc' or 'musl'" + }, + "modules": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Modules is a list of JVM modules that are packaged" + }, + "osArch": { + "type": "string", + "description": "OsArch is the target CPU architecture" + }, + "osName": { + "type": "string", + "description": "OsName is the name of the target runtime operating system environment" + }, + "osVersion": { + "type": "string", + "description": "OsVersion is the version of the target runtime operating system environment" + }, + "source": { + "type": "string", + "description": "Source refers to the origin repository of OpenJDK source" + }, + "buildSource": { + "type": "string", + "description": "BuildSource is the Git SHA of the build repository" + }, + "buildSourceRepo": { + "type": "string", + "description": "BuildSourceRepo refers to the repository URL for the build source" + }, + "sourceRepo": { + "type": "string", + "description": "SourceRepo refers to the OpenJDK repository URL" + }, + "fullVersion": { + "type": "string", + "description": "FullVersion is extracted from the 'java.runtime.version' JVM property" + }, + "semanticVersion": { + "type": "string", + "description": "SemanticVersion 
is derived from the OpenJDK version" + }, + "buildInfo": { + "type": "string", + "description": "BuildInfo contains additional build information" + }, + "jvmVariant": { + "type": "string", + "description": "JvmVariant specifies the JVM variant (e.g., Hotspot or OpenJ9)" + }, + "jvmVersion": { + "type": "string", + "description": "JvmVersion is extracted from the 'java.vm.version' JVM property" + }, + "imageType": { + "type": "string", + "description": "ImageType can be 'JDK' or 'JRE'" + }, + "buildType": { + "type": "string", + "description": "BuildType can be 'commercial' (used in some older oracle JDK distributions)" + } + }, + "type": "object", + "description": "JavaVMRelease represents JVM version and build information extracted from the release file in a Java installation." + }, + "JavascriptNpmPackage": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in package.json" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in package.json" + }, + "author": { + "type": "string", + "description": "Author is package author name" + }, + "homepage": { + "type": "string", + "description": "Homepage is project homepage URL" + }, + "description": { + "type": "string", + "description": "Description is a human-readable package description" + }, + "url": { + "type": "string", + "description": "URL is repository or project URL" + }, + "private": { + "type": "boolean", + "description": "Private is whether this is a private package" + } + }, + "type": "object", + "required": [ + "name", + "version", + "author", + "homepage", + "description", + "url", + "private" + ], + "description": "NpmPackage represents the contents of a javascript package.json file." 
+ }, + "JavascriptNpmPackageLockEntry": { + "properties": { + "resolved": { + "type": "string", + "description": "Resolved is URL where this package was downloaded from (registry source)" + }, + "integrity": { + "type": "string", + "description": "Integrity is Subresource Integrity hash for verification using standard SRI format (sha512-... or sha1-...). npm changed from SHA-1 to SHA-512 in newer versions. For registry sources this is the integrity from registry, for remote tarballs it's SHA-512 of the file. npm verifies tarball matches this hash before unpacking, throwing EINTEGRITY error if mismatch detected." + }, + "dependencies": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "Dependencies is a map of dependencies and their version markers, i.e. \"lodash\": \"^1.0.0\"" + } + }, + "type": "object", + "required": [ + "resolved", + "integrity", + "dependencies" + ], + "description": "NpmPackageLockEntry represents a single entry within the \"packages\" section of a package-lock.json file." + }, + "JavascriptPnpmLockEntry": { + "properties": { + "resolution": { + "$ref": "#/$defs/PnpmLockResolution", + "description": "Resolution is the resolution information for the package" + }, + "dependencies": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "Dependencies is a map of dependencies and their versions" + } + }, + "type": "object", + "required": [ + "resolution", + "dependencies" + ], + "description": "PnpmLockEntry represents a single entry in the \"packages\" section of a pnpm-lock.yaml file." 
+ }, + "JavascriptYarnLockEntry": { + "properties": { + "resolved": { + "type": "string", + "description": "Resolved is URL where this package was downloaded from" + }, + "integrity": { + "type": "string", + "description": "Integrity is Subresource Integrity hash for verification (SRI format)" + }, + "dependencies": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "Dependencies is a map of dependencies and their versions" + } + }, + "type": "object", + "required": [ + "resolved", + "integrity", + "dependencies" + ], + "description": "YarnLockEntry represents a single entry section of a yarn.lock file." + }, + "KeyValue": { + "properties": { + "key": { + "type": "string", + "description": "Key is the key name" + }, + "value": { + "type": "string", + "description": "Value is the value associated with the key" + } + }, + "type": "object", + "required": [ + "key", + "value" + ], + "description": "KeyValue represents a single key-value pair." + }, + "KeyValues": { + "items": { + "$ref": "#/$defs/KeyValue" + }, + "type": "array", + "description": "KeyValues represents an ordered collection of key-value pairs that preserves insertion order." + }, + "License": { + "properties": { + "value": { + "type": "string", + "description": "Value is the raw license identifier or expression as found." + }, + "spdxExpression": { + "type": "string", + "description": "SPDXExpression is the parsed SPDX license expression." + }, + "type": { + "type": "string", + "description": "Type is the license type classification (e.g., declared, concluded, discovered)." + }, + "urls": { + "items": { + "type": "string" + }, + "type": "array", + "description": "URLs are URLs where license text or information can be found." + }, + "locations": { + "items": { + "$ref": "#/$defs/Location" + }, + "type": "array", + "description": "Locations are file locations where this license was discovered." 
+ }, + "contents": { + "type": "string", + "description": "Contents is the full license text content." + } + }, + "type": "object", + "required": [ + "value", + "spdxExpression", + "type", + "urls", + "locations" + ], + "description": "License represents software license information discovered for a package, including SPDX expressions and supporting evidence locations." + }, + "LinuxKernelArchive": { + "properties": { + "name": { + "type": "string", + "description": "Name is kernel name (typically \"Linux\")" + }, + "architecture": { + "type": "string", + "description": "Architecture is the target CPU architecture" + }, + "version": { + "type": "string", + "description": "Version is kernel version string" + }, + "extendedVersion": { + "type": "string", + "description": "ExtendedVersion is additional version information" + }, + "buildTime": { + "type": "string", + "description": "BuildTime is when the kernel was built" + }, + "author": { + "type": "string", + "description": "Author is who built the kernel" + }, + "format": { + "type": "string", + "description": "Format is kernel image format (e.g. 
bzImage, zImage)" + }, + "rwRootFS": { + "type": "boolean", + "description": "RWRootFS is whether root filesystem is mounted read-write" + }, + "swapDevice": { + "type": "integer", + "description": "SwapDevice is swap device number" + }, + "rootDevice": { + "type": "integer", + "description": "RootDevice is root device number" + }, + "videoMode": { + "type": "string", + "description": "VideoMode is default video mode setting" + } + }, + "type": "object", + "required": [ + "name", + "architecture", + "version" + ], + "description": "LinuxKernel represents all captured data for a Linux kernel" + }, + "LinuxKernelModule": { + "properties": { + "name": { + "type": "string", + "description": "Name is module name" + }, + "version": { + "type": "string", + "description": "Version is module version string" + }, + "sourceVersion": { + "type": "string", + "description": "SourceVersion is the source code version identifier" + }, + "path": { + "type": "string", + "description": "Path is the filesystem path to the .ko kernel object file (absolute path)" + }, + "description": { + "type": "string", + "description": "Description is a human-readable module description" + }, + "author": { + "type": "string", + "description": "Author is module author name and email" + }, + "license": { + "type": "string", + "description": "License is module license (e.g. GPL, BSD) which must be compatible with kernel" + }, + "kernelVersion": { + "type": "string", + "description": "KernelVersion is kernel version this module was built for" + }, + "versionMagic": { + "type": "string", + "description": "VersionMagic is version magic string for compatibility checking (includes kernel version, SMP status, module loading capabilities like \"3.17.4-302.fc21.x86_64 SMP mod_unload modversions\"). Module will NOT load if vermagic doesn't match running kernel." 
+ }, + "parameters": { + "additionalProperties": { + "$ref": "#/$defs/LinuxKernelModuleParameter" + }, + "type": "object", + "description": "Parameters are the module parameters that can be configured at load time (user-settable values like module options)" + } + }, + "type": "object", + "description": "LinuxKernelModule represents a loadable kernel module (.ko file) with its metadata, parameters, and dependencies." + }, + "LinuxKernelModuleParameter": { + "properties": { + "type": { + "type": "string", + "description": "Type is parameter data type (e.g. int, string, bool, array types)" + }, + "description": { + "type": "string", + "description": "Description is a human-readable parameter description explaining what the parameter controls" + } + }, + "type": "object", + "description": "LinuxKernelModuleParameter represents a configurable parameter for a kernel module with its type and description." + }, + "LinuxRelease": { + "properties": { + "prettyName": { + "type": "string", + "description": "PrettyName is a human-readable operating system name with version." + }, + "name": { + "type": "string", + "description": "Name is the operating system name without version information." + }, + "id": { + "type": "string", + "description": "ID is the lower-case operating system identifier (e.g., \"ubuntu\", \"rhel\")." + }, + "idLike": { + "$ref": "#/$defs/IDLikes", + "description": "IDLike is a list of operating system IDs this distribution is similar to or derived from." + }, + "version": { + "type": "string", + "description": "Version is the operating system version including codename if available." + }, + "versionID": { + "type": "string", + "description": "VersionID is the operating system version number or identifier." + }, + "versionCodename": { + "type": "string", + "description": "VersionCodename is the operating system release codename (e.g., \"jammy\", \"bullseye\")." 
+ }, + "buildID": { + "type": "string", + "description": "BuildID is a build identifier for the operating system." + }, + "imageID": { + "type": "string", + "description": "ImageID is an identifier for container or cloud images." + }, + "imageVersion": { + "type": "string", + "description": "ImageVersion is the version for container or cloud images." + }, + "variant": { + "type": "string", + "description": "Variant is the operating system variant name (e.g., \"Server\", \"Workstation\")." + }, + "variantID": { + "type": "string", + "description": "VariantID is the lower-case operating system variant identifier." + }, + "homeURL": { + "type": "string", + "description": "HomeURL is the homepage URL for the operating system." + }, + "supportURL": { + "type": "string", + "description": "SupportURL is the support or help URL for the operating system." + }, + "bugReportURL": { + "type": "string", + "description": "BugReportURL is the bug reporting URL for the operating system." + }, + "privacyPolicyURL": { + "type": "string", + "description": "PrivacyPolicyURL is the privacy policy URL for the operating system." + }, + "cpeName": { + "type": "string", + "description": "CPEName is the Common Platform Enumeration name for the operating system." + }, + "supportEnd": { + "type": "string", + "description": "SupportEnd is the end of support date or version identifier." + }, + "extendedSupport": { + "type": "boolean", + "description": "ExtendedSupport indicates whether extended security or support is available." + } + }, + "type": "object", + "description": "LinuxRelease contains Linux distribution identification and version information extracted from /etc/os-release or similar system files." + }, + "Location": { + "properties": { + "path": { + "type": "string", + "description": "RealPath is the canonical absolute form of the path accessed (all symbolic links have been followed and relative path components like '.' and '..' have been removed)." 
+ }, + "layerID": { + "type": "string", + "description": "FileSystemID is an ID representing an entire filesystem. For container images, this is a layer digest. For directories or a root filesystem, this is blank." + }, + "accessPath": { + "type": "string", + "description": "AccessPath is the path used to retrieve file contents (which may or may not have hardlinks / symlinks in the path)" + }, + "annotations": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "Arbitrary key-value pairs that can be used to annotate a location" + } + }, + "type": "object", + "required": [ + "path", + "accessPath" + ], + "description": "Location represents a path relative to a particular filesystem resolved to a specific file.Reference." + }, + "LuarocksPackage": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the .rockspec file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the .rockspec file" + }, + "license": { + "type": "string", + "description": "License is license identifier" + }, + "homepage": { + "type": "string", + "description": "Homepage is project homepage URL" + }, + "description": { + "type": "string", + "description": "Description is a human-readable package description" + }, + "url": { + "type": "string", + "description": "URL is the source download URL" + }, + "dependencies": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "Dependencies are the map of dependency names to version constraints" + } + }, + "type": "object", + "required": [ + "name", + "version", + "license", + "homepage", + "description", + "url", + "dependencies" + ], + "description": "LuaRocksPackage represents a Lua package managed by the LuaRocks package manager with metadata from .rockspec files." 
+ }, + "MicrosoftKbPatch": { + "properties": { + "product_id": { + "type": "string", + "description": "ProductID is MSRC Product ID (e.g. \"Windows 10 Version 1703 for 32-bit Systems\")" + }, + "kb": { + "type": "string", + "description": "Kb is Knowledge Base article number (e.g. \"5001028\")" + } + }, + "type": "object", + "required": [ + "product_id", + "kb" + ], + "description": "MicrosoftKbPatch represents a Windows Knowledge Base patch identifier associated with a specific Microsoft product from the MSRC (Microsoft Security Response Center)." + }, + "NixDerivation": { + "properties": { + "path": { + "type": "string", + "description": "Path is path to the .drv file in Nix store" + }, + "system": { + "type": "string", + "description": "System is target system string indicating where derivation can be built (e.g. \"x86_64-linux\", \"aarch64-darwin\"). Must match current system for local builds." + }, + "inputDerivations": { + "items": { + "$ref": "#/$defs/NixDerivationReference" + }, + "type": "array", + "description": "InputDerivations are the list of other derivations that were inputs to this build (dependencies)" + }, + "inputSources": { + "items": { + "type": "string" + }, + "type": "array", + "description": "InputSources are the list of source file paths that were inputs to this build" + } + }, + "type": "object", + "description": "NixDerivation represents a Nix .drv file that describes how to build a package including inputs, outputs, and build instructions." + }, + "NixDerivationReference": { + "properties": { + "path": { + "type": "string", + "description": "Path is path to the referenced .drv file" + }, + "outputs": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Outputs are which outputs of the referenced derivation were used (e.g. [\"out\"], [\"bin\", \"dev\"])" + } + }, + "type": "object", + "description": "NixDerivationReference represents a reference to another derivation used as a build input or runtime dependency." 
+ }, + "NixStoreEntry": { + "properties": { + "path": { + "type": "string", + "description": "Path is full store path for this output (e.g. /nix/store/abc123...-package-1.0)" + }, + "output": { + "type": "string", + "description": "Output is the specific output name for multi-output packages (empty string for default \"out\" output, can be \"bin\", \"dev\", \"doc\", etc.)" + }, + "outputHash": { + "type": "string", + "description": "OutputHash is hash prefix of the store path basename (first part before the dash)" + }, + "derivation": { + "$ref": "#/$defs/NixDerivation", + "description": "Derivation is information about the .drv file that describes how this package was built" + }, + "files": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Files are the list of files under the nix/store path for this package" + } + }, + "type": "object", + "required": [ + "outputHash" + ], + "description": "NixStoreEntry represents a package in the Nix store (/nix/store) with its derivation information and metadata." 
+ }, + "OpamPackage": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the .opam file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the .opam file" + }, + "licenses": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Licenses are the list of applicable licenses" + }, + "url": { + "type": "string", + "description": "URL is download URL for the package source" + }, + "checksum": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Checksums are the list of checksums for verification" + }, + "homepage": { + "type": "string", + "description": "Homepage is project homepage URL" + }, + "dependencies": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Dependencies are the list of required dependencies" + } + }, + "type": "object", + "required": [ + "name", + "version", + "licenses", + "url", + "checksum", + "homepage", + "dependencies" + ], + "description": "OpamPackage represents an OCaml package managed by the OPAM package manager with metadata from .opam files." 
+ }, + "Package": { + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "type": { + "type": "string" + }, + "foundBy": { + "type": "string" + }, + "locations": { + "items": { + "$ref": "#/$defs/Location" + }, + "type": "array" + }, + "licenses": { + "$ref": "#/$defs/licenses" + }, + "language": { + "type": "string" + }, + "cpes": { + "$ref": "#/$defs/cpes" + }, + "purl": { + "type": "string" + }, + "metadataType": { + "type": "string" + }, + "metadata": { + "anyOf": [ + { + "type": "null" + }, + { + "$ref": "#/$defs/AlpmDbEntry" + }, + { + "$ref": "#/$defs/ApkDbEntry" + }, + { + "$ref": "#/$defs/BinarySignature" + }, + { + "$ref": "#/$defs/BitnamiSbomEntry" + }, + { + "$ref": "#/$defs/CConanFileEntry" + }, + { + "$ref": "#/$defs/CConanInfoEntry" + }, + { + "$ref": "#/$defs/CConanLockEntry" + }, + { + "$ref": "#/$defs/CConanLockV2Entry" + }, + { + "$ref": "#/$defs/CocoaPodfileLockEntry" + }, + { + "$ref": "#/$defs/CondaMetadataEntry" + }, + { + "$ref": "#/$defs/DartPubspec" + }, + { + "$ref": "#/$defs/DartPubspecLockEntry" + }, + { + "$ref": "#/$defs/DotnetDepsEntry" + }, + { + "$ref": "#/$defs/DotnetPackagesLockEntry" + }, + { + "$ref": "#/$defs/DotnetPortableExecutableEntry" + }, + { + "$ref": "#/$defs/DpkgArchiveEntry" + }, + { + "$ref": "#/$defs/DpkgDbEntry" + }, + { + "$ref": "#/$defs/ElfBinaryPackageNoteJsonPayload" + }, + { + "$ref": "#/$defs/ElixirMixLockEntry" + }, + { + "$ref": "#/$defs/ErlangRebarLockEntry" + }, + { + "$ref": "#/$defs/GgufFileHeader" + }, + { + "$ref": "#/$defs/GithubActionsUseStatement" + }, + { + "$ref": "#/$defs/GoModuleBuildinfoEntry" + }, + { + "$ref": "#/$defs/GoModuleEntry" + }, + { + "$ref": "#/$defs/GoSourceEntry" + }, + { + "$ref": "#/$defs/HaskellHackageStackEntry" + }, + { + "$ref": "#/$defs/HaskellHackageStackLockEntry" + }, + { + "$ref": "#/$defs/HomebrewFormula" + }, + { + "$ref": "#/$defs/JavaArchive" + }, + { + "$ref": 
"#/$defs/JavaJvmInstallation" + }, + { + "$ref": "#/$defs/JavascriptNpmPackage" + }, + { + "$ref": "#/$defs/JavascriptNpmPackageLockEntry" + }, + { + "$ref": "#/$defs/JavascriptPnpmLockEntry" + }, + { + "$ref": "#/$defs/JavascriptYarnLockEntry" + }, + { + "$ref": "#/$defs/LinuxKernelArchive" + }, + { + "$ref": "#/$defs/LinuxKernelModule" + }, + { + "$ref": "#/$defs/LuarocksPackage" + }, + { + "$ref": "#/$defs/MicrosoftKbPatch" + }, + { + "$ref": "#/$defs/NixStoreEntry" + }, + { + "$ref": "#/$defs/OpamPackage" + }, + { + "$ref": "#/$defs/PeBinary" + }, + { + "$ref": "#/$defs/PhpComposerInstalledEntry" + }, + { + "$ref": "#/$defs/PhpComposerLockEntry" + }, + { + "$ref": "#/$defs/PhpPearEntry" + }, + { + "$ref": "#/$defs/PhpPeclEntry" + }, + { + "$ref": "#/$defs/PortageDbEntry" + }, + { + "$ref": "#/$defs/PythonPackage" + }, + { + "$ref": "#/$defs/PythonPdmLockEntry" + }, + { + "$ref": "#/$defs/PythonPipRequirementsEntry" + }, + { + "$ref": "#/$defs/PythonPipfileLockEntry" + }, + { + "$ref": "#/$defs/PythonPoetryLockEntry" + }, + { + "$ref": "#/$defs/PythonUvLockEntry" + }, + { + "$ref": "#/$defs/RDescription" + }, + { + "$ref": "#/$defs/RpmArchive" + }, + { + "$ref": "#/$defs/RpmDbEntry" + }, + { + "$ref": "#/$defs/RubyGemspec" + }, + { + "$ref": "#/$defs/RustCargoAuditEntry" + }, + { + "$ref": "#/$defs/RustCargoLockEntry" + }, + { + "$ref": "#/$defs/SafetensorsMetadata" + }, + { + "$ref": "#/$defs/SnapEntry" + }, + { + "$ref": "#/$defs/SwiftPackageManagerLockEntry" + }, + { + "$ref": "#/$defs/SwiplpackPackage" + }, + { + "$ref": "#/$defs/TerraformLockProviderEntry" + }, + { + "$ref": "#/$defs/WordpressPluginEntry" + } + ] + } + }, + "type": "object", + "required": [ + "id", + "name", + "version", + "type", + "foundBy", + "locations", + "licenses", + "language", + "cpes", + "purl" + ], + "description": "Package represents a pkg.Package object specialized for JSON marshaling and unmarshalling." 
+ }, + "PeBinary": { + "properties": { + "VersionResources": { + "$ref": "#/$defs/KeyValues", + "description": "VersionResources contains key-value pairs extracted from the PE file's version resource section (e.g., FileVersion, ProductName, CompanyName)." + } + }, + "type": "object", + "required": [ + "VersionResources" + ], + "description": "PEBinary represents metadata captured from a Portable Executable formatted binary (dll, exe, etc.)" + }, + "PhpComposerAuthors": { + "properties": { + "name": { + "type": "string", + "description": "Name is author's full name" + }, + "email": { + "type": "string", + "description": "Email is author's email address" + }, + "homepage": { + "type": "string", + "description": "Homepage is author's personal or company website" + } + }, + "type": "object", + "required": [ + "name" + ], + "description": "PhpComposerAuthors represents author information for a PHP Composer package from the authors field in composer.json." + }, + "PhpComposerExternalReference": { + "properties": { + "type": { + "type": "string", + "description": "Type is reference type (git for source VCS, zip/tar for dist archives)" + }, + "url": { + "type": "string", + "description": "URL is the URL to the resource (git repository URL or archive download URL)" + }, + "reference": { + "type": "string", + "description": "Reference is git commit hash or version tag for source, or archive version for dist" + }, + "shasum": { + "type": "string", + "description": "Shasum is SHA hash of the archive file for integrity verification (dist only)" + } + }, + "type": "object", + "required": [ + "type", + "url", + "reference" + ], + "description": "PhpComposerExternalReference represents source or distribution information for a PHP package, indicating where the package code is retrieved from." + }, + "PhpComposerInstalledEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is package name in vendor/package format (e.g. 
symfony/console)" + }, + "version": { + "type": "string", + "description": "Version is the package version" + }, + "source": { + "$ref": "#/$defs/PhpComposerExternalReference", + "description": "Source is the source repository information for development (typically git repo, used when passing --prefer-source). Originates from source code repository." + }, + "dist": { + "$ref": "#/$defs/PhpComposerExternalReference", + "description": "Dist is distribution archive information for production (typically zip/tar, default install method). Packaged version of released code." + }, + "require": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "Require is runtime dependencies with version constraints (package will not install unless these requirements can be met)" + }, + "provide": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "Provide is virtual packages/functionality provided by this package (allows other packages to depend on capabilities)" + }, + "require-dev": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "RequireDev is development-only dependencies (not installed in production, only when developing this package or running tests)" + }, + "suggest": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "Suggest is optional but recommended dependencies (suggestions for packages that would extend functionality)" + }, + "license": { + "items": { + "type": "string" + }, + "type": "array", + "description": "License is the list of license identifiers (SPDX format)" + }, + "type": { + "type": "string", + "description": "Type is package type indicating purpose (library=reusable code, project=application, metapackage=aggregates dependencies, etc.)" + }, + "notification-url": { + "type": "string", + "description": "NotificationURL is the URL to notify when package is installed (for tracking/statistics)" + }, + 
"bin": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Bin is the list of binary/executable files that should be added to PATH" + }, + "authors": { + "items": { + "$ref": "#/$defs/PhpComposerAuthors" + }, + "type": "array", + "description": "Authors are the list of package authors with name/email/homepage" + }, + "description": { + "type": "string", + "description": "Description is a human-readable package description" + }, + "homepage": { + "type": "string", + "description": "Homepage is project homepage URL" + }, + "keywords": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Keywords are the list of keywords for package discovery/search" + }, + "time": { + "type": "string", + "description": "Time is timestamp when this package version was released" + } + }, + "type": "object", + "required": [ + "name", + "version", + "source", + "dist" + ], + "description": "PhpComposerInstalledEntry represents a single package entry from a composer v1/v2 \"installed.json\" files (very similar to composer.lock files)." + }, + "PhpComposerLockEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is package name in vendor/package format (e.g. symfony/console)" + }, + "version": { + "type": "string", + "description": "Version is the package version" + }, + "source": { + "$ref": "#/$defs/PhpComposerExternalReference", + "description": "Source is the source repository information for development (typically git repo, used when passing --prefer-source). Originates from source code repository." + }, + "dist": { + "$ref": "#/$defs/PhpComposerExternalReference", + "description": "Dist is distribution archive information for production (typically zip/tar, default install method). Packaged version of released code." 
+ }, + "require": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "Require is runtime dependencies with version constraints (package will not install unless these requirements can be met)" + }, + "provide": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "Provide is virtual packages/functionality provided by this package (allows other packages to depend on capabilities)" + }, + "require-dev": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "RequireDev is development-only dependencies (not installed in production, only when developing this package or running tests)" + }, + "suggest": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "Suggest is optional but recommended dependencies (suggestions for packages that would extend functionality)" + }, + "license": { + "items": { + "type": "string" + }, + "type": "array", + "description": "License is the list of license identifiers (SPDX format)" + }, + "type": { + "type": "string", + "description": "Type is package type indicating purpose (library=reusable code, project=application, metapackage=aggregates dependencies, etc.)" + }, + "notification-url": { + "type": "string", + "description": "NotificationURL is the URL to notify when package is installed (for tracking/statistics)" + }, + "bin": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Bin is the list of binary/executable files that should be added to PATH" + }, + "authors": { + "items": { + "$ref": "#/$defs/PhpComposerAuthors" + }, + "type": "array", + "description": "Authors are the list of package authors with name/email/homepage" + }, + "description": { + "type": "string", + "description": "Description is a human-readable package description" + }, + "homepage": { + "type": "string", + "description": "Homepage is project homepage URL" + }, + "keywords": { + "items": { + 
"type": "string" + }, + "type": "array", + "description": "Keywords are the list of keywords for package discovery/search" + }, + "time": { + "type": "string", + "description": "Time is timestamp when this package version was released" + } + }, + "type": "object", + "required": [ + "name", + "version", + "source", + "dist" + ], + "description": "PhpComposerLockEntry represents a single package entry found from a composer.lock file." + }, + "PhpPearEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name" + }, + "channel": { + "type": "string", + "description": "Channel is PEAR channel this package is from" + }, + "version": { + "type": "string", + "description": "Version is the package version" + }, + "license": { + "items": { + "type": "string" + }, + "type": "array", + "description": "License is the list of applicable licenses" + } + }, + "type": "object", + "required": [ + "name", + "version" + ], + "description": "PhpPearEntry represents a single package entry found within php pear metadata files." + }, + "PhpPeclEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name" + }, + "channel": { + "type": "string", + "description": "Channel is PEAR channel this package is from" + }, + "version": { + "type": "string", + "description": "Version is the package version" + }, + "license": { + "items": { + "type": "string" + }, + "type": "array", + "description": "License is the list of applicable licenses" + } + }, + "type": "object", + "required": [ + "name", + "version" + ], + "description": "PhpPeclEntry represents a single package entry found within php pecl metadata files." 
+ }, + "PnpmLockResolution": { + "properties": { + "integrity": { + "type": "string", + "description": "Integrity is Subresource Integrity hash for verification (SRI format)" + } + }, + "type": "object", + "required": [ + "integrity" + ], + "description": "PnpmLockResolution contains package resolution metadata from pnpm lockfiles, including the integrity hash used for verification." + }, + "PortageDbEntry": { + "properties": { + "installedSize": { + "type": "integer", + "description": "InstalledSize is total size of installed files in bytes" + }, + "licenses": { + "type": "string", + "description": "Licenses is license string which may be an expression (e.g. \"GPL-2 OR Apache-2.0\")" + }, + "files": { + "items": { + "$ref": "#/$defs/PortageFileRecord" + }, + "type": "array", + "description": "Files are the files installed by this package (tracked in CONTENTS file)" + } + }, + "type": "object", + "required": [ + "installedSize", + "files" + ], + "description": "PortageEntry represents a single package entry in the portage DB flat-file store." + }, + "PortageFileRecord": { + "properties": { + "path": { + "type": "string", + "description": "Path is the file path relative to the filesystem root" + }, + "digest": { + "$ref": "#/$defs/Digest", + "description": "Digest is file content hash (MD5 for regular files in CONTENTS format: \"obj filename md5hash mtime\")" + } + }, + "type": "object", + "required": [ + "path" + ], + "description": "PortageFileRecord represents a single file attributed to a portage package." + }, + "PythonDirectURLOriginInfo": { + "properties": { + "url": { + "type": "string", + "description": "URL is the source URL from which the package was installed." + }, + "commitId": { + "type": "string", + "description": "CommitID is the VCS commit hash if installed from version control." + }, + "vcs": { + "type": "string", + "description": "VCS is the version control system type (e.g., \"git\", \"hg\")." 
+ } + }, + "type": "object", + "required": [ + "url" + ], + "description": "PythonDirectURLOriginInfo represents installation source metadata from direct_url.json for packages installed from VCS or direct URLs." + }, + "PythonFileDigest": { + "properties": { + "algorithm": { + "type": "string", + "description": "Algorithm is the hash algorithm used (e.g., \"sha256\")." + }, + "value": { + "type": "string", + "description": "Value is the hex-encoded hash digest value." + } + }, + "type": "object", + "required": [ + "algorithm", + "value" + ], + "description": "PythonFileDigest represents the file metadata for a single file attributed to a python package." + }, + "PythonFileRecord": { + "properties": { + "path": { + "type": "string", + "description": "Path is the installed file path from the RECORD file." + }, + "digest": { + "$ref": "#/$defs/PythonFileDigest", + "description": "Digest contains the hash algorithm and value for file integrity verification." + }, + "size": { + "type": "string", + "description": "Size is the file size in bytes as a string." + } + }, + "type": "object", + "required": [ + "path" + ], + "description": "PythonFileRecord represents a single entry within a RECORD file for a python wheel or egg package" + }, + "PythonPackage": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name from the Name field in PKG-INFO or METADATA." + }, + "version": { + "type": "string", + "description": "Version is the package version from the Version field in PKG-INFO or METADATA." + }, + "author": { + "type": "string", + "description": "Author is the package author name from the Author field." + }, + "authorEmail": { + "type": "string", + "description": "AuthorEmail is the package author's email address from the Author-Email field." + }, + "platform": { + "type": "string", + "description": "Platform indicates the target platform for the package (e.g., \"any\", \"linux\", \"win32\")." 
+ }, + "files": { + "items": { + "$ref": "#/$defs/PythonFileRecord" + }, + "type": "array", + "description": "Files are the installed files listed in the RECORD file for wheels or installed-files.txt for eggs." + }, + "sitePackagesRootPath": { + "type": "string", + "description": "SitePackagesRootPath is the root directory path containing the package (e.g., \"/usr/lib/python3.9/site-packages\")." + }, + "topLevelPackages": { + "items": { + "type": "string" + }, + "type": "array", + "description": "TopLevelPackages are the top-level Python module names from top_level.txt file." + }, + "directUrlOrigin": { + "$ref": "#/$defs/PythonDirectURLOriginInfo", + "description": "DirectURLOrigin contains VCS or direct URL installation information from direct_url.json." + }, + "requiresPython": { + "type": "string", + "description": "RequiresPython specifies the Python version requirement (e.g., \"\u003e=3.6\")." + }, + "requiresDist": { + "items": { + "type": "string" + }, + "type": "array", + "description": "RequiresDist lists the package dependencies with version specifiers from Requires-Dist fields." + }, + "providesExtra": { + "items": { + "type": "string" + }, + "type": "array", + "description": "ProvidesExtra lists optional feature names that can be installed via extras (e.g., \"dev\", \"test\")." + } + }, + "type": "object", + "required": [ + "name", + "version", + "author", + "authorEmail", + "platform", + "sitePackagesRootPath" + ], + "description": "PythonPackage represents all captured data for a python egg or wheel package (specifically as outlined in the PyPA core metadata specification https://packaging.python.org/en/latest/specifications/core-metadata/)." 
+ }, + "PythonPdmFileEntry": { + "properties": { + "url": { + "type": "string", + "description": "URL is the file download URL" + }, + "digest": { + "$ref": "#/$defs/PythonFileDigest", + "description": "Digest is the hash digest of the file hosted at the URL" + } + }, + "type": "object", + "required": [ + "url", + "digest" + ] + }, + "PythonPdmLockEntry": { + "properties": { + "summary": { + "type": "string", + "description": "Summary provides a description of the package" + }, + "files": { + "items": { + "$ref": "#/$defs/PythonPdmFileEntry" + }, + "type": "array", + "description": "Files are the package files with their paths and hash digests (for the base package without extras)" + }, + "marker": { + "type": "string", + "description": "Marker is the \"environment\" --conditional expressions that determine whether a package should be installed based on the runtime environment" + }, + "requiresPython": { + "type": "string", + "description": "RequiresPython specifies the Python version requirement (e.g., \"\u003e=3.6\")." + }, + "dependencies": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Dependencies are the dependency specifications for the base package (without extras)" + }, + "extras": { + "items": { + "$ref": "#/$defs/PythonPdmLockExtraVariant" + }, + "type": "array", + "description": "Extras contains variants for different extras combinations (PDM may have multiple entries per package)" + } + }, + "type": "object", + "required": [ + "summary", + "files" + ], + "description": "PythonPdmLockEntry represents a single package entry within a pdm.lock file." 
+ }, + "PythonPdmLockExtraVariant": { + "properties": { + "extras": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Extras are the optional extras enabled for this variant (e.g., [\"toml\"], [\"dev\"], or [\"toml\", \"dev\"])" + }, + "dependencies": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Dependencies are the dependencies specific to this extras variant" + }, + "files": { + "items": { + "$ref": "#/$defs/PythonPdmFileEntry" + }, + "type": "array", + "description": "Files are the package files specific to this variant (only populated if different from base)" + }, + "marker": { + "type": "string", + "description": "Marker is the environment conditional expression for this variant (e.g., \"python_version \u003c \\\"3.11\\\"\")" + } + }, + "type": "object", + "required": [ + "extras" + ], + "description": "PythonPdmLockExtraVariant represents a specific extras combination variant within a PDM lock file." + }, + "PythonPipRequirementsEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name from the requirements file." + }, + "extras": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Extras are the optional features to install from the package (e.g., package[dev,test])." + }, + "versionConstraint": { + "type": "string", + "description": "VersionConstraint specifies version requirements (e.g., \"\u003e=1.0,\u003c2.0\")." + }, + "url": { + "type": "string", + "description": "URL is the direct download URL or VCS URL if specified instead of a PyPI package." + }, + "markers": { + "type": "string", + "description": "Markers are environment marker expressions for conditional installation (e.g., \"python_version \u003e= '3.8'\")." + } + }, + "type": "object", + "required": [ + "name", + "versionConstraint" + ], + "description": "PythonRequirementsEntry represents a single entry within a [*-]requirements.txt file." 
+ }, + "PythonPipfileLockEntry": { + "properties": { + "hashes": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Hashes are the package file hash values in the format \"algorithm:digest\" for integrity verification." + }, + "index": { + "type": "string", + "description": "Index is the PyPI index name where the package should be fetched from." + } + }, + "type": "object", + "required": [ + "hashes", + "index" + ], + "description": "PythonPipfileLockEntry represents a single package entry within a Pipfile.lock file." + }, + "PythonPoetryLockDependencyEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the dependency package name." + }, + "version": { + "type": "string", + "description": "Version is the locked version or version constraint for the dependency." + }, + "optional": { + "type": "boolean", + "description": "Optional indicates whether this dependency is optional (only needed for certain extras)." + }, + "markers": { + "type": "string", + "description": "Markers are environment marker expressions that conditionally enable the dependency (e.g., \"python_version \u003e= '3.8'\")." + }, + "extras": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Extras are the optional feature names from the dependency that should be installed." + } + }, + "type": "object", + "required": [ + "name", + "version", + "optional" + ], + "description": "PythonPoetryLockDependencyEntry represents a single dependency entry within a Poetry lock file." + }, + "PythonPoetryLockEntry": { + "properties": { + "index": { + "type": "string", + "description": "Index is the package repository name where the package should be fetched from." + }, + "dependencies": { + "items": { + "$ref": "#/$defs/PythonPoetryLockDependencyEntry" + }, + "type": "array", + "description": "Dependencies are the package's runtime dependencies with version constraints." 
+ }, + "extras": { + "items": { + "$ref": "#/$defs/PythonPoetryLockExtraEntry" + }, + "type": "array", + "description": "Extras are optional feature groups that include additional dependencies." + } + }, + "type": "object", + "required": [ + "index", + "dependencies" + ], + "description": "PythonPoetryLockEntry represents a single package entry within a Pipfile.lock file." + }, + "PythonPoetryLockExtraEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the optional feature name (e.g., \"dev\", \"test\")." + }, + "dependencies": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Dependencies are the package names required when this extra is installed." + } + }, + "type": "object", + "required": [ + "name", + "dependencies" + ], + "description": "PythonPoetryLockExtraEntry represents an optional feature group in a Poetry lock file." + }, + "PythonUvLockDependencyEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the dependency package name." + }, + "optional": { + "type": "boolean", + "description": "Optional indicates whether this dependency is optional (only needed for certain extras)." + }, + "markers": { + "type": "string", + "description": "Markers are environment marker expressions that conditionally enable the dependency (e.g., \"python_version \u003e= '3.8'\")." + }, + "extras": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Extras are the optional feature names from the dependency that should be installed." + } + }, + "type": "object", + "required": [ + "name", + "optional" + ], + "description": "PythonUvLockDependencyEntry represents a single dependency entry within a uv lock file." + }, + "PythonUvLockEntry": { + "properties": { + "index": { + "type": "string", + "description": "Index is the package repository name where the package should be fetched from." 
+ }, + "dependencies": { + "items": { + "$ref": "#/$defs/PythonUvLockDependencyEntry" + }, + "type": "array", + "description": "Dependencies are the package's runtime dependencies with version constraints." + }, + "extras": { + "items": { + "$ref": "#/$defs/PythonUvLockExtraEntry" + }, + "type": "array", + "description": "Extras are optional feature groups that include additional dependencies." + } + }, + "type": "object", + "required": [ + "index", + "dependencies" + ], + "description": "PythonUvLockEntry represents a single package entry within a uv.lock file." + }, + "PythonUvLockExtraEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the optional feature name (e.g., \"dev\", \"test\")." + }, + "dependencies": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Dependencies are the package names required when this extra is installed." + } + }, + "type": "object", + "required": [ + "name", + "dependencies" + ], + "description": "PythonUvLockExtraEntry represents an optional feature group in a uv lock file." 
+ }, + "RDescription": { + "properties": { + "title": { + "type": "string", + "description": "Title is short one-line package title" + }, + "description": { + "type": "string", + "description": "Description is detailed package description" + }, + "author": { + "type": "string", + "description": "Author is package author(s)" + }, + "maintainer": { + "type": "string", + "description": "Maintainer is current package maintainer" + }, + "url": { + "items": { + "type": "string" + }, + "type": "array", + "description": "URL is the list of related URLs" + }, + "repository": { + "type": "string", + "description": "Repository is CRAN or other repository name" + }, + "built": { + "type": "string", + "description": "Built is R version and platform this was built with" + }, + "needsCompilation": { + "type": "boolean", + "description": "NeedsCompilation is whether this package requires compilation" + }, + "imports": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Imports are the packages imported in the NAMESPACE" + }, + "depends": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Depends are the packages this package depends on" + }, + "suggests": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Suggests are the optional packages that extend functionality" + } + }, + "type": "object", + "description": "RDescription represents metadata from an R package DESCRIPTION file containing package information, dependencies, and author details." + }, + "Relationship": { + "properties": { + "parent": { + "type": "string", + "description": "Parent is the ID of the parent artifact in this relationship." + }, + "child": { + "type": "string", + "description": "Child is the ID of the child artifact in this relationship." + }, + "type": { + "type": "string", + "description": "Type is the relationship type (e.g., \"contains\", \"dependency-of\", \"ancestor-of\")." 
+ }, + "metadata": { + "description": "Metadata contains additional relationship-specific metadata." + } + }, + "type": "object", + "required": [ + "parent", + "child", + "type" + ], + "description": "Relationship represents a directed relationship between two artifacts in the SBOM, such as package-contains-file or package-depends-on-package." + }, + "RpmArchive": { + "properties": { + "name": { + "type": "string", + "description": "Name is the RPM package name as found in the RPM database." + }, + "version": { + "type": "string", + "description": "Version is the upstream version of the package." + }, + "epoch": { + "oneOf": [ + { + "type": "integer", + "description": "Epoch is the version epoch used to force upgrade ordering (null if not set)." + }, + { + "type": "null" + } + ] + }, + "architecture": { + "type": "string", + "description": "Arch is the target CPU architecture (e.g., \"x86_64\", \"aarch64\", \"noarch\")." + }, + "release": { + "type": "string", + "description": "Release is the package release number or distribution-specific version suffix." + }, + "sourceRpm": { + "type": "string", + "description": "SourceRpm is the source RPM filename that was used to build this package." + }, + "signatures": { + "items": { + "$ref": "#/$defs/RpmSignature" + }, + "type": "array", + "description": "Signatures contains GPG signature metadata for package verification." + }, + "size": { + "type": "integer", + "description": "Size is the total installed size of the package in bytes." + }, + "vendor": { + "type": "string", + "description": "Vendor is the organization that packaged the software." + }, + "modularityLabel": { + "type": "string", + "description": "ModularityLabel identifies the module stream for modular RPM packages (e.g., \"nodejs:12:20200101\")." + }, + "provides": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Provides lists the virtual packages and capabilities this package provides." 
+ }, + "requires": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Requires lists the dependencies required by this package." + }, + "files": { + "items": { + "$ref": "#/$defs/RpmFileRecord" + }, + "type": "array", + "description": "Files are the file records for all files owned by this package." + } + }, + "type": "object", + "required": [ + "name", + "version", + "epoch", + "architecture", + "release", + "sourceRpm", + "size", + "vendor", + "files" + ], + "description": "RpmArchive represents package metadata extracted directly from a .rpm archive file, containing the same information as an RPM database entry." + }, + "RpmDbEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is the RPM package name as found in the RPM database." + }, + "version": { + "type": "string", + "description": "Version is the upstream version of the package." + }, + "epoch": { + "oneOf": [ + { + "type": "integer", + "description": "Epoch is the version epoch used to force upgrade ordering (null if not set)." + }, + { + "type": "null" + } + ] + }, + "architecture": { + "type": "string", + "description": "Arch is the target CPU architecture (e.g., \"x86_64\", \"aarch64\", \"noarch\")." + }, + "release": { + "type": "string", + "description": "Release is the package release number or distribution-specific version suffix." + }, + "sourceRpm": { + "type": "string", + "description": "SourceRpm is the source RPM filename that was used to build this package." + }, + "signatures": { + "items": { + "$ref": "#/$defs/RpmSignature" + }, + "type": "array", + "description": "Signatures contains GPG signature metadata for package verification." + }, + "size": { + "type": "integer", + "description": "Size is the total installed size of the package in bytes." + }, + "vendor": { + "type": "string", + "description": "Vendor is the organization that packaged the software." 
+ }, + "modularityLabel": { + "type": "string", + "description": "ModularityLabel identifies the module stream for modular RPM packages (e.g., \"nodejs:12:20200101\")." + }, + "provides": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Provides lists the virtual packages and capabilities this package provides." + }, + "requires": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Requires lists the dependencies required by this package." + }, + "files": { + "items": { + "$ref": "#/$defs/RpmFileRecord" + }, + "type": "array", + "description": "Files are the file records for all files owned by this package." + } + }, + "type": "object", + "required": [ + "name", + "version", + "epoch", + "architecture", + "release", + "sourceRpm", + "size", + "vendor", + "files" + ], + "description": "RpmDBEntry represents all captured data from a RPM DB package entry." + }, + "RpmFileRecord": { + "properties": { + "path": { + "type": "string", + "description": "Path is the absolute file path where the file is installed." + }, + "mode": { + "type": "integer", + "description": "Mode is the file permission mode bits following Unix stat.h conventions." + }, + "size": { + "type": "integer", + "description": "Size is the file size in bytes." + }, + "digest": { + "$ref": "#/$defs/Digest", + "description": "Digest contains the hash algorithm and value for file integrity verification." + }, + "userName": { + "type": "string", + "description": "UserName is the owner username for the file." + }, + "groupName": { + "type": "string", + "description": "GroupName is the group name for the file." + }, + "flags": { + "type": "string", + "description": "Flags indicates the file type (e.g., \"%config\", \"%doc\", \"%ghost\")." 
+ } + }, + "type": "object", + "required": [ + "path", + "mode", + "size", + "digest", + "userName", + "groupName", + "flags" + ], + "description": "RpmFileRecord represents the file metadata for a single file attributed to a RPM package." + }, + "RpmSignature": { + "properties": { + "algo": { + "type": "string", + "description": "PublicKeyAlgorithm is the public key algorithm used for signing (e.g., \"RSA\")." + }, + "hash": { + "type": "string", + "description": "HashAlgorithm is the hash algorithm used for the signature (e.g., \"SHA256\")." + }, + "created": { + "type": "string", + "description": "Created is the timestamp when the signature was created." + }, + "issuer": { + "type": "string", + "description": "IssuerKeyID is the GPG key ID that created the signature." + } + }, + "type": "object", + "required": [ + "algo", + "hash", + "created", + "issuer" + ], + "description": "RpmSignature represents a GPG signature for an RPM package used for authenticity verification." + }, + "RubyGemspec": { + "properties": { + "name": { + "type": "string", + "description": "Name is gem name as specified in the gemspec" + }, + "version": { + "type": "string", + "description": "Version is gem version as specified in the gemspec" + }, + "files": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Files is logical list of files in the gem (NOT directly usable as filesystem paths. Example: bundler gem lists \"lib/bundler/vendor/uri/lib/uri/ldap.rb\" but actual path is \"/usr/local/lib/ruby/3.2.0/bundler/vendor/uri/lib/uri/ldap.rb\". 
Would need gem installation path, ruby version, and env vars like GEM_HOME to resolve actual paths.)" + }, + "authors": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Authors are the list of gem authors (stored as array regardless of using `author` or `authors` method in gemspec)" + }, + "homepage": { + "type": "string", + "description": "Homepage is project homepage URL" + } + }, + "type": "object", + "required": [ + "name", + "version" + ], + "description": "RubyGemspec represents all metadata parsed from the *.gemspec file" + }, + "RustCargoAuditEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is crate name as specified in audit section of the build binary" + }, + "version": { + "type": "string", + "description": "Version is crate version as specified in audit section of the build binary" + }, + "source": { + "type": "string", + "description": "Source is the source registry or repository where this crate came from" + } + }, + "type": "object", + "required": [ + "name", + "version", + "source" + ], + "description": "RustBinaryAuditEntry represents Rust crate metadata extracted from a compiled binary using cargo-auditable format." + }, + "RustCargoLockEntry": { + "properties": { + "name": { + "type": "string", + "description": "Name is crate name as specified in Cargo.toml" + }, + "version": { + "type": "string", + "description": "Version is crate version as specified in Cargo.toml" + }, + "source": { + "type": "string", + "description": "Source is the source registry or repository URL in format \"registry+https://github.com/rust-lang/crates.io-index\" for registry packages" + }, + "checksum": { + "type": "string", + "description": "Checksum is content checksum for registry packages only (hexadecimal string). Cargo doesn't require or include checksums for git dependencies. Used to detect MITM attacks by verifying downloaded crate matches lockfile checksum." 
+ }, + "dependencies": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Dependencies are the list of dependencies with version constraints" + } + }, + "type": "object", + "required": [ + "name", + "version", + "source", + "checksum", + "dependencies" + ], + "description": "RustCargoLockEntry represents a locked dependency from a Cargo.lock file with precise version and checksum information." + }, + "SafetensorsMetadata": { + "properties": { + "format": { + "type": "string", + "description": "Format is the source format label (always \"safetensors\" for this metadata type).\nPresent because the Docker AI model config blob carries an explicit format field\nthat can also be \"gguf\", and recording it here makes the origin explicit." + }, + "architecture": { + "type": "string", + "description": "Architecture is the model architecture (e.g., \"LlamaForCausalLM\",\n\"Qwen3MoeForConditionalGeneration\"), sourced from the Hugging Face config.json\n\"architectures\" array." + }, + "quantization": { + "type": "string", + "description": "Quantization describes tensor precision (e.g., \"BF16\", \"F16\", \"F32\", \"INT8\")." + }, + "parameters": { + "type": "string", + "description": "Parameters is the parameter count as reported by upstream. Stored as a string\nbecause Docker AI and Hugging Face labels use notation like \"2.68B\" or \"35B-A3B\"." + }, + "tensorCount": { + "type": "integer", + "description": "TensorCount is the number of tensor entries in the file header." + }, + "totalSize": { + "type": "string", + "description": "TotalSize is the total byte size of tensor data across all shards when known\n(from the Docker AI model config \"size\" field or the sharded index \"total_size\")." + }, + "torchDtype": { + "type": "string", + "description": "TorchDtype is the Hugging Face torch_dtype (e.g., \"bfloat16\", \"float16\")." 
+ }, + "transformersVersion": { + "type": "string", + "description": "TransformersVersion is the transformers library version recorded in config.json." + }, + "shardCount": { + "type": "integer", + "description": "ShardCount is the number of .safetensors shards for a sharded model (1 for a\nsingle-file model)." + }, + "userMetadata": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "UserMetadata is the optional \"__metadata__\" map from a .safetensors file header\n(string-to-string key/values set by the producer)." + }, + "metadataHash": { + "type": "string", + "description": "MetadataHash is an xxhash of the normalized header metadata, providing a stable\nidentifier for identical model content across repositories or filenames." + }, + "parts": { + "items": { + "$ref": "#/$defs/SafetensorsMetadata" + }, + "type": "array", + "description": "Parts contains metadata from additional SafeTensors shards or OCI layers that\nwere merged into this package during post-processing." + } + }, + "type": "object", + "description": "SafeTensorsMetadata represents metadata extracted from a SafeTensors model." + }, + "Schema": { + "properties": { + "version": { + "type": "string", + "description": "Version is the JSON schema version for this document format." + }, + "url": { + "type": "string", + "description": "URL is the URL to the JSON schema definition document." + } + }, + "type": "object", + "required": [ + "version", + "url" + ], + "description": "Schema specifies the JSON schema version and URL reference that defines the structure and validation rules for this document format." + }, + "SnapEntry": { + "properties": { + "snapType": { + "type": "string", + "description": "SnapType indicates the snap type (base, kernel, app, gadget, or snapd)." + }, + "base": { + "type": "string", + "description": "Base is the base snap name that this snap depends on (e.g., \"core20\", \"core22\")." 
+ }, + "snapName": { + "type": "string", + "description": "SnapName is the snap package name." + }, + "snapVersion": { + "type": "string", + "description": "SnapVersion is the snap package version." + }, + "architecture": { + "type": "string", + "description": "Architecture is the target CPU architecture (e.g., \"amd64\", \"arm64\")." + } + }, + "type": "object", + "required": [ + "snapType", + "base", + "snapName", + "snapVersion", + "architecture" + ], + "description": "SnapEntry represents metadata for a Snap package extracted from snap.yaml or snapcraft.yaml files." + }, + "Source": { + "properties": { + "id": { + "type": "string", + "description": "ID is a unique identifier for the analyzed source artifact." + }, + "name": { + "type": "string", + "description": "Name is the name of the analyzed artifact (e.g., image name, directory path)." + }, + "version": { + "type": "string", + "description": "Version is the version of the analyzed artifact (e.g., image tag)." + }, + "supplier": { + "type": "string", + "description": "Supplier is supplier information, which can be user-provided for NTIA minimum elements compliance." + }, + "type": { + "type": "string", + "description": "Type is the source type (e.g., \"image\", \"directory\", \"file\")." + }, + "metadata": { + "description": "Metadata contains additional source-specific metadata." + } + }, + "type": "object", + "required": [ + "id", + "name", + "version", + "type", + "metadata" + ], + "description": "Source represents the artifact that was analyzed to generate this SBOM, such as a container image, directory, or file archive." + }, + "SwiftPackageManagerLockEntry": { + "properties": { + "revision": { + "type": "string", + "description": "Revision is git commit hash of the resolved package" + } + }, + "type": "object", + "required": [ + "revision" + ], + "description": "SwiftPackageManagerResolvedEntry represents a resolved dependency from a Package.resolved file with its locked version and source location." 
+ }, + "SwiplpackPackage": { + "properties": { + "name": { + "type": "string", + "description": "Name is the package name as found in the .toml file" + }, + "version": { + "type": "string", + "description": "Version is the package version as found in the .toml file" + }, + "author": { + "type": "string", + "description": "Author is author name" + }, + "authorEmail": { + "type": "string", + "description": "AuthorEmail is author email address" + }, + "packager": { + "type": "string", + "description": "Packager is packager name (if different from author)" + }, + "packagerEmail": { + "type": "string", + "description": "PackagerEmail is packager email address" + }, + "homepage": { + "type": "string", + "description": "Homepage is project homepage URL" + }, + "dependencies": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Dependencies are the list of required dependencies" + } + }, + "type": "object", + "required": [ + "name", + "version", + "author", + "authorEmail", + "packager", + "packagerEmail", + "homepage", + "dependencies" + ], + "description": "SwiplPackEntry represents a SWI-Prolog package from the pack system with metadata about the package and its dependencies." + }, + "TerraformLockProviderEntry": { + "properties": { + "url": { + "type": "string", + "description": "URL is the provider source address (e.g., \"registry.terraform.io/hashicorp/aws\")." + }, + "constraints": { + "type": "string", + "description": "Constraints specifies the version constraints for the provider (e.g., \"~\u003e 4.0\")." + }, + "version": { + "type": "string", + "description": "Version is the locked provider version selected during terraform init." + }, + "hashes": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Hashes are cryptographic checksums for the provider plugin archives across different platforms." 
+ } + }, + "type": "object", + "required": [ + "url", + "constraints", + "version", + "hashes" + ], + "description": "TerraformLockProviderEntry represents a single provider entry in a Terraform dependency lock file (.terraform.lock.hcl)." + }, + "WordpressPluginEntry": { + "properties": { + "pluginInstallDirectory": { + "type": "string", + "description": "PluginInstallDirectory is directory name where the plugin is installed" + }, + "author": { + "type": "string", + "description": "Author is plugin author name" + }, + "authorUri": { + "type": "string", + "description": "AuthorURI is author's website URL" + } + }, + "type": "object", + "required": [ + "pluginInstallDirectory" + ], + "description": "WordpressPluginEntry represents all metadata parsed from the wordpress plugin file" + }, + "cpes": { + "items": { + "$ref": "#/$defs/CPE" + }, + "type": "array" + }, + "licenses": { + "items": { + "$ref": "#/$defs/License" + }, + "type": "array" + } + } +} diff --git a/schema/json/schema-latest.json b/schema/json/schema-latest.json index 125c92005..be64d29c7 100644 --- a/schema/json/schema-latest.json +++ b/schema/json/schema-latest.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "anchore.io/schema/syft/json/16.1.3/document", + "$id": "anchore.io/schema/syft/json/16.1.4/document", "$ref": "#/$defs/Document", "$defs": { "AlpmDbEntry": { @@ -2741,6 +2741,9 @@ { "$ref": "#/$defs/RustCargoLockEntry" }, + { + "$ref": "#/$defs/SafetensorsMetadata" + }, { "$ref": "#/$defs/SnapEntry" }, @@ -4029,6 +4032,66 @@ ], "description": "RustCargoLockEntry represents a locked dependency from a Cargo.lock file with precise version and checksum information." 
}, + "SafetensorsMetadata": { + "properties": { + "format": { + "type": "string", + "description": "Format is the source format label (always \"safetensors\" for this metadata type).\nPresent because the Docker AI model config blob carries an explicit format field\nthat can also be \"gguf\", and recording it here makes the origin explicit." + }, + "architecture": { + "type": "string", + "description": "Architecture is the model architecture (e.g., \"LlamaForCausalLM\",\n\"Qwen3MoeForConditionalGeneration\"), sourced from the Hugging Face config.json\n\"architectures\" array." + }, + "quantization": { + "type": "string", + "description": "Quantization describes tensor precision (e.g., \"BF16\", \"F16\", \"F32\", \"INT8\")." + }, + "parameters": { + "type": "string", + "description": "Parameters is the parameter count as reported by upstream. Stored as a string\nbecause Docker AI and Hugging Face labels use notation like \"2.68B\" or \"35B-A3B\"." + }, + "tensorCount": { + "type": "integer", + "description": "TensorCount is the number of tensor entries in the file header." + }, + "totalSize": { + "type": "string", + "description": "TotalSize is the total byte size of tensor data across all shards when known\n(from the Docker AI model config \"size\" field or the sharded index \"total_size\")." + }, + "torchDtype": { + "type": "string", + "description": "TorchDtype is the Hugging Face torch_dtype (e.g., \"bfloat16\", \"float16\")." + }, + "transformersVersion": { + "type": "string", + "description": "TransformersVersion is the transformers library version recorded in config.json." + }, + "shardCount": { + "type": "integer", + "description": "ShardCount is the number of .safetensors shards for a sharded model (1 for a\nsingle-file model)." 
+ }, + "userMetadata": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "UserMetadata is the optional \"__metadata__\" map from a .safetensors file header\n(string-to-string key/values set by the producer)." + }, + "metadataHash": { + "type": "string", + "description": "MetadataHash is an xxhash of the normalized header metadata, providing a stable\nidentifier for identical model content across repositories or filenames." + }, + "parts": { + "items": { + "$ref": "#/$defs/SafetensorsMetadata" + }, + "type": "array", + "description": "Parts contains metadata from additional SafeTensors shards or OCI layers that\nwere merged into this package during post-processing." + } + }, + "type": "object", + "description": "SafeTensorsMetadata represents metadata extracted from a SafeTensors model." + }, "Schema": { "properties": { "version": { diff --git a/syft/format/internal/spdxutil/helpers/source_info.go b/syft/format/internal/spdxutil/helpers/source_info.go index a26c1d74b..5d8c48e47 100644 --- a/syft/format/internal/spdxutil/helpers/source_info.go +++ b/syft/format/internal/spdxutil/helpers/source_info.go @@ -83,7 +83,7 @@ func SourceInfo(p pkg.Package) string { case pkg.TerraformPkg: answer = "acquired package info from Terraform dependency lock file" case pkg.ModelPkg: - answer = "acquired package info from AI artifact (e.g. GGUF File)" + answer = "acquired package info from AI model artifact" default: answer = "acquired package info from the following paths" } diff --git a/syft/pkg/cataloger/ai/cataloger.go b/syft/pkg/cataloger/ai/cataloger.go index d566edba2..e4f3ac19f 100644 --- a/syft/pkg/cataloger/ai/cataloger.go +++ b/syft/pkg/cataloger/ai/cataloger.go @@ -1,6 +1,6 @@ /* Package ai provides concrete Cataloger implementations for AI artifacts and machine learning models, -including support for GGUF (GPT-Generated Unified Format) model files. 
+including support for GGUF (GPT-Generated Unified Format) and SafeTensors model files. */ package ai @@ -10,8 +10,9 @@ import ( ) const ( - catalogerName = "gguf-cataloger" - ggufLayerMediaType = "application/vnd.docker.ai*" + catalogerName = "gguf-cataloger" + ggufLayerMediaType = "application/vnd.docker.ai*" + safeTensorsCatalogerName = "safetensors-cataloger" ) // NewGGUFCataloger returns a new cataloger instance for GGUF model files. @@ -23,3 +24,17 @@ func NewGGUFCataloger() pkg.Cataloger { WithParserByMediaType(parseGGUFModel, ggufLayerMediaType). WithProcessors(ggufMergeProcessor) } + +// NewSafeTensorsCataloger returns a cataloger for SafeTensors model files, +// covering three discovery paths: +// - **/*.safetensors files (single-file models; header-only parse) +// - **/model.safetensors.index.json files (sharded models) +// - application/vnd.docker.ai.model.config.v0.1+json OCI layers (Docker Model +// Runner artifacts whose config advertises format=="safetensors") +func NewSafeTensorsCataloger() pkg.Cataloger { + return generic.NewCataloger(safeTensorsCatalogerName). + WithParserByGlobs(parseSafeTensorsFile, "**/*.safetensors"). + WithParserByGlobs(parseSafeTensorsIndex, "**/*.safetensors.index.json"). + WithParserByMediaType(parseSafeTensorsOCIConfig, dockerAIModelConfigMediaType). 
+ WithProcessors(safeTensorsMergeProcessor) +} diff --git a/syft/pkg/cataloger/ai/package.go b/syft/pkg/cataloger/ai/package.go index 67c6570ae..32280765d 100644 --- a/syft/pkg/cataloger/ai/package.go +++ b/syft/pkg/cataloger/ai/package.go @@ -20,3 +20,18 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license st return p } + +func newSafeTensorsPackage(metadata *pkg.SafeTensorsMetadata, modelName, version, license string, locations ...file.Location) pkg.Package { + p := pkg.Package{ + Name: modelName, + Version: version, + Locations: file.NewLocationSet(locations...), + Type: pkg.ModelPkg, + Licenses: pkg.NewLicenseSet(pkg.NewLicensesFromValues(license)...), + Metadata: *metadata, + // PURL is intentionally not set: package-url has not yet finalized ML model support. + } + p.SetID() + + return p +} diff --git a/syft/pkg/cataloger/ai/parse_safetensors.go b/syft/pkg/cataloger/ai/parse_safetensors.go new file mode 100644 index 000000000..d919c14ff --- /dev/null +++ b/syft/pkg/cataloger/ai/parse_safetensors.go @@ -0,0 +1,174 @@ +package ai + +import ( + "encoding/binary" + "encoding/json" + "fmt" + "io" + "sort" + "strings" + + "github.com/cespare/xxhash/v2" +) + +// SafeTensors file format: [8 bytes u64 LE header size] [N bytes JSON header] [tensor data]. +// Reference: https://github.com/huggingface/safetensors#format +const ( + maxSafeTensorsHeaderSize = 100 * 1024 * 1024 // 100MB ceiling on header JSON to prevent OOM +) + +// safeTensorsHeader is the decoded JSON header. Tensor entries live alongside a +// reserved "__metadata__" key holding a string-to-string producer map. We decode +// tensor entries into a generic map so we can iterate and count without a fixed +// schema for every field. +type safeTensorsHeader struct { + metadata map[string]string + tensors map[string]safeTensorsEntry +} + +// safeTensorsEntry describes a single tensor within the header JSON. 
+type safeTensorsEntry struct { + DType string `json:"dtype"` + Shape []int64 `json:"shape"` + DataOffsets []int64 `json:"data_offsets"` +} + +// readSafeTensorsHeader reads and parses the JSON header from a .safetensors file. +// It returns the decoded header plus the on-disk size of the header JSON in bytes. +func readSafeTensorsHeader(r io.Reader) (*safeTensorsHeader, uint64, error) { + var lenBuf [8]byte + if _, err := io.ReadFull(r, lenBuf[:]); err != nil { + return nil, 0, fmt.Errorf("failed to read header length: %w", err) + } + headerLen := binary.LittleEndian.Uint64(lenBuf[:]) + if headerLen == 0 { + return nil, 0, fmt.Errorf("safetensors header length is zero") + } + if headerLen > maxSafeTensorsHeaderSize { + return nil, 0, fmt.Errorf("safetensors header size %d exceeds maximum %d", headerLen, maxSafeTensorsHeaderSize) + } + + body := make([]byte, headerLen) + if _, err := io.ReadFull(r, body); err != nil { + return nil, 0, fmt.Errorf("failed to read header body: %w", err) + } + + var raw map[string]json.RawMessage + if err := json.Unmarshal(body, &raw); err != nil { + return nil, 0, fmt.Errorf("failed to decode safetensors header JSON: %w", err) + } + + h := &safeTensorsHeader{tensors: make(map[string]safeTensorsEntry, len(raw))} + for key, val := range raw { + if key == "__metadata__" { + if err := json.Unmarshal(val, &h.metadata); err != nil { + return nil, 0, fmt.Errorf("failed to decode __metadata__: %w", err) + } + continue + } + var entry safeTensorsEntry + if err := json.Unmarshal(val, &entry); err != nil { + // Not all entries must conform; skip anything we cannot decode rather than fail. + continue + } + h.tensors[key] = entry + } + + return h, headerLen, nil +} + +// parameterCount sums the element counts across all tensors in the header. 
+func (h *safeTensorsHeader) parameterCount() uint64 { + var total uint64 + for _, t := range h.tensors { + count := uint64(1) + for _, dim := range t.Shape { + if dim <= 0 { + count = 0 + break + } + count *= uint64(dim) + } + total += count + } + return total +} + +// dominantDType returns the dtype that accounts for the largest fraction of parameters. +// For mixed-precision models the "dominant" dtype is still a useful summary. +func (h *safeTensorsHeader) dominantDType() string { + sizeByDType := make(map[string]uint64) + for _, t := range h.tensors { + count := uint64(1) + for _, dim := range t.Shape { + if dim <= 0 { + count = 0 + break + } + count *= uint64(dim) + } + sizeByDType[t.DType] += count + } + var best string + var bestSize uint64 + for dtype, size := range sizeByDType { + if size > bestSize || (size == bestSize && dtype < best) { + best = dtype + bestSize = size + } + } + return best +} + +// metadataHash returns a stable xxhash64 over the tensor entries + __metadata__. +// Tensor keys are sorted to keep the hash deterministic across producers. +func (h *safeTensorsHeader) metadataHash() string { + type entry struct { + Name string `json:"name"` + Entry safeTensorsEntry `json:"entry"` + } + entries := make([]entry, 0, len(h.tensors)) + for name, t := range h.tensors { + entries = append(entries, entry{Name: name, Entry: t}) + } + sort.Slice(entries, func(i, j int) bool { return entries[i].Name < entries[j].Name }) + + type hashInput struct { + Tensors []entry `json:"tensors"` + Metadata map[string]string `json:"metadata,omitempty"` + } + b, err := json.Marshal(hashInput{Tensors: entries, Metadata: h.metadata}) + if err != nil { + return "" + } + return fmt.Sprintf("%016x", xxhash.Sum64(b)) +} + +// normalizeDType maps a safetensors/torch dtype label to an uppercase quantization +// shorthand matching conventions used elsewhere in syft (e.g., BF16, F16, I8). 
+func normalizeDType(dtype string) string { + switch strings.ToUpper(dtype) { + case "BF16": + return "BF16" + case "F16", "FP16", "FLOAT16", "HALF": + return "F16" + case "F32", "FP32", "FLOAT32", "FLOAT": + return "F32" + case "F64", "FP64", "FLOAT64", "DOUBLE": + return "F64" + case "I8", "INT8": + return "I8" + case "U8", "UINT8": + return "U8" + case "I16", "INT16": + return "I16" + case "I32", "INT32": + return "I32" + case "I64", "INT64": + return "I64" + case "BOOL": + return "BOOL" + default: + return strings.ToUpper(dtype) + } +} diff --git a/syft/pkg/cataloger/ai/parse_safetensors_model.go b/syft/pkg/cataloger/ai/parse_safetensors_model.go new file mode 100644 index 000000000..6c3987492 --- /dev/null +++ b/syft/pkg/cataloger/ai/parse_safetensors_model.go @@ -0,0 +1,305 @@ +package ai + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "path" + "path/filepath" + "strings" + + "gopkg.in/yaml.v3" + + "github.com/anchore/syft/internal" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/internal/unknown" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" +) + +// parseSafeTensorsFile parses a single .safetensors file by reading only its +// JSON header, then enriches the resulting package with metadata from sibling +// config.json and README.md files when the resolver can find them. 
+func parseSafeTensorsFile(_ context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + defer internal.CloseAndLogError(reader, reader.Path()) + + header, _, err := readSafeTensorsHeader(&io.LimitedReader{R: reader, N: maxSafeTensorsHeaderSize + 8}) + if err != nil { + return nil, nil, fmt.Errorf("failed to read safetensors header: %w", err) + } + + md := pkg.SafeTensorsMetadata{ + Format: "safetensors", + TensorCount: uint64(len(header.tensors)), + Quantization: normalizeDType(header.dominantDType()), + ShardCount: 1, + UserMetadata: header.metadata, + MetadataHash: header.metadataHash(), + } + if p := header.parameterCount(); p > 0 { + md.Parameters = formatParameterCount(p) + } + + name, version, license := enrichFromSiblings(resolver, reader.Path(), &md) + if name == "" { + name = modelNameFromPath(reader.Path()) + } + + p := newSafeTensorsPackage( + &md, + name, + version, + license, + reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), + ) + + return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse safetensors file") +} + +// parseSafeTensorsIndex parses a model.safetensors.index.json file for a sharded +// model. The index lists every tensor and the shard file it lives in; from this +// we derive tensor count, unique shard count, and (when present) the producer- +// declared total_size. 
+func parseSafeTensorsIndex(_ context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + defer internal.CloseAndLogError(reader, reader.Path()) + + var doc struct { + Metadata struct { + TotalSize json.Number `json:"total_size"` + } `json:"metadata"` + WeightMap map[string]string `json:"weight_map"` + } + if err := json.NewDecoder(reader).Decode(&doc); err != nil { + return nil, nil, fmt.Errorf("failed to decode safetensors index JSON: %w", err) + } + + shards := make(map[string]struct{}, 4) + for _, shard := range doc.WeightMap { + shards[shard] = struct{}{} + } + + md := pkg.SafeTensorsMetadata{ + Format: "safetensors", + TensorCount: uint64(len(doc.WeightMap)), + ShardCount: len(shards), + } + if doc.Metadata.TotalSize != "" { + md.TotalSize = formatByteSize(doc.Metadata.TotalSize.String()) + } + + name, version, license := enrichFromSiblings(resolver, reader.Path(), &md) + if name == "" { + name = modelNameFromIndexPath(reader.Path()) + } + + p := newSafeTensorsPackage( + &md, + name, + version, + license, + reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), + ) + + return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse safetensors index") +} + +// enrichFromSiblings looks for a sibling config.json and README.md next to the +// safetensors artifact and folds their values into the metadata struct. It +// returns a name, version, and license string derived from those sources, with +// the caller free to fall back to a filename-derived default. 
+func enrichFromSiblings(resolver file.Resolver, sourcePath string, md *pkg.SafeTensorsMetadata) (name, version, license string) { + if resolver == nil { + return "", "", "" + } + dir := path.Dir(sourcePath) + + if cfg := readSiblingJSON(resolver, path.Join(dir, "config.json")); cfg != nil { + if md.Architecture == "" && len(cfg.Architectures) > 0 { + md.Architecture = cfg.Architectures[0] + } + if md.TorchDtype == "" { + md.TorchDtype = cfg.TorchDtype + } + if md.TransformersVersion == "" { + md.TransformersVersion = cfg.TransformersVersion + } + if cfg.NameOrPath != "" { + name = path.Base(cfg.NameOrPath) + } + } + + if fm := readReadmeFrontmatter(resolver, path.Join(dir, "README.md")); fm != nil { + if license == "" { + license = fm.License + } + if name == "" && len(fm.BaseModel) > 0 { + name = path.Base(fm.BaseModel[0]) + } + } + + return name, version, license +} + +// hfConfig is a minimal projection of Hugging Face config.json fields we care about. +type hfConfig struct { + Architectures []string `json:"architectures"` + TorchDtype string `json:"torch_dtype"` + TransformersVersion string `json:"transformers_version"` + NameOrPath string `json:"_name_or_path"` +} + +func readSiblingJSON(resolver file.Resolver, p string) *hfConfig { + locations, err := resolver.FilesByPath(p) + if err != nil || len(locations) == 0 { + return nil + } + rc, err := resolver.FileContentsByLocation(locations[0]) + if err != nil { + return nil + } + defer internal.CloseAndLogError(rc, p) + + var cfg hfConfig + if err := json.NewDecoder(rc).Decode(&cfg); err != nil { + log.Debugf("failed to decode %s: %v", p, err) + return nil + } + return &cfg +} + +// readmeFrontmatter holds the subset of YAML frontmatter fields we extract. +type readmeFrontmatter struct { + License string `yaml:"license"` + BaseModel []string `yaml:"base_model"` +} + +// readReadmeFrontmatter extracts the leading YAML frontmatter block from a README. 
+// The block is delimited by "---" lines at the start of the file. +func readReadmeFrontmatter(resolver file.Resolver, p string) *readmeFrontmatter { + locations, err := resolver.FilesByPath(p) + if err != nil || len(locations) == 0 { + return nil + } + rc, err := resolver.FileContentsByLocation(locations[0]) + if err != nil { + return nil + } + defer internal.CloseAndLogError(rc, p) + + buf, err := io.ReadAll(io.LimitReader(rc, 1024*1024)) + if err != nil { + return nil + } + return parseFrontmatter(buf) +} + +// parseFrontmatter pulls the YAML block between the first and second "---" lines +// of a file (if present) and decodes known fields from it. +func parseFrontmatter(buf []byte) *readmeFrontmatter { + trimmed := bytes.TrimLeft(buf, "\xef\xbb\xbf \t\r\n") + if !bytes.HasPrefix(trimmed, []byte("---")) { + return nil + } + rest := trimmed[3:] + // trim the newline directly following the opening delimiter + if i := bytes.IndexByte(rest, '\n'); i >= 0 { + rest = rest[i+1:] + } + end := bytes.Index(rest, []byte("\n---")) + if end < 0 { + return nil + } + var fm readmeFrontmatter + if err := yaml.Unmarshal(rest[:end], &fm); err != nil { + log.Debugf("failed to parse README frontmatter: %v", err) + return nil + } + // base_model may also appear as a scalar; yaml.Unmarshal will fail silently in that case. + if fm.License == "" && len(fm.BaseModel) == 0 { + var alt struct { + License string `yaml:"license"` + BaseModel string `yaml:"base_model"` + } + if err := yaml.Unmarshal(rest[:end], &alt); err == nil { + fm.License = alt.License + if alt.BaseModel != "" { + fm.BaseModel = []string{alt.BaseModel} + } + } + } + return &fm +} + +// modelNameFromPath turns "/models/foo/model.safetensors" into "foo". +// For a bare filename "weights.safetensors" we return "weights". +func modelNameFromPath(p string) string { + base := strings.TrimSuffix(filepath.Base(p), ".safetensors") + dir := filepath.Base(filepath.Dir(p)) + if dir != "" && dir != "." 
&& dir != string(filepath.Separator) { + return dir + } + return base +} + +// modelNameFromIndexPath derives a model name from the index filename's parent +// directory, defaulting to "safetensors-model" if no useful directory name exists. +func modelNameFromIndexPath(p string) string { + dir := filepath.Base(filepath.Dir(p)) + if dir != "" && dir != "." && dir != string(filepath.Separator) { + return dir + } + return "safetensors-model" +} + +// formatParameterCount prints a count like 6_700_000_000 as "6.7B" using B/M/K +// thresholds matching the notation used by Hugging Face and Docker AI labels. +func formatParameterCount(n uint64) string { + switch { + case n >= 1_000_000_000: + return fmt.Sprintf("%.2fB", float64(n)/1_000_000_000) + case n >= 1_000_000: + return fmt.Sprintf("%.2fM", float64(n)/1_000_000) + case n >= 1_000: + return fmt.Sprintf("%.2fK", float64(n)/1_000) + default: + return fmt.Sprintf("%d", n) + } +} + +// formatByteSize turns a numeric string (bytes) into a human-friendly size like +// "71.90GB". Non-numeric inputs are passed through unchanged so we never lose +// producer-declared strings such as "71.90GB". 
+func formatByteSize(s string) string { + var n uint64 + if _, err := fmt.Sscanf(s, "%d", &n); err != nil || n == 0 { + return s + } + const ( + kb = 1024 + mb = kb * 1024 + gb = mb * 1024 + tb = gb * 1024 + ) + switch { + case n >= tb: + return fmt.Sprintf("%.2fTB", float64(n)/float64(tb)) + case n >= gb: + return fmt.Sprintf("%.2fGB", float64(n)/float64(gb)) + case n >= mb: + return fmt.Sprintf("%.2fMB", float64(n)/float64(mb)) + case n >= kb: + return fmt.Sprintf("%.2fKB", float64(n)/float64(kb)) + default: + return fmt.Sprintf("%dB", n) + } +} + +// integrity checks +var ( + _ generic.Parser = parseSafeTensorsFile + _ generic.Parser = parseSafeTensorsIndex +) diff --git a/syft/pkg/cataloger/ai/parse_safetensors_oci.go b/syft/pkg/cataloger/ai/parse_safetensors_oci.go new file mode 100644 index 000000000..ced49f3a9 --- /dev/null +++ b/syft/pkg/cataloger/ai/parse_safetensors_oci.go @@ -0,0 +1,215 @@ +package ai + +import ( + "context" + "encoding/json" + "fmt" + "io" + "strings" + + "github.com/anchore/syft/internal" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/internal/unknown" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" +) + +// Docker AI OCI media types used by Docker Model Runner artifacts. +const ( + dockerAIModelConfigMediaType = "application/vnd.docker.ai.model.config.v0.1+json" + dockerAIModelFileMediaType = "application/vnd.docker.ai.model.file" + dockerAILicenseMediaType = "application/vnd.docker.ai.license" +) + +// dockerAIModelConfig mirrors the JSON shape of the vnd.docker.ai.model.config +// blob written by Docker Model Runner for AI artifacts. Only fields we use are +// declared; unknown fields are ignored. 
+type dockerAIModelConfig struct { + Config struct { + Format string `json:"format"` + Quantization string `json:"quantization"` + Parameters string `json:"parameters"` + Size string `json:"size"` + SafeTensors struct { + TensorCount json.Number `json:"tensor_count"` + } `json:"safetensors"` + } `json:"config"` +} + +// parseSafeTensorsOCIConfig parses a Docker AI model-config blob. When the blob +// advertises format=="safetensors" it emits a single named package whose +// metadata is enriched by scanning sibling OCI layers (README.md for license + +// base_model name, config.json for architecture, LICENSE text for a license +// fallback). For any other format it emits nothing so the GGUF cataloger can +// claim the image. +func parseSafeTensorsOCIConfig(_ context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + defer internal.CloseAndLogError(reader, reader.Path()) + + body, err := io.ReadAll(io.LimitReader(reader, 1024*1024)) + if err != nil { + return nil, nil, fmt.Errorf("failed to read docker AI model config: %w", err) + } + + var cfg dockerAIModelConfig + if err := json.Unmarshal(body, &cfg); err != nil { + return nil, nil, fmt.Errorf("failed to decode docker AI model config: %w", err) + } + + if !strings.EqualFold(cfg.Config.Format, "safetensors") { + return nil, nil, nil + } + + md := pkg.SafeTensorsMetadata{ + Format: "safetensors", + Quantization: cfg.Config.Quantization, + Parameters: cfg.Config.Parameters, + TotalSize: cfg.Config.Size, + } + if n, err := cfg.Config.SafeTensors.TensorCount.Int64(); err == nil && n > 0 { + md.TensorCount = uint64(n) + } + + name, license := enrichFromDockerAILayers(resolver, &md) + + p := newSafeTensorsPackage( + &md, + name, + "", + license, + reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), + ) + + return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse docker AI 
safetensors config") +} + +// enrichFromDockerAILayers walks sibling Docker AI layers via the OCI resolver +// and mines them for a model name, architecture, and license. README.md carries +// YAML frontmatter with license + base_model; HF config.json carries +// architectures/torch_dtype/transformers_version; the vnd.docker.ai.license +// blob is plain license text. +func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsMetadata) (name, license string) { + ociResolver, ok := resolver.(file.OCIMediaTypeResolver) + if !ok { + return "", "" + } + + modelFileLocations, err := ociResolver.FilesByMediaType(dockerAIModelFileMediaType) + if err != nil { + log.Debugf("failed to list docker AI model-file layers: %v", err) + } + for _, loc := range modelFileLocations { + rc, err := resolver.FileContentsByLocation(loc) + if err != nil { + continue + } + buf, readErr := io.ReadAll(io.LimitReader(rc, 4*1024*1024)) + internal.CloseAndLogError(rc, loc.RealPath) + if readErr != nil { + continue + } + classifyAndMerge(buf, md, &name, &license) + } + + if license == "" { + license = readDockerAILicense(resolver, ociResolver) + } + + return name, license +} + +// classifyAndMerge sniffs a vnd.docker.ai.model.file blob (which can be README.md, +// config.json, generation_config.json, tokenizer.json, etc.) and folds useful +// fields into the metadata struct and out-parameters. 
+func classifyAndMerge(buf []byte, md *pkg.SafeTensorsMetadata, name, license *string) { + trimmed := trimLeadingWhitespace(buf) + switch { + case hasPrefix(trimmed, "---"): + if fm := parseFrontmatter(buf); fm != nil { + if *license == "" { + *license = fm.License + } + if *name == "" && len(fm.BaseModel) > 0 { + *name = lastPathSegment(fm.BaseModel[0]) + } + } + case hasPrefix(trimmed, "{"): + var cfg hfConfig + if err := json.Unmarshal(buf, &cfg); err != nil { + return + } + if md.Architecture == "" && len(cfg.Architectures) > 0 { + md.Architecture = cfg.Architectures[0] + } + if md.TorchDtype == "" { + md.TorchDtype = cfg.TorchDtype + } + if md.TransformersVersion == "" { + md.TransformersVersion = cfg.TransformersVersion + } + if *name == "" && cfg.NameOrPath != "" { + *name = lastPathSegment(cfg.NameOrPath) + } + } +} + +// readDockerAILicense extracts a short license identifier from the first line +// of a vnd.docker.ai.license layer. Docker packages the full license text, so +// we only peek at a prefix looking for well-known titles like "Apache License". 
+func readDockerAILicense(resolver file.Resolver, ociResolver file.OCIMediaTypeResolver) string { + locations, err := ociResolver.FilesByMediaType(dockerAILicenseMediaType) + if err != nil || len(locations) == 0 { + return "" + } + rc, err := resolver.FileContentsByLocation(locations[0]) + if err != nil { + return "" + } + defer internal.CloseAndLogError(rc, locations[0].RealPath) + + buf, err := io.ReadAll(io.LimitReader(rc, 2048)) + if err != nil { + return "" + } + text := strings.ToLower(string(buf)) + switch { + case strings.Contains(text, "apache license") && strings.Contains(text, "version 2.0"): + return "Apache-2.0" + case strings.Contains(text, "mit license"): + return "MIT" + case strings.Contains(text, "bsd 3-clause"): + return "BSD-3-Clause" + case strings.Contains(text, "bsd 2-clause"): + return "BSD-2-Clause" + case strings.Contains(text, "gnu general public license") && strings.Contains(text, "version 3"): + return "GPL-3.0" + } + return "" +} + +func hasPrefix(b []byte, s string) bool { + return len(b) >= len(s) && string(b[:len(s)]) == s +} + +func trimLeadingWhitespace(b []byte) []byte { + i := 0 + for i < len(b) && (b[i] == ' ' || b[i] == '\t' || b[i] == '\r' || b[i] == '\n') { + i++ + } + // strip a leading UTF-8 BOM if present + if len(b)-i >= 3 && b[i] == 0xEF && b[i+1] == 0xBB && b[i+2] == 0xBF { + i += 3 + } + return b[i:] +} + +func lastPathSegment(s string) string { + if i := strings.LastIndexAny(s, "/\\"); i >= 0 { + return s[i+1:] + } + return s +} + +// integrity check +var _ generic.Parser = parseSafeTensorsOCIConfig diff --git a/syft/pkg/cataloger/ai/processor.go b/syft/pkg/cataloger/ai/processor.go index c3ca8c3e7..aecf82625 100644 --- a/syft/pkg/cataloger/ai/processor.go +++ b/syft/pkg/cataloger/ai/processor.go @@ -57,3 +57,44 @@ func ggufMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship, err er return namedPkgs, rels, err } + +// safeTensorsMergeProcessor mirrors ggufMergeProcessor for SafeTensors packages. 
+// When scanning an OCI AI artifact, the model-config blob produces one named +// package and individual .safetensors shard layers (if we ever decide to parse +// them directly) would produce nameless packages. Any nameless SafeTensors +// packages are collapsed into the named one's Parts slice. +func safeTensorsMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) { + if err != nil { + return pkgs, rels, err + } + if len(pkgs) == 0 { + return pkgs, rels, err + } + + var namedPkgs []pkg.Package + var namelessParts []pkg.SafeTensorsMetadata + for _, p := range pkgs { + if p.Name != "" { + namedPkgs = append(namedPkgs, p) + continue + } + if md, ok := p.Metadata.(pkg.SafeTensorsMetadata); ok { + md.MetadataHash = "" + namelessParts = append(namelessParts, md) + } + } + + if len(namedPkgs) == 0 { + return nil, rels, err + } + + if len(namedPkgs) == 1 && len(namelessParts) > 0 { + winner := &namedPkgs[0] + if md, ok := winner.Metadata.(pkg.SafeTensorsMetadata); ok { + md.Parts = namelessParts + winner.Metadata = md + } + } + + return namedPkgs, rels, err +} diff --git a/syft/pkg/safetensors.go b/syft/pkg/safetensors.go new file mode 100644 index 000000000..c89da1a5d --- /dev/null +++ b/syft/pkg/safetensors.go @@ -0,0 +1,59 @@ +package pkg + +// SafeTensorsMetadata represents metadata extracted from a SafeTensors model. +// SafeTensors is a simple, safe serialization format for storing tensors, used +// as the default weight format for Hugging Face transformer models. Syft may +// populate this struct from three sources: +// - a single .safetensors file (header-only parse) +// - a sharded model described by model.safetensors.index.json +// - a Docker AI OCI model artifact config blob (vnd.docker.ai.model.config.v0.1+json) +// +// The Model Name, License, and Version fields have all been lifted up to be on +// the syft Package. 
+type SafeTensorsMetadata struct { + // Format is the source format label (always "safetensors" for this metadata type). + // Present because the Docker AI model config blob carries an explicit format field + // that can also be "gguf", and recording it here makes the origin explicit. + Format string `json:"format,omitempty" cyclonedx:"format"` + + // Architecture is the model architecture (e.g., "LlamaForCausalLM", + // "Qwen3MoeForConditionalGeneration"), sourced from the Hugging Face config.json + // "architectures" array. + Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"` + + // Quantization describes tensor precision (e.g., "BF16", "F16", "F32", "INT8"). + Quantization string `json:"quantization,omitempty" cyclonedx:"quantization"` + + // Parameters is the parameter count as reported by upstream. Stored as a string + // because Docker AI and Hugging Face labels use notation like "2.68B" or "35B-A3B". + Parameters string `json:"parameters,omitempty" cyclonedx:"parameters"` + + // TensorCount is the number of tensor entries in the file header. + TensorCount uint64 `json:"tensorCount,omitempty" cyclonedx:"tensorCount"` + + // TotalSize is the total byte size of tensor data across all shards when known + // (from the Docker AI model config "size" field or the sharded index "total_size"). + TotalSize string `json:"totalSize,omitempty" cyclonedx:"totalSize"` + + // TorchDtype is the Hugging Face torch_dtype (e.g., "bfloat16", "float16"). + TorchDtype string `json:"torchDtype,omitempty" cyclonedx:"torchDtype"` + + // TransformersVersion is the transformers library version recorded in config.json. + TransformersVersion string `json:"transformersVersion,omitempty" cyclonedx:"transformersVersion"` + + // ShardCount is the number of .safetensors shards for a sharded model (1 for a + // single-file model). 
+ ShardCount int `json:"shardCount,omitempty" cyclonedx:"shardCount"` + + // UserMetadata is the optional "__metadata__" map from a .safetensors file header + // (string-to-string key/values set by the producer). + UserMetadata map[string]string `json:"userMetadata,omitempty" cyclonedx:"userMetadata"` + + // MetadataHash is an xxhash of the normalized header metadata, providing a stable + // identifier for identical model content across repositories or filenames. + MetadataHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"` + + // Parts contains metadata from additional SafeTensors shards or OCI layers that + // were merged into this package during post-processing. + Parts []SafeTensorsMetadata `json:"parts,omitempty" cyclonedx:"parts"` +} diff --git a/syft/source/ocimodelsource/oci_model_source.go b/syft/source/ocimodelsource/oci_model_source.go index 8fe4aae49..6518715ea 100644 --- a/syft/source/ocimodelsource/oci_model_source.go +++ b/syft/source/ocimodelsource/oci_model_source.go @@ -52,7 +52,7 @@ func NewFromRegistry(ctx context.Context, cfg Config) (source.Source, error) { } metadata := buildMetadata(art) - tempDir, resolver, err := fetchAndStoreGGUFHeaders(ctx, client, art) + tempDir, resolver, err := fetchAndStoreModelHeaders(ctx, client, art) if err != nil { return nil, err } @@ -77,38 +77,111 @@ func validateAndFetchArtifact(ctx context.Context, client *registryClient, refer return nil, err } - if len(art.GGUFLayers) == 0 { - return nil, fmt.Errorf("model artifact has no GGUF layers") + if art.Format == "" { + return nil, fmt.Errorf("model artifact has no GGUF or SafeTensors weight layers") } return art, nil } -// fetchAndStoreGGUFHeaders fetches GGUF layer headers and stores them in temp files. 
-func fetchAndStoreGGUFHeaders(ctx context.Context, client *registryClient, artifact *modelArtifact) (string, *fileresolver.ContainerImageModel, error) { - tempDir, err := os.MkdirTemp("", "syft-oci-gguf") +// fetchAndStoreModelHeaders fetches the blobs needed to catalog a Docker AI +// model artifact and stores them on disk so the ContainerImageModel resolver +// can serve them by media type: +// +// - For GGUF: the first maxHeaderBytes of each weight layer (existing behavior). +// - For SafeTensors: the model-config blob (already in memory as RawConfig) +// plus each companion layer, capped at maxCompanionBytes. We deliberately skip the multi-GB +// safetensors weight layers — the config blob carries aggregate metadata +// (format, quantization, parameter count, tensor count, total size) that +// the cataloger needs, and individual shard headers are not yet used. On any fetch or write error, cleanup attempts to remove the temp directory (logging if that fails) before the error is returned. +func fetchAndStoreModelHeaders(ctx context.Context, client *registryClient, artifact *modelArtifact) (string, *fileresolver.ContainerImageModel, error) { + tempDir, err := os.MkdirTemp("", "syft-oci-model") if err != nil { return "", nil, fmt.Errorf("failed to create temp directory: %w", err) } + cleanup := func() { + if osErr := os.RemoveAll(tempDir); osErr != nil { + log.Errorf("unable to remove temp directory (%s): %v", tempDir, osErr) + } + } + layerFiles := make(map[string]fileresolver.LayerInfo) + + // GGUF weight-layer headers (unchanged). for _, layer := range artifact.GGUFLayers { li, err := fetchSingleGGUFHeader(ctx, client, artifact.Reference, layer, tempDir) if err != nil { - osErr := os.RemoveAll(tempDir) - if osErr != nil { - log.Errorf("unable to remove temp directory (%s): %v", tempDir, err) - } + cleanup() return "", nil, err } layerFiles[layer.Digest.String()] = li } + // For SafeTensors artifacts, expose the model-config blob to the resolver + // so parseSafeTensorsOCIConfig can match it by media type. RawConfig was + // already fetched as part of the manifest walk.
+ if artifact.Format == modelFormatSafeTensors && len(artifact.RawConfig) > 0 { + li, err := storeConfigBlobAsLayer(artifact, tempDir) + if err != nil { + cleanup() + return "", nil, err + } + layerFiles[artifact.Manifest.Config.Digest.String()] = li + } + + // Companion layers (README, config.json, tokenizer.json, LICENSE). Small by + // convention; at most maxCompanionBytes of each is fetched, and only for SafeTensors artifacts. A failure on any one layer aborts the whole fetch. + if artifact.Format == modelFormatSafeTensors { + for _, layer := range artifact.CompanionLayers { + li, err := fetchCompanionLayer(ctx, client, artifact.Reference, layer, tempDir) + if err != nil { + cleanup() + return "", nil, err + } + layerFiles[layer.Digest.String()] = li + } + } + resolver := fileresolver.NewContainerImageModel(tempDir, layerFiles) return tempDir, resolver, nil } +// storeConfigBlobAsLayer writes the already-fetched raw config bytes to a temp +// file so the resolver can serve them via media type. The file lives in tempDir, is named after the config digest (with ":" replaced by "-") plus ".config.json", and is written with mode 0600. +func storeConfigBlobAsLayer(artifact *modelArtifact, tempDir string) (fileresolver.LayerInfo, error) { + digest := artifact.Manifest.Config.Digest.String() + safeDigest := strings.ReplaceAll(digest, ":", "-") + tempPath := filepath.Join(tempDir, safeDigest+".config.json") + if err := os.WriteFile(tempPath, artifact.RawConfig, 0600); err != nil { + return fileresolver.LayerInfo{}, fmt.Errorf("failed to write config blob: %w", err) + } + return fileresolver.LayerInfo{ + TempPath: tempPath, + MediaType: string(artifact.Manifest.Config.MediaType), + }, nil +} + +// fetchCompanionLayer downloads a companion (non-weight) layer to a temp file. +// Unlike weight layers we fetch up to maxCompanionBytes, which comfortably +// covers READMEs, HF config.json, tokenizer.json, and LICENSE text. NOTE(review): a blob larger than the cap is presumably stored truncated, which would break JSON parsing downstream — confirm against fetchBlobRange.
+func fetchCompanionLayer(ctx context.Context, client *registryClient, ref name.Reference, layer v1.Descriptor, tempDir string) (fileresolver.LayerInfo, error) { + data, err := client.fetchBlobRange(ctx, ref, layer.Digest, maxCompanionBytes) + if err != nil { + return fileresolver.LayerInfo{}, fmt.Errorf("failed to fetch companion layer: %w", err) + } + safeDigest := strings.ReplaceAll(layer.Digest.String(), ":", "-") + tempPath := filepath.Join(tempDir, safeDigest+".blob") + if err := os.WriteFile(tempPath, data, 0600); err != nil { + return fileresolver.LayerInfo{}, fmt.Errorf("failed to write companion temp file: %w", err) + } + return fileresolver.LayerInfo{ + TempPath: tempPath, + MediaType: string(layer.MediaType), + }, nil +} + // fetchSingleGGUFHeader fetches a single GGUF layer header and writes it to a temp file. func fetchSingleGGUFHeader(ctx context.Context, client *registryClient, ref name.Reference, layer v1.Descriptor, tempDir string) (fileresolver.LayerInfo, error) { headerData, err := client.fetchBlobRange(ctx, ref, layer.Digest, maxHeaderBytes) diff --git a/syft/source/ocimodelsource/registry_client.go b/syft/source/ocimodelsource/registry_client.go index 7574fda6c..5445a87dc 100644 --- a/syft/source/ocimodelsource/registry_client.go +++ b/syft/source/ocimodelsource/registry_client.go @@ -26,9 +26,22 @@ const ( // Reference: https://www.docker.com/blog/oci-artifacts-for-ai-model-packaging/ modelConfigMediaTypePrefix = "application/vnd.docker.ai.model.config." ggufLayerMediaType = "application/vnd.docker.ai.gguf.v3" + safetensorsLayerMediaType = "application/vnd.docker.ai.safetensors" - // Maximum bytes to read/return for GGUF headers + // Companion metadata layers packaged alongside the weight tensors. + // model.file covers README.md / config.json / tokenizer.json / generation_config.json.
+ modelFileMediaType = "application/vnd.docker.ai.model.file" + licenseMediaType = "application/vnd.docker.ai.license" + + // Weight format labels surfaced on modelArtifact.Format. + modelFormatGGUF = "gguf" + modelFormatSafeTensors = "safetensors" + + // Maximum bytes to read/return for weight-layer headers. Safetensors weight layers are currently not fetched (see modelArtifact.SafeTensorsLayers), so in practice this cap applies to GGUF headers. maxHeaderBytes = 8 * 1024 * 1024 // 8 MB + // Maximum bytes to fetch for a companion metadata layer (README, config.json, license). + // These blobs are small by convention; the cap is half of maxHeaderBytes. + maxCompanionBytes = 4 * 1024 * 1024 // 4 MB ) // registryClient handles OCI registry interactions for model artifacts. @@ -110,7 +123,25 @@ type modelArtifact struct { RawManifest []byte RawConfig []byte ManifestDigest string - GGUFLayers []v1.Descriptor + + // Format identifies the weight storage format advertised by the manifest's + // layer media types. Empty means no recognized weight layers were found; GGUF takes precedence when both kinds are present (see detectModelFormat). + Format string + + // GGUFLayers are descriptors for layers carrying GGUF-format weights. + // We fetch the first few MB of each to read the header. + GGUFLayers []v1.Descriptor + + // SafeTensorsLayers are descriptors for layers carrying SafeTensors-format weights. + // For safetensors we do NOT fetch these layers — the model-config blob already + // contains the aggregate metadata we need — but we record them here for counting + // and for future per-shard parsing. + SafeTensorsLayers []v1.Descriptor + + // CompanionLayers are non-weight layers (README, config.json, license) that + // we do fetch (up to maxCompanionBytes each, and only for SafeTensors artifacts) so companion-file parsing + // in the safetensors cataloger can find them via media type.
+ CompanionLayers []v1.Descriptor } func (c *registryClient) fetchModelArtifact(ctx context.Context, refStr string) (*modelArtifact, error) { @@ -151,18 +182,39 @@ func (c *registryClient) fetchModelArtifact(ctx context.Context, refStr string) } ggufLayers := extractGGUFLayers(manifest) + safetensorsLayers := extractSafeTensorsLayers(manifest) + companionLayers := extractCompanionLayers(manifest) return &modelArtifact{ - Reference: ref, - Manifest: manifest, - Config: configFile, - RawManifest: desc.Manifest, - RawConfig: rawConfig, - ManifestDigest: desc.Digest.String(), - GGUFLayers: ggufLayers, + Reference: ref, + Manifest: manifest, + Config: configFile, + RawManifest: desc.Manifest, + RawConfig: rawConfig, + ManifestDigest: desc.Digest.String(), + Format: detectModelFormat(len(ggufLayers), len(safetensorsLayers)), + GGUFLayers: ggufLayers, + SafeTensorsLayers: safetensorsLayers, + CompanionLayers: companionLayers, }, nil } +// detectModelFormat returns a single format string when either GGUF or +// SafeTensors weight layers are present. When both appear (not expected in +// practice for Docker Model Runner artifacts), GGUF wins because the GGUF +// cataloger is the more established path. Empty result means the manifest has +// no recognized weight layers. When GGUF wins, fetchAndStoreModelHeaders stores neither the model-config blob nor the companion layers, since both steps are gated on the SafeTensors format. +func detectModelFormat(ggufCount, safetensorsCount int) string { + switch { + case ggufCount > 0: + return modelFormatGGUF + case safetensorsCount > 0: + return modelFormatSafeTensors + default: + return "" + } +} + // isModelArtifact checks if the manifest represents a model artifact. func isModelArtifact(manifest *v1.Manifest) bool { return strings.HasPrefix(string(manifest.Config.MediaType), modelConfigMediaTypePrefix) @@ -179,6 +231,33 @@ func extractGGUFLayers(manifest *v1.Manifest) []v1.Descriptor { return ggufLayers } +// extractSafeTensorsLayers extracts SafeTensors weight-layer descriptors from +// the manifest, preserving manifest order. The result is nil when no layer has the SafeTensors media type.
+func extractSafeTensorsLayers(manifest *v1.Manifest) []v1.Descriptor { + var out []v1.Descriptor + for _, layer := range manifest.Layers { + if string(layer.MediaType) == safetensorsLayerMediaType { + out = append(out, layer) + } + } + return out +} + +// extractCompanionLayers extracts small, non-weight layers that carry +// cataloger-relevant metadata: README.md / config.json / tokenizer.json / +// generation_config.json under vnd.docker.ai.model.file, and the LICENSE under +// vnd.docker.ai.license. Selection is by media type only (layer size is not checked here); matches keep manifest order and the result is nil when none match. +func extractCompanionLayers(manifest *v1.Manifest) []v1.Descriptor { + var out []v1.Descriptor + for _, layer := range manifest.Layers { + switch string(layer.MediaType) { + case modelFileMediaType, licenseMediaType: + out = append(out, layer) + } + } + return out +} + func (c *registryClient) fetchBlobRange(ctx context.Context, ref name.Reference, digest v1.Hash, maxBytes int64) ([]byte, error) { repo := ref.Context()