diff --git a/.github/workflows/update-cpe-dictionary-index.yml b/.github/workflows/update-cpe-dictionary-index.yml index 21037dd95..98e1a7a4c 100644 --- a/.github/workflows/update-cpe-dictionary-index.yml +++ b/.github/workflows/update-cpe-dictionary-index.yml @@ -14,6 +14,9 @@ env: jobs: upgrade-cpe-dictionary-index: runs-on: ubuntu-latest + permissions: + contents: read + packages: write if: github.repository == 'anchore/syft' # only run for main repo steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 #v5.0.0 @@ -22,18 +25,31 @@ jobs: - name: Bootstrap environment uses: ./.github/actions/bootstrap + id: bootstrap - - name: Bootstrap environment - uses: ./.github/actions/bootstrap + - name: Login to GitHub Container Registry + run: | + echo "${{ secrets.GITHUB_TOKEN }}" | ${{ steps.bootstrap.outputs.oras }} login ghcr.io -u ${{ github.actor }} --password-stdin - - run: | - make generate-cpe-dictionary-index + - name: Pull CPE cache from registry + run: make generate:cpe-index:cache:pull - - uses: tibdex/github-app-token@3beb63f4bd073e61482598c45c71c1019b59b73a #v2.1.0 + - name: Update CPE cache from NVD API + run: make generate:cpe-index:cache:update + env: + NVD_API_KEY: ${{ secrets.NVD_API_KEY }} + + - name: Generate CPE dictionary index + run: make generate:cpe-index:build + + - name: Push updated CPE cache to registry + run: make generate:cpe-index:cache:push + + - uses: actions/create-github-app-token@67018539274d69449ef7c02e8e71183d1719ab42 #v2.1.4 id: generate-token with: - app_id: ${{ secrets.TOKEN_APP_ID }} - private_key: ${{ secrets.TOKEN_APP_PRIVATE_KEY }} + app-id: ${{ secrets.TOKEN_APP_ID }} + private-key: ${{ secrets.TOKEN_APP_PRIVATE_KEY }} - uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e #v7.0.8 with: diff --git a/Taskfile.yaml b/Taskfile.yaml index 7fb020ab5..3fb15daaa 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -1,5 +1,9 @@ version: "3" + +includes: + generate:cpe-index: 
./task.d/generate/cpe-index.yaml + vars: OWNER: anchore PROJECT: syft @@ -511,10 +515,11 @@ tasks: - "gofmt -s -w ./internal/spdxlicense" generate-cpe-dictionary-index: - desc: Generate the CPE index based off of the latest available CPE dictionary - dir: "syft/pkg/cataloger/internal/cpegenerate/dictionary" + desc: Generate the CPE index from local cache cmds: - - "go generate" + - task: generate:cpe-index:cache:pull + - task: generate:cpe-index:cache:update + - task: generate:cpe-index:build ## Build-related targets ################################# diff --git a/go.mod b/go.mod index b23f615f8..80c4316e7 100644 --- a/go.mod +++ b/go.mod @@ -270,7 +270,7 @@ require ( golang.org/x/sys v0.37.0 // indirect golang.org/x/term v0.36.0 // indirect golang.org/x/text v0.30.0 // indirect - golang.org/x/time v0.12.0 // indirect + golang.org/x/time v0.12.0 golang.org/x/tools v0.38.0 golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect google.golang.org/api v0.203.0 // indirect diff --git a/internal/packagemetadata/names.go b/internal/packagemetadata/names.go index e075bc794..64afc83ef 100644 --- a/internal/packagemetadata/names.go +++ b/internal/packagemetadata/names.go @@ -98,7 +98,7 @@ var jsonTypes = makeJSONTypes( jsonNames(pkg.PEBinary{}, "pe-binary"), jsonNames(pkg.PhpComposerLockEntry{}, "php-composer-lock-entry", "PhpComposerJsonMetadata"), jsonNamesWithoutLookup(pkg.PhpComposerInstalledEntry{}, "php-composer-installed-entry", "PhpComposerJsonMetadata"), // the legacy value is split into two types, where the other is preferred - jsonNames(pkg.PhpPeclEntry{}, "php-pecl-entry", "PhpPeclMetadata"), + jsonNames(pkg.PhpPeclEntry{}, "php-pecl-entry", "PhpPeclMetadata"), //nolint:staticcheck jsonNames(pkg.PhpPearEntry{}, "php-pear-entry"), jsonNames(pkg.PortageEntry{}, "portage-db-entry", "PortageMetadata"), jsonNames(pkg.PythonPackage{}, "python-package", "PythonPackageMetadata"), diff --git a/internal/task/package_tasks.go b/internal/task/package_tasks.go 
index bd7e4bce5..6f04f015f 100644 --- a/internal/task/package_tasks.go +++ b/internal/task/package_tasks.go @@ -178,9 +178,9 @@ func DefaultPackageTaskFactories() Factories { // deprecated catalogers //////////////////////////////////////// // these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible) - newSimplePackageTaskFactory(dotnet.NewDotnetDepsCataloger, pkgcataloging.DeprecatedTag), // TODO: remove in syft v2.0 - newSimplePackageTaskFactory(dotnet.NewDotnetPortableExecutableCataloger, pkgcataloging.DeprecatedTag), // TODO: remove in syft v2.0 - newSimplePackageTaskFactory(php.NewPeclCataloger, pkgcataloging.DeprecatedTag), // TODO: remove in syft v2.0 - newSimplePackageTaskFactory(nix.NewStoreCataloger, pkgcataloging.DeprecatedTag), // TODO: remove in syft v2.0 + newSimplePackageTaskFactory(dotnet.NewDotnetDepsCataloger, pkgcataloging.DeprecatedTag), //nolint:staticcheck // TODO: remove in syft v2.0 + newSimplePackageTaskFactory(dotnet.NewDotnetPortableExecutableCataloger, pkgcataloging.DeprecatedTag), //nolint:staticcheck // TODO: remove in syft v2.0 + newSimplePackageTaskFactory(php.NewPeclCataloger, pkgcataloging.DeprecatedTag), //nolint:staticcheck // TODO: remove in syft v2.0 + newSimplePackageTaskFactory(nix.NewStoreCataloger, pkgcataloging.DeprecatedTag), //nolint:staticcheck // TODO: remove in syft v2.0 } } diff --git a/syft/pkg/cataloger/dotnet/binary_cataloger.go b/syft/pkg/cataloger/dotnet/binary_cataloger.go index 8d51af915..5be797c6f 100644 --- a/syft/pkg/cataloger/dotnet/binary_cataloger.go +++ b/syft/pkg/cataloger/dotnet/binary_cataloger.go @@ -12,6 +12,7 @@ import ( // binary cataloger will search for .dll and .exe files and create packages based off of the version resources embedded // as a resource directory within the executable. If there is no evidence of a .NET runtime (a CLR header) then no // package will be created. 
+// // Deprecated: use depsBinaryCataloger instead which combines the PE and deps.json data which yields more accurate results (will be removed in syft v2.0). type binaryCataloger struct { } diff --git a/syft/pkg/cataloger/dotnet/cataloger.go b/syft/pkg/cataloger/dotnet/cataloger.go index 384469db3..fe85798ee 100644 --- a/syft/pkg/cataloger/dotnet/cataloger.go +++ b/syft/pkg/cataloger/dotnet/cataloger.go @@ -13,12 +13,14 @@ func NewDotnetDepsBinaryCataloger(config CatalogerConfig) pkg.Cataloger { } // NewDotnetDepsCataloger returns a cataloger based on deps.json file contents. +// // Deprecated: use NewDotnetDepsBinaryCataloger instead which combines the PE and deps.json data which yields more accurate results (will be removed in syft v2.0). func NewDotnetDepsCataloger() pkg.Cataloger { return &depsCataloger{} } // NewDotnetPortableExecutableCataloger returns a cataloger based on PE file contents. +// // Deprecated: use NewDotnetDepsBinaryCataloger instead which combines the PE and deps.json data which yields more accurate results (will be removed in syft v2.0). func NewDotnetPortableExecutableCataloger() pkg.Cataloger { return &binaryCataloger{} diff --git a/syft/pkg/cataloger/dotnet/deps_cataloger.go b/syft/pkg/cataloger/dotnet/deps_cataloger.go index e39ec80df..91ca126e4 100644 --- a/syft/pkg/cataloger/dotnet/deps_cataloger.go +++ b/syft/pkg/cataloger/dotnet/deps_cataloger.go @@ -9,6 +9,7 @@ import ( ) // depsCataloger will search for deps.json file contents. +// // Deprecated: use depsBinaryCataloger instead which combines the PE and deps.json data which yields more accurate results (will be removed in syft v2.0). 
type depsCataloger struct { } diff --git a/syft/pkg/cataloger/internal/cpegenerate/README.md b/syft/pkg/cataloger/internal/cpegenerate/README.md new file mode 100644 index 000000000..2b648de1b --- /dev/null +++ b/syft/pkg/cataloger/internal/cpegenerate/README.md @@ -0,0 +1,167 @@ +# CPE Generation + +This package generates Common Platform Enumeration (CPE) identifiers for software packages discovered by Syft. +CPEs are standardized identifiers that enable vulnerability matching by linking packages to known vulnerabilities in databases like the National Vulnerability Database (NVD). + +## Overview + +CPE generation in Syft uses a **two-tier approach** to balance accuracy and coverage: + +1. **Dictionary Lookups** (Authoritative): Pre-validated CPEs from the official NIST CPE dictionary +2. **Heuristic Generation** (Fallback): Intelligent generation based on package metadata and ecosystem-specific patterns + +This dual approach ensures: +- **High accuracy** for packages in the NIST dictionary (no false positives) +- **Broad coverage** for packages not yet in the dictionary (maximizes vulnerability detection) +- **Fast performance** with an embedded, indexed CPE dictionary (~814KB) + +## Why It Matters + +CPEs link discovered packages to security vulnerabilities (CVEs) in tools like Grype. Without accurate CPE generation, vulnerability scanning misses security issues. 
+ +## How It Works + +### Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ Syft Package Discovery │ +└──────────────────┬──────────────────────────────────────┘ + │ + ▼ + ┌─────────────────────┐ + │ CPE Generation │ + │ (this package) │ + └──────────┬──────────┘ + │ + ┌───────────┴────────────┐ + │ │ + ▼ ▼ +┌──────────────────┐ ┌─────────────────────┐ +│ Dictionary │ │ Heuristic │ +│ Lookup │ │ Generation │ +│ │ │ │ +│ • Embedded index │ │ • Ecosystem rules │ +│ • ~22K entries │ │ • Vendor/product │ +│ • 11 ecosystems │ │ candidates │ +└──────────────────┘ │ • Curated mappings │ + │ • Smart filters │ + └─────────────────────┘ +``` + +### Dictionary Generation Process + +The dictionary is generated offline and embedded into the Syft binary for fast, offline lookups. + +**Location**: `dictionary/index-generator/` + +**Process**: +1. **Fetch**: Retrieves CPE data from NVD Products API using incremental updates +2. **Cache**: Stores raw API responses in ORAS registry for reuse (`.cpe-cache/`) +3. **Filter**: + - Removes CPEs without reference URLs + - Excludes hardware (`h`) and OS (`o`) CPEs (keeps only applications `a`) +4. **Index by Ecosystem**: + - Extracts package names from reference URLs (npm, pypi, rubygems, etc.) + - Creates index: `ecosystem → package_name → [CPE strings]` +5. **Embed**: Generates `data/cpe-index.json` embedded via `go:embed` directive + +### Runtime CPE Lookup/Generation + +**Entry Point**: `generate.go` + +When Syft discovers a package: + +1. **Check for Declared CPEs**: If package metadata already contains CPEs (from SBOM imports), skip generation +2. **Try Dictionary Lookup** (`FromDictionaryFind`): + - Loads embedded CPE index (singleton, loaded once) + - Looks up by ecosystem + package name + - Returns pre-validated CPEs if found + - Marks source as `NVDDictionaryLookupSource` +3. 
**Fallback to Heuristic Generation** (`FromPackageAttributes`): + - Generates vendor/product/targetSW candidates using ecosystem-specific logic + - Creates CPE permutations from candidates + - Applies filters to remove known false positives + - Marks source as `GeneratedSource` + +### Supported Ecosystems + +**Dictionary Lookups** (11 ecosystems): +npm, RubyGems, PyPI, Jenkins Plugins, crates.io, PHP, Go Modules, WordPress Plugins/Themes + +**Heuristic Generation** (all package types): +All dictionary ecosystems plus Java, .NET/NuGet, Alpine APK, Debian/RPM, and any other package type Syft discovers + +### Ecosystem-Specific Intelligence + +The heuristic generator uses per-ecosystem strategies: + +- **Java**: Extracts vendor from groupId, product from artifactId +- **Python**: Parses author fields, adds `_project` suffix variants +- **Go**: Extracts org/repo from module paths (`github.com/org/repo`) +- **JavaScript**: Handles npm scope patterns (`@scope/package`) + +### Curated Mappings & Filters + +- **500+ curated mappings**: `curl` → `haxx`, `spring-boot` → `pivotal`, etc. +- **Filters**: Prevent false positives (Jenkins plugins vs. core, Jira client vs. server) +- **Validation**: Ensures CPE syntax correctness before returning + +## Implementation Details + +### Embedded Index Format + +```json +{ + "ecosystems": { + "npm": { + "lodash": ["cpe:2.3:a:lodash:lodash:*:*:*:*:*:node.js:*:*"] + }, + "pypi": { + "Django": ["cpe:2.3:a:djangoproject:django:*:*:*:*:*:python:*:*"] + } + } +} +``` + +The dictionary generator maps packages to ecosystems using reference URL patterns (npmjs.com, pypi.org, rubygems.org, etc.). 
+ +## Maintenance + +### Updating the CPE Dictionary + +The CPE dictionary should be updated periodically to include new packages: + +```bash +# Full workflow: pull cache → update from NVD → build index +make generate:cpe-index + +# Or run individual steps: +make generate:cpe-index:cache:pull # Pull cached CPE data from ORAS +make generate:cpe-index:cache:update # Fetch updates from NVD Products API +make generate:cpe-index:build # Generate cpe-index.json from cache +``` + +**Optional**: Set `NVD_API_KEY` for faster updates (50 req/30s vs 5 req/30s) + +This workflow: +1. Pulls existing cache from ORAS registry (avoids re-fetching all ~1.5M CPEs) +2. Fetches only products modified since last update from NVD Products API +3. Builds indexed dictionary (~814KB, ~22K entries) +4. (Separate step, run automatically in CI) Pushes updated cache for team reuse via `make generate:cpe-index:cache:push` + +### Extending CPE Generation + +**Add dictionary support for a new ecosystem:** +1. Add URL pattern in `index-generator/generate.go` +2. Regenerate index with `make generate:cpe-index` + +**Improve heuristic generation:** +1. Modify ecosystem-specific file (e.g., `java.go`, `python.go`) +2. 
Add curated mappings to `candidate_by_package_type.go` + +**Key files:** +- `generate.go` - Main generation logic +- `dictionary/` - Dictionary generator and embedded index +- `candidate_by_package_type.go` - Ecosystem-specific candidates +- `filter.go` - Filtering rules diff --git a/syft/pkg/cataloger/internal/cpegenerate/dictionary/data/cpe-index.json b/syft/pkg/cataloger/internal/cpegenerate/dictionary/data/cpe-index.json index 3091c043d..da514c524 100644 --- a/syft/pkg/cataloger/internal/cpegenerate/dictionary/data/cpe-index.json +++ b/syft/pkg/cataloger/internal/cpegenerate/dictionary/data/cpe-index.json @@ -653,6 +653,9 @@ "dbCharts": [ "cpe:2.3:a:jenkins:dbcharts:*:*:*:*:*:jenkins:*:*" ], + "deadmanssnitch": [ + "cpe:2.3:a:jenkins:dead_man\\'s_snitch:*:*:*:*:*:jenkins:*:*" + ], "debian-package-builder": [ "cpe:2.3:a:jenkins:debian_package_builder:*:*:*:*:*:jenkins:*:*" ], @@ -1360,6 +1363,9 @@ "oic-auth": [ "cpe:2.3:a:jenkins:openid_connect_authentication:*:*:*:*:*:jenkins:*:*" ], + "oidc-provider": [ + "cpe:2.3:a:jenkins:openid_connect_provider:*:*:*:*:*:jenkins:*:*" + ], "ontrack": [ "cpe:2.3:a:jenkins:ontrack:*:*:*:*:*:jenkins:*:*" ], @@ -1531,6 +1537,9 @@ "qualys-pc": [ "cpe:2.3:a:qualys:policy_compliance:*:*:*:*:*:jenkins:*:*" ], + "qualys-was": [ + "cpe:2.3:a:qualys:web_application_screening:*:*:*:*:*:jenkins:*:*" + ], "quayio-trigger": [ "cpe:2.3:a:jenkins:quay.io_trigger:*:*:*:*:*:jenkins:*:*" ], @@ -2164,6 +2173,9 @@ "@azure/ms-rest-nodeauth": [ "cpe:2.3:a:microsoft:ms-rest-nodeauth:*:*:*:*:*:node.js:*:*" ], + "@backstage/backend-common": [ + "cpe:2.3:a:linuxfoundation:backstage_backend-common:*:*:*:*:*:node.js:*:*" + ], "@backstage/plugin-auth-backend": [ "cpe:2.3:a:linuxfoundation:auth_backend:*:*:*:*:*:node.js:*:*" ], @@ -3035,6 +3047,9 @@ "electron-packager": [ "cpe:2.3:a:electron-packager_project:electron-packager:*:*:*:*:*:node.js:*:*" ], + "electron-pdf": [ + "cpe:2.3:a:fraserxu:electron-pdf:*:*:*:*:*:node.js:*:*" + ], "elliptic": [ 
"cpe:2.3:a:indutny:elliptic:*:*:*:*:*:node.js:*:*" ], @@ -5284,6 +5299,9 @@ "ts-process-promises": [ "cpe:2.3:a:ts-process-promises_project:ts-process-promises:*:*:*:*:*:node.js:*:*" ], + "tsup": [ + "cpe:2.3:a:egoist:tsup:*:*:*:*:*:node.js:*:*" + ], "ua-parser": [ "cpe:2.3:a:ua-parser_project:ua-parser:*:*:*:*:*:node.js:*:*" ], @@ -5552,6 +5570,9 @@ "alfnru/password_recovery": [ "cpe:2.3:a:password_recovery_project:password_recovery:*:*:*:*:*:roundcube:*:*" ], + "couleurcitron/tarteaucitron-wp": [ + "cpe:2.3:a:couleurcitron:tarteaucitron-wp:*:*:*:*:*:wordpress:*:*" + ], "dev-lancer/minecraft-motd-parser": [ "cpe:2.3:a:jgniecki:minecraft_motd_parser:*:*:*:*:*:*:*:*" ], @@ -7259,6 +7280,9 @@ "ab-press-optimizer-lite": [ "cpe:2.3:a:abpressoptimizer:ab_press_optimizer:*:*:*:*:*:wordpress:*:*" ], + "abitgone-commentsafe": [ + "cpe:2.3:a:abitgone:abitgone_commentsafe:*:*:*:*:*:wordpress:*:*" + ], "about-me": [ "cpe:2.3:a:about-me_project:about-me:*:*:*:*:*:wordpress:*:*" ], @@ -7605,6 +7629,9 @@ "advanced-backgrounds": [ "cpe:2.3:a:wpbackgrounds:advanced_wordpress_backgrounds:*:*:*:*:*:wordpress:*:*" ], + "advanced-blocks-pro": [ + "cpe:2.3:a:essamamdani:advanced_blocks_pro:*:*:*:*:*:wordpress:*:*" + ], "advanced-booking-calendar": [ "cpe:2.3:a:elbtide:advanced_booking_calendar:*:*:*:*:*:wordpress:*:*" ], @@ -7702,6 +7729,9 @@ "affiliatebooster-blocks": [ "cpe:2.3:a:affiliatebooster:affiliate_booster:*:*:*:*:*:wordpress:*:*" ], + "affiliateimportereb": [ + "cpe:2.3:a:cr1000:affiliateimportereb:*:*:*:*:*:wordpress:*:*" + ], "affiliates-manager": [ "cpe:2.3:a:wpaffiliatemanager:affiliates_manager:*:*:*:*:*:wordpress:*:*" ], @@ -8408,6 +8438,9 @@ "cpe:2.3:a:dotstore:woocommerce_category_banner_management:*:*:*:*:*:wordpress:*:*", "cpe:2.3:a:multidots:banner_management_for_woocommerce:*:*:*:*:*:wordpress:*:*" ], + "bannerlid": [ + "cpe:2.3:a:web_lid:bannerlid:*:*:*:*:*:wordpress:*:*" + ], "barcode-scanner-lite-pos-to-manage-products-inventory-and-orders": [ 
"cpe:2.3:a:ukrsolution:barcode_scanner_and_inventory_manager:*:*:*:*:*:wordpress:*:*" ], @@ -8516,6 +8549,9 @@ "better-elementor-addons": [ "cpe:2.3:a:kitforest:better_elementor_addons:*:*:*:*:*:wordpress:*:*" ], + "better-follow-button-for-jetpack": [ + "cpe:2.3:a:antonpug:better_flow_button_for_jetpack:*:*:*:*:*:wordpress:*:*" + ], "better-font-awesome": [ "cpe:2.3:a:better_font_awesome_project:better_font_awesome:*:*:*:*:*:wordpress:*:*" ], @@ -8770,6 +8806,9 @@ "bp-cover": [ "cpe:2.3:a:buddypress_cover_project:buddypress_cover:*:*:*:*:*:wordpress:*:*" ], + "bp-email-assign-templates": [ + "cpe:2.3:a:shanebp:bp_email_assign_templates:*:*:*:*:*:wordpress:*:*" + ], "bp-profile-search": [ "cpe:2.3:a:dontdream:bp_profile_search:*:*:*:*:*:wordpress:*:*" ], @@ -9240,6 +9279,9 @@ "chained-quiz": [ "cpe:2.3:a:kibokolabs:chained_quiz:*:*:*:*:*:wordpress:*:*" ], + "chalet-montagne-com-tools": [ + "cpe:2.3:a:alpium:chalet-montagne.com_tools:*:*:*:*:*:wordpress:*:*" + ], "chamber-dashboard-business-directory": [ "cpe:2.3:a:chamber_dashboard_business_directory_project:chamber_dashboard_business_directory:*:*:*:*:*:wordpress:*:*" ], @@ -9252,6 +9294,9 @@ "change-memory-limit": [ "cpe:2.3:a:simon99:change_memory_limit:*:*:*:*:*:wordpress:*:*" ], + "change-table-prefix": [ + "cpe:2.3:a:youngtechleads:change_table_prefix:*:*:*:*:*:wordpress:*:*" + ], "change-uploaded-file-permissions": [ "cpe:2.3:a:change_uploaded_file_permissions_project:change_uploaded_file_permissions:*:*:*:*:*:wordpress:*:*" ], @@ -9550,6 +9595,9 @@ "commenttweets": [ "cpe:2.3:a:theresehansen:commenttweets:*:*:*:*:*:wordpress:*:*" ], + "common-tools-for-site": [ + "cpe:2.3:a:chetanvaghela:common_tools_for_site:*:*:*:*:*:wordpress:*:*" + ], "commonsbooking": [ "cpe:2.3:a:wielebenwir:commonsbooking:*:*:*:*:*:wordpress:*:*" ], @@ -10041,6 +10089,9 @@ "csv-importer": [ "cpe:2.3:a:deniskobozev:csv_importer:*:*:*:*:*:wordpress:*:*" ], + "csv-mass-importer": [ + 
"cpe:2.3:a:aleapp:csv_mass_importer:*:*:*:*:*:wordpress:*:*" + ], "ct-commerce": [ "cpe:2.3:a:ujwolbastakoti:ct_commerce:*:*:*:*:*:wordpress:*:*" ], @@ -10798,6 +10849,9 @@ "easy-svg": [ "cpe:2.3:a:benjaminzekavica:easy_svg_support:*:*:*:*:*:wordpress:*:*" ], + "easy-svg-upload": [ + "cpe:2.3:a:delowerhossain:easy_svg_upload:*:*:*:*:*:wordpress:*:*" + ], "easy-table": [ "cpe:2.3:a:easy_table_project:easy_table:*:*:*:*:*:wordpress:*:*" ], @@ -11286,6 +11340,9 @@ "exit-intent-popups-by-optimonk": [ "cpe:2.3:a:optimonk:optimonk\\:popups\\,_personalization_\\\u0026_a\\/b_testing:*:*:*:*:*:wordpress:*:*" ], + "exit-notifier": [ + "cpe:2.3:a:cvstech:exit_notifier:*:*:*:*:*:wordpress:*:*" + ], "exmage-wp-image-links": [ "cpe:2.3:a:villatheme:exmage:*:*:*:*:*:wordpress:*:*" ], @@ -11325,6 +11382,9 @@ "exquisite-paypal-donation": [ "cpe:2.3:a:exquisite_paypal_donation_project:exquisite_paypal_donation:*:*:*:*:*:wordpress:*:*" ], + "extended-search-plugin": [ + "cpe:2.3:a:jakesnyder:enhanced_search_box:*:*:*:*:*:wordpress:*:*" + ], "extensions-for-cf7": [ "cpe:2.3:a:hasthemes:extensions_for_cf7:*:*:*:*:*:wordpress:*:*" ], @@ -11571,6 +11631,7 @@ "cpe:2.3:a:five_minute_webshop_project:five_minute_webshop:*:*:*:*:*:wordpress:*:*" ], "fl3r-feelbox": [ + "cpe:2.3:a:armandofiore:fl3r_feelbox:*:*:*:*:*:wordpress:*:*", "cpe:2.3:a:fl3r-feelbox_project:fl3r-feelbox:*:*:*:*:*:wordpress:*:*" ], "flash-album-gallery": [ @@ -12235,6 +12296,9 @@ "google-sitemap-plugin": [ "cpe:2.3:a:bestwebsoft:google_sitemap:*:*:*:*:*:wordpress:*:*" ], + "google-website-translator": [ + "cpe:2.3:a:prisna:google_website_translator:*:*:*:*:*:wordpress:*:*" + ], "googleanalytics": [ "cpe:2.3:a:sharethis:dashboard_for_google_analytics:*:*:*:*:*:wordpress:*:*" ], @@ -12634,6 +12698,9 @@ "hunk-companion": [ "cpe:2.3:a:themehunk:hunk_companion:*:*:*:*:*:wordpress:*:*" ], + "hurrytimer": [ + "cpe:2.3:a:nabillemsieh:hurrytimer:*:*:*:*:*:wordpress:*:*" + ], "hyphenator": [ 
"cpe:2.3:a:benedictb\\/maciejgryniuk:hyphenator:*:*:*:*:*:wordpress:*:*" ], @@ -12907,6 +12974,9 @@ "cpe:2.3:a:cm-wp:woody_code_snippets:*:*:*:*:*:wordpress:*:*", "cpe:2.3:a:webcraftic:woody_ad_snippets:*:*:*:*:*:wordpress:*:*" ], + "insert-php-code-snippet": [ + "cpe:2.3:a:f1logic:insert_php_code_snippet:*:*:*:*:*:wordpress:*:*" + ], "insight-core": [ "cpe:2.3:a:thememove:insight_core:*:*:*:*:*:wordpress:*:*" ], @@ -13011,6 +13081,9 @@ "ip-blacklist-cloud": [ "cpe:2.3:a:ip_blacklist_cloud_project:ip_blacklist_cloud:*:*:*:*:*:wordpress:*:*" ], + "ip-vault-wp-firewall": [ + "cpe:2.3:a:youtag:two-factor_authentication:*:*:*:*:*:wordpress:*:*" + ], "ip2location-country-blocker": [ "cpe:2.3:a:ip2location:country_blocker:*:*:*:*:*:wordpress:*:*" ], @@ -13557,6 +13630,9 @@ "list-category-posts": [ "cpe:2.3:a:fernandobriano:list_category_posts:*:*:*:*:*:wordpress:*:*" ], + "list-children": [ + "cpe:2.3:a:sizeable:list_children:*:*:*:*:*:wordpress:*:*" + ], "list-last-changes": [ "cpe:2.3:a:rolandbaer:list_last_changes:*:*:*:*:*:wordpress:*:*" ], @@ -13854,6 +13930,9 @@ "manual-image-crop": [ "cpe:2.3:a:manual_image_crop_project:manual_image_crop:*:*:*:*:*:wordpress:*:*" ], + "mapfig-studio": [ + "cpe:2.3:a:acugis:mapfig_studio:*:*:*:*:*:wordpress:*:*" + ], "mapping-multiple-urls-redirect-same-page": [ "cpe:2.3:a:mapping_multiple_urls_redirect_same_page_project:mapping_multiple_urls_redirect_same_page:*:*:*:*:*:wordpress:*:*" ], @@ -14237,6 +14316,9 @@ "monetize": [ "cpe:2.3:a:monetize_project:monetize:*:*:*:*:*:wordpress:*:*" ], + "monitor-chat": [ + "cpe:2.3:a:edwardstoever:monitor.chat:*:*:*:*:*:wordpress:*:*" + ], "month-name-translation-benaceur": [ "cpe:2.3:a:benaceur-php:month_name_translation_benaceur:*:*:*:*:*:wordpress:*:*" ], @@ -14306,6 +14388,9 @@ "mq-woocommerce-products-price-bulk-edit": [ "cpe:2.3:a:mq-woocommerce-products-price-bulk-edit_project:mq-woocommerce-products-price-bulk-edit:*:*:*:*:*:wordpress:*:*" ], + "ms-registration": [ + 
"cpe:2.3:a:alphaefficiencyteam:custom_login_and_registration:*:*:*:*:*:wordpress:*:*" + ], "ms-reviews": [ "cpe:2.3:a:ms-reviews_project:ms-reviews:*:*:*:*:*:wordpress:*:*" ], @@ -14438,7 +14523,7 @@ "cpe:2.3:a:stormhillmedia:mybook_table_bookstore:*:*:*:*:*:wordpress:*:*" ], "mycred": [ - "cpe:2.3:a:mycred:mycred:*:*:*:*:*:wordpress:*:*" + "cpe:2.3:a:wpexperts:mycred:*:*:*:*:*:wordpress:*:*" ], "mycryptocheckout": [ "cpe:2.3:a:plainviewplugins:mycryptocheckout:*:*:*:*:*:wordpress:*:*" @@ -14625,12 +14710,18 @@ "ninjafirewall": [ "cpe:2.3:a:nintechnet:ninjafirewall:*:*:*:*:*:wordpress:*:*" ], + "ninjateam-telegram": [ + "cpe:2.3:a:ninjateam:chat_for_telegram:*:*:*:*:*:wordpress:*:*" + ], "nirweb-support": [ "cpe:2.3:a:nirweb:nirweb_support:*:*:*:*:*:wordpress:*:*" ], "nitropack": [ "cpe:2.3:a:nitropack:nitropack:*:*:*:*:*:wordpress:*:*" ], + "nix-anti-spam-light": [ + "cpe:2.3:a:nixsolutions:nix_anti-spam_light:*:*:*:*:*:wordpress:*:*" + ], "nktagcloud": [ "cpe:2.3:a:better_tag_cloud_project:better_tag_cloud:*:*:*:*:*:wordpress:*:*" ], @@ -15186,6 +15277,9 @@ "cpe:2.3:a:greentreelabs:gallery_photoblocks:*:*:*:*:*:wordpress:*:*", "cpe:2.3:a:wpchill:gallery_photoblocks:*:*:*:*:*:wordpress:*:*" ], + "photokit": [ + "cpe:2.3:a:jackzhu:photokit:*:*:*:*:*:wordpress:*:*" + ], "photoshow": [ "cpe:2.3:a:codepeople:smart_image_gallery:*:*:*:*:*:wordpress:*:*" ], @@ -15511,6 +15605,9 @@ "postman-smtp": [ "cpe:2.3:a:postman-smtp_project:postman-smtp:*:*:*:*:*:wordpress:*:*" ], + "postmash": [ + "cpe:2.3:a:jmash:postmash:*:*:*:*:*:wordpress:*:*" + ], "postmatic": [ "cpe:2.3:a:gopostmatic:replyable:*:*:*:*:*:wordpress:*:*" ], @@ -15761,6 +15858,9 @@ "pure-chat": [ "cpe:2.3:a:purechat:pure_chat:*:*:*:*:*:*:*:*" ], + "pure-css-circle-progress-bar": [ + "cpe:2.3:a:shafayat:pure_css_circle_progress_bar:*:*:*:*:*:wordpress:*:*" + ], "purple-xmls-google-product-feed-for-woocommerce": [ 
"cpe:2.3:a:dpl:product_feed_on_woocommerce_for_google\\,_awin\\,_shareasale\\,_bing\\,_and_more:*:*:*:*:*:wordpress:*:*" ], @@ -15964,6 +16064,9 @@ "react-webcam": [ "cpe:2.3:a:react_webcam_project:react_webcam:*:*:*:*:*:wordpress:*:*" ], + "reaction-buttons": [ + "cpe:2.3:a:jakob42:reaction_buttons:*:*:*:*:*:wordpress:*:*" + ], "read-and-understood": [ "cpe:2.3:a:read_and_understood_project:read_and_understood:*:*:*:*:*:wordpress:*:*" ], @@ -16124,6 +16227,9 @@ "reservation-studio-widget": [ "cpe:2.3:a:pvmg:reservation.studio:*:*:*:*:*:wordpress:*:*" ], + "reset": [ + "cpe:2.3:a:smartzminds:reset:*:*:*:*:*:wordpress:*:*" + ], "resize-at-upload-plus": [ "cpe:2.3:a:resize_at_upload_plus_project:resize_at_upload_plus:*:*:*:*:*:wordpress:*:*" ], @@ -16527,6 +16633,9 @@ "sellkit": [ "cpe:2.3:a:artbees:sellkit:*:*:*:*:*:wordpress:*:*" ], + "send-email-only-on-reply-to-my-comment": [ + "cpe:2.3:a:yasirwazir:send_email_only_on_reply_to_my_comment:*:*:*:*:*:wordpress:*:*" + ], "send-emails-with-mandrill": [ "cpe:2.3:a:millermedia:mandrill:*:*:*:*:*:wordpress:*:*" ], @@ -17092,6 +17201,9 @@ "site-editor": [ "cpe:2.3:a:siteeditor:site_editor:*:*:*:*:*:wordpress:*:*" ], + "site-mailer": [ + "cpe:2.3:a:elementor:site_mailer:*:*:*:*:*:wordpress:*:*" + ], "site-offline": [ "cpe:2.3:a:freehtmldesigns:site_offline:*:*:*:*:*:wordpress:*:*" ], @@ -17780,6 +17892,9 @@ "svg-support": [ "cpe:2.3:a:benbodhi:svg_support:*:*:*:*:*:wordpress:*:*" ], + "svg-uploads-support": [ + "cpe:2.3:a:ablyperu:svg_uploads_support:*:*:*:*:*:wordpress:*:*" + ], "svg-vector-icon-plugin": [ "cpe:2.3:a:wp_svg_icons_project:wp_svg_icons:*:*:*:*:*:wordpress:*:*" ], @@ -17859,6 +17974,7 @@ "cpe:2.3:a:tainacan:tainacan:*:*:*:*:*:wordpress:*:*" ], "tarteaucitronjs": [ + "cpe:2.3:a:amauri:tarteaucitron.io:*:*:*:*:*:wordpress:*:*", "cpe:2.3:a:tarteaucitron.js_-_cookies_legislation_\\\u0026_gdpr_project:tarteaucitron.js_-_cookies_legislation_\\\u0026_gdpr:*:*:*:*:*:wordpress:*:*" ], "taskbuilder": [ @@ -18106,6 
+18222,9 @@ "timeline-widget-addon-for-elementor": [ "cpe:2.3:a:coolplugins:timeline_widget_for_elementor:*:*:*:*:*:wordpress:*:*" ], + "timer-countdown": [ + "cpe:2.3:a:yaidier:countdown_timer:*:*:*:*:*:wordpress:*:*" + ], "timesheet": [ "cpe:2.3:a:bestwebsoft:timesheet:*:*:*:*:*:wordpress:*:*" ], @@ -18249,9 +18368,15 @@ "tripetto": [ "cpe:2.3:a:tripetto:tripetto:*:*:*:*:*:wordpress:*:*" ], + "tripplan": [ + "cpe:2.3:a:checklist:trip_plan:*:*:*:*:*:wordpress:*:*" + ], "truebooker-appointment-booking": [ "cpe:2.3:a:themetechmount:truebooker:*:*:*:*:*:wordpress:*:*" ], + "trx_addons": [ + "cpe:2.3:a:themerex:addons:*:*:*:*:*:wordpress:*:*" + ], "ts-webfonts-for-conoha": [ "cpe:2.3:a:gmo:typesquare_webfonts_for_conoha:*:*:*:*:*:wordpress:*:*" ], @@ -18457,9 +18582,15 @@ "ultimate-weather-plugin": [ "cpe:2.3:a:ultimate-weather_project:ultimate-weather:*:*:*:*:*:wordpress:*:*" ], + "ultimate-woocommerce-auction-pro": [ + "cpe:2.3:a:auctionplugin:ultimate_wordpress_auction_plugin:*:*:*:*:pro:wordpress:*:*" + ], "ultimate-wp-query-search-filter": [ "cpe:2.3:a:ultimate_wp_query_search_filter_project:ultimate_wp_query_search_filter:*:*:*:*:*:wordpress:*:*" ], + "ultimate-youtube-video-player": [ + "cpe:2.3:a:codelizar:ultimate_youtube_video_\\\u0026_shorts_player_with_vimeo:*:*:*:*:*:wordpress:*:*" + ], "ultra-companion": [ "cpe:2.3:a:wpoperation:ultra_companion:*:*:*:*:*:wordpress:*:*" ], @@ -19198,6 +19329,9 @@ "woo-esto": [ "cpe:2.3:a:rebing:woocommerce_esto:*:*:*:*:*:wordpress:*:*" ], + "woo-exfood": [ + "cpe:2.3:a:exthemes:woocommerce_food:*:*:*:*:*:wordpress:*:*" + ], "woo-floating-cart-lite": [ "cpe:2.3:a:xplodedthemes:xt_floating_cart_for_woocommerce:*:*:*:*:*:wordpress:*:*" ], @@ -19267,6 +19401,9 @@ "woo-shipping-dpd-baltic": [ "cpe:2.3:a:dpdgroup:woocommerce_shipping:*:*:*:*:*:wordpress:*:*" ], + "woo-slider-pro-drag-drop-slider-builder-for-woocommerce": [ + "cpe:2.3:a:binarycarpenter:woo_slider_pro:*:*:*:*:*:wordpress:*:*" + ], "woo-smart-compare": [ 
"cpe:2.3:a:wpclever:wpc_smart_compare_for_woocommerce:*:*:*:*:*:wordpress:*:*" ], @@ -19820,6 +19957,9 @@ "cpe:2.3:a:kigurumi:csv_exporter:*:*:*:*:*:wordpress:*:*", "cpe:2.3:a:wp_csv_exporter_project:wp_csv_exporter:*:*:*:*:*:wordpress:*:*" ], + "wp-curriculo-vitae": [ + "cpe:2.3:a:williamluis:wp-curriculo_vitae_free:*:*:*:*:*:wordpress:*:*" + ], "wp-custom-admin-interface": [ "cpe:2.3:a:wp_custom_admin_interface_project:wp_custom_admin_interface:*:*:*:*:*:*:*:*" ], @@ -19891,7 +20031,8 @@ "cpe:2.3:a:display_users_project:display_users:*:*:*:*:*:wordpress:*:*" ], "wp-docs": [ - "cpe:2.3:a:androidbubble:wp_docs:*:*:*:*:*:wordpress:*:*" + "cpe:2.3:a:androidbubble:wp_docs:*:*:*:*:*:wordpress:*:*", + "cpe:2.3:a:fahadmahmood:wp_docs:*:*:*:*:*:wordpress:*:*" ], "wp-domain-redirect": [ "cpe:2.3:a:wp_domain_redirect_project:wp_domain_redirect:*:*:*:*:*:wordpress:*:*" @@ -20795,6 +20936,9 @@ "wp-table-builder": [ "cpe:2.3:a:dotcamp:wp_table_builder:*:*:*:*:*:wordpress:*:*" ], + "wp-table-manager": [ + "cpe:2.3:a:joomunited:wp_table_manager:*:*:*:*:*:wordpress:*:*" + ], "wp-table-reloaded": [ "cpe:2.3:a:wp-table_reloaded_project:wp-table_reloaded:*:*:*:*:*:wordpress:*:*" ], @@ -21139,6 +21283,9 @@ "wppizza": [ "cpe:2.3:a:wp-pizza:wppizza:*:*:*:*:*:wordpress:*:*" ], + "wpquiz": [ + "cpe:2.3:a:bauc:wpquiz:*:*:*:*:*:wordpress:*:*" + ], "wprequal": [ "cpe:2.3:a:kevinbrent:wprequal:*:*:*:*:*:wordpress:*:*" ], @@ -21169,6 +21316,9 @@ "wpsolr-search-engine": [ "cpe:2.3:a:wpsolr:wpsolr-search-engine:*:*:*:*:*:wordpress:*:*" ], + "wpstickybar-sticky-bar-sticky-header": [ + "cpe:2.3:a:a17lab:wpstickybar:*:*:*:*:*:wordpress:*:*" + ], "wpstream": [ "cpe:2.3:a:wpstream:wpstream:*:*:*:*:*:wordpress:*:*" ], @@ -21276,6 +21426,9 @@ "xtremelocator": [ "cpe:2.3:a:xtremelocator:xtremelocator:*:*:*:*:*:wordpress:*:*" ], + "xv-random-quotes": [ + "cpe:2.3:a:xavivars:xv_random_quotes:*:*:*:*:*:wordpress:*:*" + ], "yabp": [ "cpe:2.3:a:tromit:yabp:*:*:*:*:*:wordpress:*:*" ], @@ -21362,6 +21515,9 @@ 
"yotpo-social-reviews-for-woocommerce": [ "cpe:2.3:a:yotpo:yotpo:*:*:*:*:*:wordpress:*:*" ], + "yotuwp-easy-youtube-embed": [ + "cpe:2.3:a:yotuwp:video_gallery:*:*:*:*:*:wordpress:*:*" + ], "yourchannel": [ "cpe:2.3:a:plugin:yourchannel:*:*:*:*:*:wordpress:*:*" ], @@ -21782,6 +21938,9 @@ "pressmart": [ "cpe:2.3:a:presslayouts:pressmart:*:*:*:*:*:wordpress:*:*" ], + "puzzles": [ + "cpe:2.3:a:themerex:puzzles:*:*:*:*:*:wordpress:*:*" + ], "regina-lite": [ "cpe:2.3:a:machothemes:regina_lite:*:*:*:*:*:wordpress:*:*" ], diff --git a/syft/pkg/cataloger/internal/cpegenerate/dictionary/generate_index.go b/syft/pkg/cataloger/internal/cpegenerate/dictionary/generate_index.go deleted file mode 100644 index 35a6e46d7..000000000 --- a/syft/pkg/cataloger/internal/cpegenerate/dictionary/generate_index.go +++ /dev/null @@ -1,3 +0,0 @@ -package dictionary - -//go:generate go run ./index-generator/ -o data/cpe-index.json diff --git a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/.gitignore b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/.gitignore new file mode 100644 index 000000000..78772006b --- /dev/null +++ b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/.gitignore @@ -0,0 +1,6 @@ +# ORAS cache directory - raw CPE data from NVD API +.cpe-cache/ + +# Build artifacts +index-generator +.tmp-* diff --git a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/cache_manager.go b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/cache_manager.go new file mode 100644 index 000000000..997dc93cd --- /dev/null +++ b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/cache_manager.go @@ -0,0 +1,370 @@ +package main + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "time" +) + +const cacheDir = ".cpe-cache" + +// IncrementMetadata tracks a single fetch increment for a monthly batch +type IncrementMetadata struct { + FetchedAt time.Time `json:"fetchedAt"` + 
LastModStartDate time.Time `json:"lastModStartDate"` + LastModEndDate time.Time `json:"lastModEndDate"` + Products int `json:"products"` + StartIndex int `json:"startIndex"` // API pagination start index + EndIndex int `json:"endIndex"` // API pagination end index (last fetched) +} + +// MonthlyBatchMetadata tracks all increments for a specific month +type MonthlyBatchMetadata struct { + Complete bool `json:"complete"` + TotalProducts int `json:"totalProducts"` + Increments []IncrementMetadata `json:"increments"` +} + +// CacheMetadata tracks the state of the CPE cache using monthly time-based organization +type CacheMetadata struct { + LastFullRefresh time.Time `json:"lastFullRefresh"` + LastStartIndex int `json:"lastStartIndex"` // last successfully processed startIndex for resume + TotalProducts int `json:"totalProducts"` + MonthlyBatches map[string]*MonthlyBatchMetadata `json:"monthlyBatches"` // key is "YYYY-MM" +} + +// CacheManager handles local caching of CPE data +type CacheManager struct { + cacheDir string +} + +// NewCacheManager creates a new cache manager +func NewCacheManager() *CacheManager { + return &CacheManager{ + cacheDir: cacheDir, + } +} + +// EnsureCacheDir ensures the cache directory exists +func (m *CacheManager) EnsureCacheDir() error { + if err := os.MkdirAll(m.cacheDir, 0755); err != nil { + return fmt.Errorf("failed to create cache directory: %w", err) + } + return nil +} + +// LoadMetadata loads the cache metadata +func (m *CacheManager) LoadMetadata() (*CacheMetadata, error) { + metadataPath := filepath.Join(m.cacheDir, "metadata.json") + + // check if metadata file exists + if _, err := os.Stat(metadataPath); os.IsNotExist(err) { + // return empty metadata for first run + return &CacheMetadata{ + LastFullRefresh: time.Time{}, + TotalProducts: 0, + MonthlyBatches: make(map[string]*MonthlyBatchMetadata), + }, nil + } + + data, err := os.ReadFile(metadataPath) + if err != nil { + return nil, fmt.Errorf("failed to read metadata: %w", 
err) + } + + var metadata CacheMetadata + if err := json.Unmarshal(data, &metadata); err != nil { + return nil, fmt.Errorf("failed to unmarshal metadata: %w", err) + } + + // ensure MonthlyBatches map is initialized + if metadata.MonthlyBatches == nil { + metadata.MonthlyBatches = make(map[string]*MonthlyBatchMetadata) + } + + return &metadata, nil +} + +// SaveMetadata saves the cache metadata +func (m *CacheManager) SaveMetadata(metadata *CacheMetadata) error { + if err := m.EnsureCacheDir(); err != nil { + return err + } + + metadataPath := filepath.Join(m.cacheDir, "metadata.json") + + data, err := json.MarshalIndent(metadata, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal metadata: %w", err) + } + + if err := os.WriteFile(metadataPath, data, 0600); err != nil { + return fmt.Errorf("failed to write metadata: %w", err) + } + + return nil +} + +// SaveProductsToMonthlyFile saves products to a monthly file (initial.json or YYYY-MM.json) +// uses atomic write pattern with temp file + rename for safety +func (m *CacheManager) SaveProductsToMonthlyFile(filename string, products []NVDProduct) error { + if err := m.EnsureCacheDir(); err != nil { + return err + } + + filePath := filepath.Join(m.cacheDir, filename) + tempPath := filePath + ".tmp" + + // marshal products to JSON + data, err := json.MarshalIndent(products, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal products: %w", err) + } + + // write to temp file first + if err := os.WriteFile(tempPath, data, 0600); err != nil { + return fmt.Errorf("failed to write temp file: %w", err) + } + + // atomic rename + if err := os.Rename(tempPath, filePath); err != nil { + // cleanup temp file on error + _ = os.Remove(tempPath) + return fmt.Errorf("failed to rename temp file: %w", err) + } + + return nil +} + +// LoadMonthlyFile loads products from a monthly file +func (m *CacheManager) LoadMonthlyFile(filename string) ([]NVDProduct, error) { + filePath := filepath.Join(m.cacheDir, 
filename) + + data, err := os.ReadFile(filePath) + if err != nil { + if os.IsNotExist(err) { + return []NVDProduct{}, nil + } + return nil, fmt.Errorf("failed to read %s: %w", filename, err) + } + + var products []NVDProduct + if err := json.Unmarshal(data, &products); err != nil { + return nil, fmt.Errorf("failed to unmarshal %s: %w", filename, err) + } + + return products, nil +} + +// GetMonthKey returns the "YYYY-MM" key for a given time +func GetMonthKey(t time.Time) string { + return t.Format("2006-01") +} + +// SaveProducts saves products grouped by modification month +// this is called after fetching from the API to organize products into monthly files +func (m *CacheManager) SaveProducts(products []NVDProduct, isFullRefresh bool, metadata *CacheMetadata, increment IncrementMetadata) error { + if len(products) == 0 { + return nil + } + + if isFullRefresh { + return m.saveFullRefresh(products, metadata) + } + + return m.saveIncrementalUpdate(products, metadata, increment) +} + +// saveFullRefresh saves all products to initial.json +func (m *CacheManager) saveFullRefresh(products []NVDProduct, metadata *CacheMetadata) error { + if err := m.SaveProductsToMonthlyFile("initial.json", products); err != nil { + return fmt.Errorf("failed to save initial.json: %w", err) + } + + metadata.LastFullRefresh = time.Now() + metadata.TotalProducts = len(products) + metadata.LastStartIndex = 0 // reset on full refresh + metadata.MonthlyBatches = make(map[string]*MonthlyBatchMetadata) + + return nil +} + +// saveIncrementalUpdate saves products grouped by modification month to monthly files +func (m *CacheManager) saveIncrementalUpdate(products []NVDProduct, metadata *CacheMetadata, increment IncrementMetadata) error { + productsByMonth, err := groupProductsByMonth(products) + if err != nil { + return err + } + + for monthKey, monthProducts := range productsByMonth { + if err := m.saveMonthlyBatch(monthKey, monthProducts, metadata, increment); err != nil { + return err + } + 
} + + // update last processed index for resume capability + metadata.LastStartIndex = increment.EndIndex + + return nil +} + +// groupProductsByMonth groups products by their lastModified month +func groupProductsByMonth(products []NVDProduct) (map[string][]NVDProduct, error) { + productsByMonth := make(map[string][]NVDProduct) + + for _, product := range products { + lastMod, err := time.Parse(time.RFC3339, product.CPE.LastModified) + if err != nil { + return nil, fmt.Errorf("failed to parse lastModified for %s: %w", product.CPE.CPENameID, err) + } + + monthKey := GetMonthKey(lastMod) + productsByMonth[monthKey] = append(productsByMonth[monthKey], product) + } + + return productsByMonth, nil +} + +// saveMonthlyBatch saves products for a specific month, merging with existing data +func (m *CacheManager) saveMonthlyBatch(monthKey string, monthProducts []NVDProduct, metadata *CacheMetadata, increment IncrementMetadata) error { + filename := monthKey + ".json" + + // load existing products for this month + existing, err := m.LoadMonthlyFile(filename) + if err != nil { + return fmt.Errorf("failed to load existing %s: %w", filename, err) + } + + // merge products (incoming entries replace existing ones with the same CPENameID) + merged := mergeProducts(existing, monthProducts) + + // atomically save merged products + if err := m.SaveProductsToMonthlyFile(filename, merged); err != nil { + return fmt.Errorf("failed to save %s: %w", filename, err) + } + + // update metadata + updateMonthlyBatchMetadata(metadata, monthKey, monthProducts, merged, increment) + + return nil +} + +// mergeProducts deduplicates products by CPENameID; entries in updated overwrite entries in existing (timestamps are not compared) +func mergeProducts(existing, updated []NVDProduct) []NVDProduct { + productMap := make(map[string]NVDProduct) + + for _, p := range existing { + productMap[p.CPE.CPENameID] = p + } + for _, p := range updated { + productMap[p.CPE.CPENameID] = p + } + + merged := make([]NVDProduct, 0, len(productMap)) + for _, p := range productMap { + merged = 
append(merged, p) + } + + return merged +} + +// updateMonthlyBatchMetadata updates the metadata for a monthly batch +func updateMonthlyBatchMetadata(metadata *CacheMetadata, monthKey string, newProducts, allProducts []NVDProduct, increment IncrementMetadata) { + if metadata.MonthlyBatches[monthKey] == nil { + metadata.MonthlyBatches[monthKey] = &MonthlyBatchMetadata{ + Complete: false, + Increments: []IncrementMetadata{}, + } + } + + batchMeta := metadata.MonthlyBatches[monthKey] + batchMeta.Increments = append(batchMeta.Increments, IncrementMetadata{ + FetchedAt: increment.FetchedAt, + LastModStartDate: increment.LastModStartDate, + LastModEndDate: increment.LastModEndDate, + Products: len(newProducts), + StartIndex: increment.StartIndex, + EndIndex: increment.EndIndex, + }) + batchMeta.TotalProducts = len(allProducts) +} + +// LoadAllProducts loads and merges all cached products from monthly files +// returns a deduplicated slice of products (newer products override older ones by CPENameID) +func (m *CacheManager) LoadAllProducts() ([]NVDProduct, error) { + // check if cache directory exists + if _, err := os.Stat(m.cacheDir); os.IsNotExist(err) { + return []NVDProduct{}, nil + } + + productMap := make(map[string]NVDProduct) + + // load initial.json first (if it exists) + initial, err := m.LoadMonthlyFile("initial.json") + if err != nil { + return nil, fmt.Errorf("failed to load initial.json: %w", err) + } + + for _, p := range initial { + productMap[p.CPE.CPENameID] = p + } + + // load all monthly files (YYYY-MM.json) + entries, err := os.ReadDir(m.cacheDir) + if err != nil { + return nil, fmt.Errorf("failed to read cache directory: %w", err) + } + + for _, entry := range entries { + if entry.IsDir() || filepath.Ext(entry.Name()) != ".json" { + continue + } + + // skip metadata.json and initial.json + if entry.Name() == "metadata.json" || entry.Name() == "initial.json" { + continue + } + + // load monthly file + products, err := m.LoadMonthlyFile(entry.Name()) 
+ if err != nil { + return nil, fmt.Errorf("failed to load %s: %w", entry.Name(), err) + } + + // merge products (newer wins based on lastModified) + for _, p := range products { + existing, exists := productMap[p.CPE.CPENameID] + if !exists { + productMap[p.CPE.CPENameID] = p + continue + } + + // compare lastModified timestamps to keep the newer one (parse errors are ignored, so an unparsable timestamp compares as the zero time) + newMod, _ := time.Parse(time.RFC3339, p.CPE.LastModified) + existingMod, _ := time.Parse(time.RFC3339, existing.CPE.LastModified) + + if newMod.After(existingMod) { + productMap[p.CPE.CPENameID] = p + } + } + } + + // convert map to slice + allProducts := make([]NVDProduct, 0, len(productMap)) + for _, p := range productMap { + allProducts = append(allProducts, p) + } + + return allProducts, nil +} + +// CleanCache removes the local cache directory +func (m *CacheManager) CleanCache() error { + if err := os.RemoveAll(m.cacheDir); err != nil { + return fmt.Errorf("failed to clean cache: %w", err) + } + fmt.Println("Cache cleaned successfully") + return nil +} diff --git a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/cache_manager_test.go b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/cache_manager_test.go new file mode 100644 index 000000000..2ed185849 --- /dev/null +++ b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/cache_manager_test.go @@ -0,0 +1,319 @@ +package main + +import ( + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCacheManager_MonthlyFileOperations(t *testing.T) { + tmpDir := t.TempDir() + cacheManager := &CacheManager{cacheDir: tmpDir} + + testProducts := []NVDProduct{ + { + CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product1:1.0:*:*:*:*:*:*:*", + CPENameID: "product1-id", + LastModified: "2024-11-15T10:00:00.000Z", + Titles: []NVDTitle{{Title: "Test Product 1", Lang: "en"}}, + }, + }, + { + CPE: NVDProductDetails{ + CPEName: 
"cpe:2.3:a:vendor:product2:2.0:*:*:*:*:*:*:*", + CPENameID: "product2-id", + LastModified: "2024-11-20T10:00:00.000Z", + Titles: []NVDTitle{{Title: "Test Product 2", Lang: "en"}}, + }, + }, + } + + t.Run("save and load monthly file", func(t *testing.T) { + err := cacheManager.SaveProductsToMonthlyFile("2024-11.json", testProducts) + require.NoError(t, err) + + expectedPath := filepath.Join(tmpDir, "2024-11.json") + require.FileExists(t, expectedPath) + + loaded, err := cacheManager.LoadMonthlyFile("2024-11.json") + require.NoError(t, err) + require.Len(t, loaded, 2) + assert.Equal(t, testProducts[0].CPE.CPEName, loaded[0].CPE.CPEName) + assert.Equal(t, testProducts[1].CPE.CPEName, loaded[1].CPE.CPEName) + }) + + t.Run("atomic save with temp file", func(t *testing.T) { + err := cacheManager.SaveProductsToMonthlyFile("2024-12.json", testProducts) + require.NoError(t, err) + + // temp file should not exist after successful save + tempPath := filepath.Join(tmpDir, "2024-12.json.tmp") + require.NoFileExists(t, tempPath) + + // actual file should exist + finalPath := filepath.Join(tmpDir, "2024-12.json") + require.FileExists(t, finalPath) + }) + + t.Run("load non-existent file returns empty", func(t *testing.T) { + loaded, err := cacheManager.LoadMonthlyFile("2025-01.json") + require.NoError(t, err) + assert.Empty(t, loaded) + }) +} + +func TestCacheManager_Metadata(t *testing.T) { + tmpDir := t.TempDir() + cacheManager := &CacheManager{cacheDir: tmpDir} + + t.Run("load metadata on first run", func(t *testing.T) { + metadata, err := cacheManager.LoadMetadata() + require.NoError(t, err) + require.NotNil(t, metadata) + + assert.NotNil(t, metadata.MonthlyBatches) + assert.True(t, metadata.LastFullRefresh.IsZero()) + assert.Equal(t, 0, metadata.LastStartIndex) + assert.Equal(t, 0, metadata.TotalProducts) + }) + + t.Run("save and load metadata with monthly batches", func(t *testing.T) { + now := time.Now() + metadata := &CacheMetadata{ + LastFullRefresh: now, + 
LastStartIndex: 4000, + TotalProducts: 1500, + MonthlyBatches: map[string]*MonthlyBatchMetadata{ + "2024-11": { + Complete: true, + TotalProducts: 1000, + Increments: []IncrementMetadata{ + { + FetchedAt: now, + LastModStartDate: now.Add(-24 * time.Hour), + LastModEndDate: now, + Products: 1000, + StartIndex: 0, + EndIndex: 2000, + }, + }, + }, + "2024-12": { + Complete: false, + TotalProducts: 500, + Increments: []IncrementMetadata{ + { + FetchedAt: now, + LastModStartDate: now.Add(-12 * time.Hour), + LastModEndDate: now, + Products: 500, + StartIndex: 0, + EndIndex: 1000, + }, + }, + }, + }, + } + + err := cacheManager.SaveMetadata(metadata) + require.NoError(t, err) + + loadedMetadata, err := cacheManager.LoadMetadata() + require.NoError(t, err) + + assert.Equal(t, metadata.TotalProducts, loadedMetadata.TotalProducts) + assert.Equal(t, metadata.LastStartIndex, loadedMetadata.LastStartIndex) + assert.Equal(t, 2, len(loadedMetadata.MonthlyBatches)) + assert.True(t, loadedMetadata.MonthlyBatches["2024-11"].Complete) + assert.False(t, loadedMetadata.MonthlyBatches["2024-12"].Complete) + assert.Equal(t, 1000, loadedMetadata.MonthlyBatches["2024-11"].TotalProducts) + assert.Len(t, loadedMetadata.MonthlyBatches["2024-11"].Increments, 1) + }) +} + +func TestCacheManager_LoadAllProducts(t *testing.T) { + tmpDir := t.TempDir() + cacheManager := &CacheManager{cacheDir: tmpDir} + + t.Run("load and merge monthly files", func(t *testing.T) { + // save initial.json with base products + initialProducts := []NVDProduct{ + {CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product1:*:*:*:*:*:*:*:*", + CPENameID: "product1-id", + LastModified: "2024-10-01T10:00:00.000Z", + }}, + {CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product2:*:*:*:*:*:*:*:*", + CPENameID: "product2-id", + LastModified: "2024-10-15T10:00:00.000Z", + }}, + } + err := cacheManager.SaveProductsToMonthlyFile("initial.json", initialProducts) + require.NoError(t, err) + + // save 2024-11.json with 
updated product2 and new product3 + novemberProducts := []NVDProduct{ + {CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product2:*:*:*:*:*:*:*:*", + CPENameID: "product2-id", + LastModified: "2024-11-05T10:00:00.000Z", // newer version + }}, + {CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product3:*:*:*:*:*:*:*:*", + CPENameID: "product3-id", + LastModified: "2024-11-10T10:00:00.000Z", + }}, + } + err = cacheManager.SaveProductsToMonthlyFile("2024-11.json", novemberProducts) + require.NoError(t, err) + + // load all products + allProducts, err := cacheManager.LoadAllProducts() + require.NoError(t, err) + + // should have 3 unique products (product2 from Nov overwrites Oct version) + require.Len(t, allProducts, 3) + + // verify we got all products + cpeNames := make(map[string]string) // CPENameID -> LastModified + for _, product := range allProducts { + cpeNames[product.CPE.CPENameID] = product.CPE.LastModified + } + + assert.Contains(t, cpeNames, "product1-id") + assert.Contains(t, cpeNames, "product2-id") + assert.Contains(t, cpeNames, "product3-id") + + // product2 should be the newer version from November + assert.Equal(t, "2024-11-05T10:00:00.000Z", cpeNames["product2-id"]) + }) + + t.Run("empty directory", func(t *testing.T) { + emptyDir := t.TempDir() + emptyCache := &CacheManager{cacheDir: emptyDir} + + allProducts, err := emptyCache.LoadAllProducts() + require.NoError(t, err) + assert.Empty(t, allProducts) + }) +} + +func TestCacheManager_CleanCache(t *testing.T) { + tmpDir := t.TempDir() + cacheManager := &CacheManager{cacheDir: tmpDir} + + // create some cache files + testProducts := []NVDProduct{ + {CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product:*:*:*:*:*:*:*:*", + CPENameID: "test-id", + LastModified: "2024-11-01T10:00:00.000Z", + }}, + } + err := cacheManager.SaveProductsToMonthlyFile("initial.json", testProducts) + require.NoError(t, err) + + // verify cache exists + require.DirExists(t, tmpDir) + + // clean cache + err = 
cacheManager.CleanCache() + require.NoError(t, err) + + // verify cache is removed + _, err = os.Stat(tmpDir) + assert.True(t, os.IsNotExist(err)) +} + +func TestCacheManager_SaveProducts(t *testing.T) { + tmpDir := t.TempDir() + cacheManager := &CacheManager{cacheDir: tmpDir} + + t.Run("full refresh saves to initial.json", func(t *testing.T) { + metadata := &CacheMetadata{ + MonthlyBatches: make(map[string]*MonthlyBatchMetadata), + } + + products := []NVDProduct{ + {CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product1:*:*:*:*:*:*:*:*", + CPENameID: "p1", + LastModified: "2024-10-01T10:00:00.000Z", + }}, + } + + increment := IncrementMetadata{ + FetchedAt: time.Now(), + Products: 1, + } + + err := cacheManager.SaveProducts(products, true, metadata, increment) + require.NoError(t, err) + + // verify initial.json exists + initialPath := filepath.Join(tmpDir, "initial.json") + require.FileExists(t, initialPath) + + // verify metadata updated + assert.NotZero(t, metadata.LastFullRefresh) + assert.Equal(t, 1, metadata.TotalProducts) + assert.Empty(t, metadata.MonthlyBatches) + }) + + t.Run("incremental update groups by month", func(t *testing.T) { + metadata := &CacheMetadata{ + LastFullRefresh: time.Now().Add(-30 * 24 * time.Hour), + MonthlyBatches: make(map[string]*MonthlyBatchMetadata), + } + + products := []NVDProduct{ + {CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product1:*:*:*:*:*:*:*:*", + CPENameID: "p1", + LastModified: "2024-11-05T10:00:00.000Z", + }}, + {CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product2:*:*:*:*:*:*:*:*", + CPENameID: "p2", + LastModified: "2024-11-15T10:00:00.000Z", + }}, + {CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product3:*:*:*:*:*:*:*:*", + CPENameID: "p3", + LastModified: "2024-12-01T10:00:00.000Z", + }}, + } + + increment := IncrementMetadata{ + FetchedAt: time.Now(), + Products: 3, + } + + err := cacheManager.SaveProducts(products, false, metadata, increment) + require.NoError(t, err) + + // 
verify monthly files exist + nov2024Path := filepath.Join(tmpDir, "2024-11.json") + dec2024Path := filepath.Join(tmpDir, "2024-12.json") + require.FileExists(t, nov2024Path) + require.FileExists(t, dec2024Path) + + // verify metadata has monthly batches + assert.Len(t, metadata.MonthlyBatches, 2) + assert.Contains(t, metadata.MonthlyBatches, "2024-11") + assert.Contains(t, metadata.MonthlyBatches, "2024-12") + + // verify 2024-11 has 2 products + assert.Equal(t, 2, metadata.MonthlyBatches["2024-11"].TotalProducts) + assert.Len(t, metadata.MonthlyBatches["2024-11"].Increments, 1) + + // verify 2024-12 has 1 product + assert.Equal(t, 1, metadata.MonthlyBatches["2024-12"].TotalProducts) + }) +} diff --git a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/generate.go b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/generate.go index c456ddec3..cc04633dd 100644 --- a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/generate.go +++ b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/generate.go @@ -1,11 +1,6 @@ package main import ( - "compress/gzip" - "encoding/json" - "encoding/xml" - "fmt" - "io" "log" "slices" "strings" @@ -15,39 +10,6 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/internal/cpegenerate/dictionary" ) -func generateIndexedDictionaryJSON(rawGzipData io.Reader) ([]byte, error) { - gzipReader, err := gzip.NewReader(rawGzipData) - if err != nil { - return nil, fmt.Errorf("unable to decompress CPE dictionary: %w", err) - } - defer gzipReader.Close() - - // Read XML data - data, err := io.ReadAll(gzipReader) - if err != nil { - return nil, fmt.Errorf("unable to read CPE dictionary: %w", err) - } - - // Unmarshal XML - var cpeList CpeList - if err := xml.Unmarshal(data, &cpeList); err != nil { - return nil, fmt.Errorf("unable to unmarshal CPE dictionary XML: %w", err) - } - - // Filter out data that's not applicable here - cpeList = filterCpeList(cpeList) - - // Create indexed 
dictionary to help with looking up CPEs - indexedDictionary := indexCPEList(cpeList) - - // Convert to JSON - jsonData, err := json.MarshalIndent(indexedDictionary, "", " ") - if err != nil { - return nil, fmt.Errorf("unable to marshal CPE dictionary to JSON: %w", err) - } - return jsonData, nil -} - // filterCpeList removes CPE items that are not applicable to software packages. func filterCpeList(cpeList CpeList) CpeList { var processedCpeList CpeList diff --git a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/generate_test.go b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/generate_test.go index 10f767d06..097fb01ad 100644 --- a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/generate_test.go +++ b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/generate_test.go @@ -3,6 +3,7 @@ package main import ( "bytes" "compress/gzip" + "encoding/xml" "io" "os" "testing" @@ -15,22 +16,37 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/internal/cpegenerate/dictionary" ) -func Test_generateIndexedDictionaryJSON(t *testing.T) { +func Test_processCPEList(t *testing.T) { + // load test data from XML file (legacy format for testing backward compatibility) f, err := os.Open("testdata/official-cpe-dictionary_v2.3.xml") require.NoError(t, err) + defer f.Close() - // Create a buffer to store the gzipped data in memory + // create a buffer to store the gzipped data in memory buf := new(bytes.Buffer) w := gzip.NewWriter(buf) _, err = io.Copy(w, f) require.NoError(t, err) - // (finalize the gzip stream) + // finalize the gzip stream err = w.Close() require.NoError(t, err) - dictionaryJSON, err := generateIndexedDictionaryJSON(buf) + // decompress and parse XML to get CpeList + gzipReader, err := gzip.NewReader(buf) + require.NoError(t, err) + defer gzipReader.Close() + + data, err := io.ReadAll(gzipReader) + require.NoError(t, err) + + var cpeList CpeList + err = xml.Unmarshal(data, &cpeList) + 
require.NoError(t, err) + + // process the CPE list + dictionaryJSON, err := processCPEList(cpeList) assert.NoError(t, err) expected, err := os.ReadFile("./testdata/expected-cpe-index.json") @@ -40,7 +56,7 @@ func Test_generateIndexedDictionaryJSON(t *testing.T) { dictionaryJSONString := string(dictionaryJSON) if diff := cmp.Diff(expectedDictionaryJSONString, dictionaryJSONString); diff != "" { - t.Errorf("generateIndexedDictionaryJSON() mismatch (-want +got):\n%s", diff) + t.Errorf("processCPEList() mismatch (-want +got):\n%s", diff) } } diff --git a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/main.go b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/main.go index 1e035d9a2..be6a9ffe0 100644 --- a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/main.go +++ b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/main.go @@ -1,49 +1,217 @@ -// This program downloads the latest CPE dictionary from NIST and processes it into a JSON file that can be embedded into Syft for more accurate CPE results. +// This program fetches CPE data from the NVD Products API and processes it into a JSON file that can be embedded into Syft for more accurate CPE results. +// ORAS caching is managed by Taskfile tasks - this program only works with local cache. 
package main import ( + "context" + "encoding/json" "errors" "flag" "fmt" "log" - "net/http" "os" + "path/filepath" + "time" ) func mainE() error { var outputFilename string - flag.StringVar(&outputFilename, "o", "", "file location to save CPE index") + var forceFullRefresh bool + var cacheOnly bool + flag.StringVar(&outputFilename, "o", "", "file location to save CPE index (required for build mode)") + flag.BoolVar(&forceFullRefresh, "full", false, "force full refresh instead of incremental update") + flag.BoolVar(&cacheOnly, "cache-only", false, "only update cache from NVD API, don't generate index") flag.Parse() - if outputFilename == "" { - return errors.New("-o is required") + // validate flags + if !cacheOnly && outputFilename == "" { + return errors.New("-o is required (unless using -cache-only)") } - // Download and decompress file - fmt.Println("Fetching CPE dictionary...") - resp, err := http.Get(cpeDictionaryURL) - if err != nil { - return fmt.Errorf("unable to get CPE dictionary: %w", err) + if cacheOnly && outputFilename != "" { + return errors.New("-cache-only and -o cannot be used together") } - defer resp.Body.Close() + + ctx := context.Background() + cacheManager := NewCacheManager() + + // MODE 1: Update cache only (called by task generate:cpe-index:cache:update) + if cacheOnly { + return updateCache(ctx, cacheManager, forceFullRefresh) + } + + // MODE 2: Generate index from existing cache (called by task generate:cpe-index:build) + return generateIndexFromCache(cacheManager, outputFilename) +} + +// updateCache fetches new/updated CPE data from NVD API and saves to local cache +func updateCache(ctx context.Context, cacheManager *CacheManager, forceFullRefresh bool) error { + metadata, err := cacheManager.LoadMetadata() + if err != nil { + return fmt.Errorf("failed to load metadata: %w", err) + } + + lastModStartDate, isFullRefresh := determineUpdateMode(metadata, forceFullRefresh) + + // use resume index if available + resumeFromIndex := 0 + if 
!isFullRefresh && metadata.LastStartIndex > 0 { + resumeFromIndex = metadata.LastStartIndex + fmt.Printf("Resuming from index %d...\n", resumeFromIndex) + } + + allProducts, increment, err := fetchProducts(ctx, lastModStartDate, resumeFromIndex) + if err != nil { + // if we have partial products, save them before returning error + if len(allProducts) > 0 { + fmt.Printf("\nError occurred but saving %d products fetched so far...\n", len(allProducts)) + if saveErr := saveAndReportResults(cacheManager, allProducts, isFullRefresh, metadata, increment); saveErr != nil { + fmt.Printf("WARNING: Failed to save partial progress: %v\n", saveErr) + } else { + fmt.Println("Partial progress saved successfully. Run again to resume from this point.") + } + } + return err + } + + if len(allProducts) == 0 { + fmt.Println("No products fetched (already up to date)") + return nil + } + + return saveAndReportResults(cacheManager, allProducts, isFullRefresh, metadata, increment) +} + +// determineUpdateMode decides whether to do a full refresh or incremental update +func determineUpdateMode(metadata *CacheMetadata, forceFullRefresh bool) (time.Time, bool) { + if forceFullRefresh || metadata.LastFullRefresh.IsZero() { + fmt.Println("Performing full refresh of CPE data") + return time.Time{}, true + } + + fmt.Printf("Performing incremental update since %s\n", metadata.LastFullRefresh.Format("2006-01-02")) + return metadata.LastFullRefresh, false +} + +// fetchProducts fetches products from the NVD API +func fetchProducts(ctx context.Context, lastModStartDate time.Time, resumeFromIndex int) ([]NVDProduct, IncrementMetadata, error) { + apiClient := NewNVDAPIClient() + fmt.Println("Fetching CPE data from NVD Products API...") + + var allProducts []NVDProduct + var totalResults int + var firstStartIndex, lastEndIndex int + + onPageFetched := func(startIndex int, response NVDProductsResponse) error { + if totalResults == 0 { + totalResults = response.TotalResults + firstStartIndex = startIndex 
+ } + lastEndIndex = startIndex + response.ResultsPerPage + allProducts = append(allProducts, response.Products...) + fmt.Printf("Fetched %d/%d products...\n", len(allProducts), totalResults) + return nil + } + + if err := apiClient.FetchProductsSince(ctx, lastModStartDate, resumeFromIndex, onPageFetched); err != nil { + // return partial products with increment metadata so they can be saved + increment := IncrementMetadata{ + FetchedAt: time.Now(), + LastModStartDate: lastModStartDate, + LastModEndDate: time.Now(), + Products: len(allProducts), + StartIndex: firstStartIndex, + EndIndex: lastEndIndex, + } + return allProducts, increment, fmt.Errorf("failed to fetch products from NVD API: %w", err) + } + + increment := IncrementMetadata{ + FetchedAt: time.Now(), + LastModStartDate: lastModStartDate, + LastModEndDate: time.Now(), + Products: len(allProducts), + StartIndex: firstStartIndex, + EndIndex: lastEndIndex, + } + + return allProducts, increment, nil +} + +// saveAndReportResults saves products and metadata, then reports success +func saveAndReportResults(cacheManager *CacheManager, allProducts []NVDProduct, isFullRefresh bool, metadata *CacheMetadata, increment IncrementMetadata) error { + fmt.Println("Saving products to cache...") + if err := cacheManager.SaveProducts(allProducts, isFullRefresh, metadata, increment); err != nil { + return fmt.Errorf("failed to save products: %w", err) + } + + if err := cacheManager.SaveMetadata(metadata); err != nil { + return fmt.Errorf("failed to save metadata: %w", err) + } + + fmt.Println("Cache updated successfully!") + if isFullRefresh { + fmt.Printf("Total products in cache: %d\n", len(allProducts)) + } else { + fmt.Printf("Added/updated %d products\n", len(allProducts)) + fmt.Printf("Grouped into %d monthly files\n", len(metadata.MonthlyBatches)) + } + + return nil +} + +// generateIndexFromCache generates the CPE index from cached data only +func generateIndexFromCache(cacheManager *CacheManager, outputFilename 
string) error { + fmt.Println("Loading cached products...") + allProducts, err := cacheManager.LoadAllProducts() + if err != nil { + return fmt.Errorf("failed to load cached products: %w", err) + } + + if len(allProducts) == 0 { + return fmt.Errorf("no cached data available - run 'task generate:cpe-index:cache:pull' and 'task generate:cpe-index:cache:update' first") + } + + fmt.Printf("Loaded %d products from cache\n", len(allProducts)) + fmt.Println("Converting products to CPE list...") + cpeList := ProductsToCpeList(allProducts) fmt.Println("Generating index...") - dictionaryJSON, err := generateIndexedDictionaryJSON(resp.Body) + dictionaryJSON, err := processCPEList(cpeList) if err != nil { return err } - // Write CPE index (JSON data) to disk - err = os.WriteFile(outputFilename, dictionaryJSON, 0600) - if err != nil { + // ensure parent directory exists + outputDir := filepath.Dir(outputFilename) + if err := os.MkdirAll(outputDir, 0755); err != nil { + return fmt.Errorf("failed to create output directory: %w", err) + } + + if err := os.WriteFile(outputFilename, dictionaryJSON, 0600); err != nil { return fmt.Errorf("unable to write processed CPE dictionary to file: %w", err) } - fmt.Println("Done!") - + fmt.Println("CPE index generated successfully!") return nil } +// processCPEList filters and indexes a CPE list, returning JSON bytes +func processCPEList(cpeList CpeList) ([]byte, error) { + // filter out data that's not applicable + cpeList = filterCpeList(cpeList) + + // create indexed dictionary to help with looking up CPEs + indexedDictionary := indexCPEList(cpeList) + + // convert to JSON + jsonData, err := json.MarshalIndent(indexedDictionary, "", " ") + if err != nil { + return nil, fmt.Errorf("unable to marshal CPE dictionary to JSON: %w", err) + } + return jsonData, nil +} + // errExit prints an error and exits with a non-zero exit code. 
func errExit(err error) { log.Printf("command failed: %s", err) diff --git a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd.go b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd.go index 089e387ce..3b29538cf 100644 --- a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd.go +++ b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd.go @@ -23,4 +23,5 @@ type CpeList struct { CpeItems []CpeItem `xml:"cpe-item"` } -const cpeDictionaryURL = "https://nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz" +// cpeDictionaryURL is deprecated - we now use the NVD Products API +// const cpeDictionaryURL = "https://nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz" diff --git a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd_adapter.go b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd_adapter.go new file mode 100644 index 000000000..f5ddc46f2 --- /dev/null +++ b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd_adapter.go @@ -0,0 +1,66 @@ +package main + +// nvd_adapter.go converts NVD API responses to the existing CpeList/CpeItem structures +// this allows the existing filtering and indexing logic to work without modification + +// ProductsToCpeList converts NVD API products to the legacy CpeList format +func ProductsToCpeList(products []NVDProduct) CpeList { + var cpeItems []CpeItem + + for _, product := range products { + item := productToCpeItem(product) + cpeItems = append(cpeItems, item) + } + + return CpeList{ + CpeItems: cpeItems, + } +} + +// productToCpeItem converts a single NVD API product to a CpeItem +func productToCpeItem(product NVDProduct) CpeItem { + details := product.CPE + + item := CpeItem{ + // use CPE 2.2 format for the Name field (legacy compatibility) + // note: the old XML feed had both 2.2 and 2.3 formats + // for now, we'll use 2.3 format in both places 
since that's what the API provides + Name: details.CPEName, + } + + // extract title (prefer English) + for _, title := range details.Titles { + if title.Lang == "en" { + item.Title = title.Title + break + } + } + // fallback to first title if no English title found + if item.Title == "" && len(details.Titles) > 0 { + item.Title = details.Titles[0].Title + } + + // convert references + if len(details.Refs) > 0 { + item.References.Reference = make([]struct { + Href string `xml:"href,attr"` + Body string `xml:",chardata"` + }, len(details.Refs)) + + for i, ref := range details.Refs { + item.References.Reference[i].Href = ref.Ref + item.References.Reference[i].Body = ref.Type + } + } + + // set CPE 2.3 information + item.Cpe23Item.Name = details.CPEName + + // handle deprecation + if details.Deprecated && len(details.DeprecatedBy) > 0 { + // use the first deprecated-by CPE (the old format only supported one) + item.Cpe23Item.Deprecation.DeprecatedBy.Name = details.DeprecatedBy[0].CPEName + } + + return item +} diff --git a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd_adapter_test.go b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd_adapter_test.go new file mode 100644 index 000000000..2196bd37e --- /dev/null +++ b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd_adapter_test.go @@ -0,0 +1,235 @@ +package main + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestProductToCpeItem(t *testing.T) { + tests := []struct { + name string + product NVDProduct + expected CpeItem + }{ + { + name: "basic product conversion", + product: NVDProduct{ + CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product:1.0:*:*:*:*:*:*:*", + Deprecated: false, + Titles: []NVDTitle{ + {Title: "Test Product", Lang: "en"}, + }, + Refs: []NVDRef{ + {Ref: "https://example.com/product", Type: "Vendor"}, + }, + }, + }, + expected: CpeItem{ + Name: 
"cpe:2.3:a:vendor:product:1.0:*:*:*:*:*:*:*", + Title: "Test Product", + References: struct { + Reference []struct { + Href string `xml:"href,attr"` + Body string `xml:",chardata"` + } `xml:"reference"` + }{ + Reference: []struct { + Href string `xml:"href,attr"` + Body string `xml:",chardata"` + }{ + {Href: "https://example.com/product", Body: "Vendor"}, + }, + }, + Cpe23Item: struct { + Name string `xml:"name,attr"` + Deprecation struct { + DeprecatedBy struct { + Name string `xml:"name,attr"` + } `xml:"deprecated-by"` + } `xml:"deprecation"` + }{ + Name: "cpe:2.3:a:vendor:product:1.0:*:*:*:*:*:*:*", + }, + }, + }, + { + name: "deprecated product", + product: NVDProduct{ + CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:old:1.0:*:*:*:*:*:*:*", + Deprecated: true, + DeprecatedBy: []NVDDeprecatedBy{ + {CPEName: "cpe:2.3:a:vendor:new:1.0:*:*:*:*:*:*:*", CPENameID: "test-uuid-123"}, + }, + Titles: []NVDTitle{ + {Title: "Old Product", Lang: "en"}, + }, + Refs: []NVDRef{ + {Ref: "https://example.com/old", Type: "Vendor"}, + }, + }, + }, + expected: CpeItem{ + Name: "cpe:2.3:a:vendor:old:1.0:*:*:*:*:*:*:*", + Title: "Old Product", + References: struct { + Reference []struct { + Href string `xml:"href,attr"` + Body string `xml:",chardata"` + } `xml:"reference"` + }{ + Reference: []struct { + Href string `xml:"href,attr"` + Body string `xml:",chardata"` + }{ + {Href: "https://example.com/old", Body: "Vendor"}, + }, + }, + Cpe23Item: struct { + Name string `xml:"name,attr"` + Deprecation struct { + DeprecatedBy struct { + Name string `xml:"name,attr"` + } `xml:"deprecated-by"` + } `xml:"deprecation"` + }{ + Name: "cpe:2.3:a:vendor:old:1.0:*:*:*:*:*:*:*", + Deprecation: struct { + DeprecatedBy struct { + Name string `xml:"name,attr"` + } `xml:"deprecated-by"` + }{ + DeprecatedBy: struct { + Name string `xml:"name,attr"` + }{ + Name: "cpe:2.3:a:vendor:new:1.0:*:*:*:*:*:*:*", + }, + }, + }, + }, + }, + { + name: "product with multiple titles prefers English", + product: 
NVDProduct{ + CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product:1.0:*:*:*:*:*:*:*", + Titles: []NVDTitle{ + {Title: "Produit", Lang: "fr"}, + {Title: "Product", Lang: "en"}, + {Title: "Producto", Lang: "es"}, + }, + Refs: []NVDRef{ + {Ref: "https://example.com", Type: "Vendor"}, + }, + }, + }, + expected: CpeItem{ + Name: "cpe:2.3:a:vendor:product:1.0:*:*:*:*:*:*:*", + Title: "Product", + References: struct { + Reference []struct { + Href string `xml:"href,attr"` + Body string `xml:",chardata"` + } `xml:"reference"` + }{ + Reference: []struct { + Href string `xml:"href,attr"` + Body string `xml:",chardata"` + }{ + {Href: "https://example.com", Body: "Vendor"}, + }, + }, + Cpe23Item: struct { + Name string `xml:"name,attr"` + Deprecation struct { + DeprecatedBy struct { + Name string `xml:"name,attr"` + } `xml:"deprecated-by"` + } `xml:"deprecation"` + }{ + Name: "cpe:2.3:a:vendor:product:1.0:*:*:*:*:*:*:*", + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := productToCpeItem(tt.product) + + assert.Equal(t, tt.expected.Name, result.Name) + assert.Equal(t, tt.expected.Title, result.Title) + assert.Equal(t, tt.expected.Cpe23Item.Name, result.Cpe23Item.Name) + assert.Equal(t, tt.expected.Cpe23Item.Deprecation.DeprecatedBy.Name, result.Cpe23Item.Deprecation.DeprecatedBy.Name) + + require.Equal(t, len(tt.expected.References.Reference), len(result.References.Reference)) + for i := range tt.expected.References.Reference { + assert.Equal(t, tt.expected.References.Reference[i].Href, result.References.Reference[i].Href) + assert.Equal(t, tt.expected.References.Reference[i].Body, result.References.Reference[i].Body) + } + }) + } +} + +func TestProductsToCpeList(t *testing.T) { + products := []NVDProduct{ + { + CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product1:1.0:*:*:*:*:*:*:*", + Titles: []NVDTitle{ + {Title: "Product 1", Lang: "en"}, + }, + Refs: []NVDRef{ + {Ref: "https://npmjs.com/package/product1", 
Type: "Vendor"}, + }, + }, + }, + { + CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product2:2.0:*:*:*:*:*:*:*", + Titles: []NVDTitle{ + {Title: "Product 2", Lang: "en"}, + }, + Refs: []NVDRef{ + {Ref: "https://pypi.org/project/product2", Type: "Vendor"}, + }, + }, + }, + } + + result := ProductsToCpeList(products) + + require.Len(t, result.CpeItems, 2) + assert.Equal(t, "cpe:2.3:a:vendor:product1:1.0:*:*:*:*:*:*:*", result.CpeItems[0].Name) + assert.Equal(t, "Product 1", result.CpeItems[0].Title) + assert.Equal(t, "cpe:2.3:a:vendor:product2:2.0:*:*:*:*:*:*:*", result.CpeItems[1].Name) + assert.Equal(t, "Product 2", result.CpeItems[1].Title) +} + +func TestProductsToCpeList_MultipleProducts(t *testing.T) { + products := []NVDProduct{ + { + CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product1:*:*:*:*:*:*:*:*", + Titles: []NVDTitle{{Title: "Product 1", Lang: "en"}}, + Refs: []NVDRef{{Ref: "https://example.com/1", Type: "Vendor"}}, + }, + }, + { + CPE: NVDProductDetails{ + CPEName: "cpe:2.3:a:vendor:product2:*:*:*:*:*:*:*:*", + Titles: []NVDTitle{{Title: "Product 2", Lang: "en"}}, + Refs: []NVDRef{{Ref: "https://example.com/2", Type: "Vendor"}}, + }, + }, + } + + result := ProductsToCpeList(products) + + require.Len(t, result.CpeItems, 2) + assert.Equal(t, "cpe:2.3:a:vendor:product1:*:*:*:*:*:*:*:*", result.CpeItems[0].Cpe23Item.Name) + assert.Equal(t, "cpe:2.3:a:vendor:product2:*:*:*:*:*:*:*:*", result.CpeItems[1].Cpe23Item.Name) +} diff --git a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd_api_client.go b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd_api_client.go new file mode 100644 index 000000000..ec22a889c --- /dev/null +++ b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd_api_client.go @@ -0,0 +1,286 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "strconv" + "time" + + "golang.org/x/time/rate" +) + +const ( + 
nvdProductsAPIURL = "https://services.nvd.nist.gov/rest/json/cpes/2.0" + resultsPerPage = 2000 // maximum allowed by NVD API + + // rate limits per NVD API documentation + unauthenticatedRequestsPer30Seconds = 5 + authenticatedRequestsPer30Seconds = 50 + + // retry configuration for rate limiting + maxRetries = 5 + baseRetryDelay = 30 * time.Second // NVD uses 30-second rolling windows +) + +// NVDAPIClient handles communication with the NVD Products API +type NVDAPIClient struct { + httpClient *http.Client + rateLimiter *rate.Limiter + apiKey string +} + +// NVDProductsResponse represents the JSON response from the NVD Products API +type NVDProductsResponse struct { + ResultsPerPage int `json:"resultsPerPage"` + StartIndex int `json:"startIndex"` + TotalResults int `json:"totalResults"` + Format string `json:"format"` + Version string `json:"version"` + Timestamp string `json:"timestamp"` + Products []NVDProduct `json:"products"` +} + +// NVDProduct represents a single product entry from the API +type NVDProduct struct { + CPE NVDProductDetails `json:"cpe"` +} + +// NVDProductDetails contains the CPE and reference information +type NVDProductDetails struct { + CPEName string `json:"cpeName"` + Deprecated bool `json:"deprecated"` + DeprecatedBy []NVDDeprecatedBy `json:"deprecatedBy,omitempty"` + CPENameID string `json:"cpeNameId"` + Created string `json:"created"` + LastModified string `json:"lastModified"` + Titles []NVDTitle `json:"titles"` + Refs []NVDRef `json:"refs"` +} + +// NVDTitle represents a title in a specific language +type NVDTitle struct { + Title string `json:"title"` + Lang string `json:"lang"` +} + +// NVDRef represents a reference URL +type NVDRef struct { + Ref string `json:"ref"` + Type string `json:"type,omitempty"` +} + +// NVDDeprecatedBy represents a CPE that replaces a deprecated one +type NVDDeprecatedBy struct { + CPEName string `json:"cpeName"` + CPENameID string `json:"cpeNameId"` +} + +// NewNVDAPIClient creates a new NVD API client 
+// it reads the NVD_API_KEY environment variable for authenticated requests +func NewNVDAPIClient() *NVDAPIClient { + apiKey := os.Getenv("NVD_API_KEY") + + // determine rate limit based on authentication + requestsPer30Seconds := unauthenticatedRequestsPer30Seconds + if apiKey != "" { + requestsPer30Seconds = authenticatedRequestsPer30Seconds + fmt.Printf("Using authenticated NVD API access (%d requests per 30 seconds)\n", requestsPer30Seconds) + } else { + fmt.Printf("Using unauthenticated NVD API access (%d requests per 30 seconds)\n", requestsPer30Seconds) + fmt.Println("Set NVD_API_KEY environment variable for higher rate limits") + } + + // create rate limiter with 10% safety margin to avoid hitting limits + // X requests per 30 seconds * 0.9 = (X * 0.9) / 30 requests per second + effectiveRate := float64(requestsPer30Seconds) * 0.9 / 30.0 + limiter := rate.NewLimiter(rate.Limit(effectiveRate), 1) + fmt.Printf("Rate limiter configured: %.2f requests/second (with 10%% safety margin)\n", effectiveRate) + + return &NVDAPIClient{ + httpClient: &http.Client{ + Timeout: 60 * time.Second, + }, + rateLimiter: limiter, + apiKey: apiKey, + } +} + +// PageCallback is called after each page is successfully fetched +// it receives the startIndex and the response for that page +type PageCallback func(startIndex int, response NVDProductsResponse) error + +// FetchProductsSince fetches all products modified since the given date +// if lastModStartDate is zero, fetches all products +// calls onPageFetched callback after each successful page fetch for incremental saving +// if resumeFromIndex > 0, starts fetching from that index +func (c *NVDAPIClient) FetchProductsSince(ctx context.Context, lastModStartDate time.Time, resumeFromIndex int, onPageFetched PageCallback) error { + startIndex := resumeFromIndex + + for { + resp, err := c.fetchPage(ctx, startIndex, lastModStartDate) + if err != nil { + return fmt.Errorf("failed to fetch page at index %d: %w", startIndex, err) + } + 
+ // call callback to save progress immediately + if onPageFetched != nil { + if err := onPageFetched(startIndex, resp); err != nil { + return fmt.Errorf("callback failed at index %d: %w", startIndex, err) + } + } + + // check if we've fetched all results + if startIndex+resp.ResultsPerPage >= resp.TotalResults { + fmt.Printf("Fetched %d/%d products (complete)\n", resp.TotalResults, resp.TotalResults) + break + } + + startIndex += resp.ResultsPerPage + fmt.Printf("Fetched %d/%d products...\n", startIndex, resp.TotalResults) + } + + return nil +} + +// fetchPage fetches a single page of results from the NVD API with retry logic for rate limiting +func (c *NVDAPIClient) fetchPage(ctx context.Context, startIndex int, lastModStartDate time.Time) (NVDProductsResponse, error) { + var lastErr error + + for attempt := 0; attempt < maxRetries; attempt++ { + // wait for rate limiter + if err := c.rateLimiter.Wait(ctx); err != nil { + return NVDProductsResponse{}, fmt.Errorf("rate limiter error: %w", err) + } + + // build request URL + url := fmt.Sprintf("%s?resultsPerPage=%d&startIndex=%d", nvdProductsAPIURL, resultsPerPage, startIndex) + + // add date range if specified (incremental update) + if !lastModStartDate.IsZero() { + // NVD API requires RFC3339 format: 2024-01-01T00:00:00.000 + lastModStartStr := lastModStartDate.Format("2006-01-02T15:04:05.000") + url += fmt.Sprintf("&lastModStartDate=%s", lastModStartStr) + } + + // create request + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return NVDProductsResponse{}, fmt.Errorf("failed to create request: %w", err) + } + + // add API key header if available + if c.apiKey != "" { + req.Header.Set("apiKey", c.apiKey) + } + + req.Header.Set("User-Agent", "syft-cpe-dictionary-generator") + + // execute request + httpResp, err := c.httpClient.Do(req) + if err != nil { + return NVDProductsResponse{}, fmt.Errorf("failed to execute request: %w", err) + } + + // handle rate limiting + if 
httpResp.StatusCode == http.StatusTooManyRequests { + lastErr = c.handleRateLimit(ctx, httpResp, attempt) + continue // retry + } + + // handle HTTP status codes + statusResponse, handled, err := c.handleHTTPStatus(httpResp, startIndex) + if handled { + // either error or special case (404 with empty results) + return statusResponse, err + } + + // success - parse response + var response NVDProductsResponse + if err := json.NewDecoder(httpResp.Body).Decode(&response); err != nil { + httpResp.Body.Close() + return NVDProductsResponse{}, fmt.Errorf("failed to decode response: %w", err) + } + + httpResp.Body.Close() + return response, nil + } + + return NVDProductsResponse{}, fmt.Errorf("max retries (%d) exceeded: %w", maxRetries, lastErr) +} + +// handleRateLimit handles HTTP 429 responses by parsing Retry-After and waiting +func (c *NVDAPIClient) handleRateLimit(ctx context.Context, httpResp *http.Response, attempt int) error { + body, _ := io.ReadAll(httpResp.Body) + httpResp.Body.Close() + + // parse Retry-After header + retryAfter := parseRetryAfter(httpResp.Header.Get("Retry-After")) + if retryAfter == 0 { + // use exponential backoff if no Retry-After header + retryAfter = baseRetryDelay * time.Duration(1< 0 { + return duration + } + } + + return 0 +} diff --git a/syft/pkg/cataloger/nix/store_cataloger.go b/syft/pkg/cataloger/nix/store_cataloger.go index a6c4ef963..e239330ef 100644 --- a/syft/pkg/cataloger/nix/store_cataloger.go +++ b/syft/pkg/cataloger/nix/store_cataloger.go @@ -25,6 +25,7 @@ type storeCataloger struct { } // NewStoreCataloger returns a new cataloger object initialized for Nix store files. 
+// // Deprecated: please use NewCataloger instead func NewStoreCataloger() pkg.Cataloger { return newStoreCataloger(Config{CaptureOwnedFiles: true}, "nix-store-cataloger") diff --git a/syft/pkg/cataloger/php/cataloger.go b/syft/pkg/cataloger/php/cataloger.go index b5ee52ae6..8a2baaa36 100644 --- a/syft/pkg/cataloger/php/cataloger.go +++ b/syft/pkg/cataloger/php/cataloger.go @@ -31,6 +31,7 @@ func NewPearCataloger() pkg.Cataloger { // NewPeclCataloger returns a new cataloger for PHP Pecl metadata. Note: this will also catalog Pear metadata so should // not be used in conjunction with the Pear Cataloger. +// // Deprecated: please use NewPearCataloger instead. func NewPeclCataloger() pkg.Cataloger { return generic.NewCataloger("php-pecl-serialized-cataloger"). diff --git a/syft/pkg/cataloger/php/parse_pecl_pear.go b/syft/pkg/cataloger/php/parse_pecl_pear.go index b3580ef57..c4af6cebe 100644 --- a/syft/pkg/cataloger/php/parse_pecl_pear.go +++ b/syft/pkg/cataloger/php/parse_pecl_pear.go @@ -30,8 +30,8 @@ func (p *peclPearData) ToPear() pkg.PhpPearEntry { } } -func (p *peclPearData) ToPecl() pkg.PhpPeclEntry { - return pkg.PhpPeclEntry(p.ToPear()) +func (p *peclPearData) ToPecl() pkg.PhpPeclEntry { //nolint:staticcheck + return pkg.PhpPeclEntry(p.ToPear()) //nolint:staticcheck } func parsePecl(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { diff --git a/syft/pkg/php.go b/syft/pkg/php.go index 1864ed9ef..93e8fc981 100644 --- a/syft/pkg/php.go +++ b/syft/pkg/php.go @@ -85,6 +85,7 @@ type PhpComposerAuthors struct { } // PhpPeclEntry represents a single package entry found within php pecl metadata files. +// // Deprecated: please use PhpPearEntry instead with the pear cataloger. 
type PhpPeclEntry PhpPearEntry diff --git a/task.d/generate/cpe-index.yaml b/task.d/generate/cpe-index.yaml new file mode 100644 index 000000000..72913dcf1 --- /dev/null +++ b/task.d/generate/cpe-index.yaml @@ -0,0 +1,152 @@ +version: "3" + +vars: + CPE_CACHE_DIR: "syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/.cpe-cache" + CPE_CACHE_REGISTRY: "ghcr.io/anchore/syft/cpe-cache:latest" + CPE_INDEX_OUTPUT: "syft/pkg/cataloger/internal/cpegenerate/dictionary/data/cpe-index.json" + CPE_GENERATOR_DIR: "syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator" + +tasks: + cache:pull: + desc: Pull CPE cache from ORAS registry (ghcr.io/anchore/syft/cpe-cache:latest) + # deps: [tools] + cmds: + - cmd: | + set -eu + echo "Pulling CPE cache from ORAS registry..." + mkdir -p {{ .CPE_CACHE_DIR }} + + # pull compressed files from ORAS + {{ .ORAS }} pull {{ .CPE_CACHE_REGISTRY }} --output {{ .CPE_CACHE_DIR }} || { + exit_code=$? + if [ $exit_code -eq 1 ]; then + echo "No existing cache found in registry (this is normal for first run)" + exit 0 + else + exit $exit_code + fi + } + + # handle nested directory structure from old pushes (if exists) + # files might be at .cpe-cache/syft/pkg/.../cpe-cache/*.json.zst + nested_cache=$(find {{ .CPE_CACHE_DIR }} -type d -name ".cpe-cache" ! -path {{ .CPE_CACHE_DIR }} | head -1) + if [ -n "$nested_cache" ]; then + echo "Found nested cache structure, moving files to correct location..." + mv "$nested_cache"/*.json.zst {{ .CPE_CACHE_DIR }}/ 2>/dev/null || true + # clean up nested directories + rm -rf {{ .CPE_CACHE_DIR }}/syft 2>/dev/null || true + fi + + # decompress all .json.zst files to .json + echo "Decompressing cache files..." + decompressed_count=0 + for zst_file in {{ .CPE_CACHE_DIR }}/*.json.zst; do + # skip if no .zst files found (glob didn't match) + if [ ! 
-f "$zst_file" ]; then + echo "No compressed files to decompress" + break + fi + + # decompress to .json (removing .zst extension) + json_file="${zst_file%.zst}" + echo " Decompressing $(basename "$zst_file")..." + zstd -d -q -f "$zst_file" -o "$json_file" + + # remove compressed file + rm "$zst_file" + decompressed_count=$((decompressed_count + 1)) + done + + if [ $decompressed_count -gt 0 ]; then + echo "Decompressed $decompressed_count file(s) successfully" + fi + silent: false + + cache:update: + desc: Fetch incremental CPE updates from NVD API and update local cache + # deps: [tools] + dir: "{{ .CPE_GENERATOR_DIR }}" + cmds: + - cmd: | + echo "Updating CPE cache from NVD Products API..." + go run . -cache-only + silent: false + sources: + - "{{ .CPE_GENERATOR_DIR }}/*.go" + generates: + - "{{ .CPE_CACHE_DIR }}/metadata.json" + - "{{ .CPE_CACHE_DIR }}/products/*.json" + + cache:push: + desc: Push updated CPE cache to ORAS registry + # deps: [tools] + cmds: + - cmd: | + set -eu + echo "Pushing CPE cache to ORAS registry..." + + if [ ! -d "{{ .CPE_CACHE_DIR }}" ]; then + echo "No cache directory found" + exit 1 + fi + + # store absolute path to ORAS before changing directory + oras_bin="$(pwd)/{{ .ORAS }}" + + # change to cache directory to avoid including full paths in ORAS push + cd {{ .CPE_CACHE_DIR }} + + # find all JSON files (basenames only) + json_files=$(find . -maxdepth 1 -type f -name "*.json" -exec basename {} \;) + if [ -z "$json_files" ]; then + echo "No cache files to push" + exit 1 + fi + + # compress each JSON file to .json.zst + echo "Compressing cache files..." + compressed_files="" + for json_file in $json_files; do + zst_file="${json_file}.zst" + echo " Compressing $(basename "$json_file")..." + zstd -q -f "$json_file" -o "$zst_file" + compressed_files="$compressed_files $zst_file" + done + + # push compressed files to ORAS (from cache directory, so only basenames are used) + echo "Pushing compressed files to registry..." 
+ "$oras_bin" push {{ .CPE_CACHE_REGISTRY }} $compressed_files \ + --annotation org.opencontainers.image.source=https://github.com/{{ .OWNER }}/{{ .PROJECT }} \ + --annotation org.opencontainers.image.created=$(date -u +"%Y-%m-%dT%H:%M:%SZ") + + # clean up compressed files + echo "Cleaning up compressed files..." + for zst_file in $compressed_files; do + rm "$zst_file" + done + + echo "Cache pushed successfully" + silent: false + + build: + desc: Generate cpe-index.json from existing local cache (does not pull/push/update) + dir: "{{ .CPE_GENERATOR_DIR }}" + cmds: + - cmd: | + echo "Generating CPE index from local cache..." + go run . -o ../data/cpe-index.json + echo "CPE index generated successfully" + silent: false + sources: + - "{{ .CPE_GENERATOR_DIR }}/*.go" + - "{{ .CPE_CACHE_DIR }}/metadata.json" + - "{{ .CPE_CACHE_DIR }}/products/*.json" + generates: + - "{{ .CPE_INDEX_OUTPUT }}" + + cache:clean: + desc: Remove local CPE cache directory + dir: "{{ .CPE_GENERATOR_DIR }}" + cmds: + - rm -rf .cpe-cache + - echo "CPE cache cleaned"