Mirror of https://github.com/anchore/syft.git
Label PRs when the json schema changes (#2240)
* label PRs when the json schema changes
* moderate pr comments
* be more strict about processing file names

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
commit 8f6bdde666 (parent ef43294d0e)
.github/scripts/labeler.py (vendored, new file, 224 lines added)
@@ -0,0 +1,224 @@
from __future__ import annotations

import sys
import glob
import subprocess
import os
import re

DRY_RUN = False


def main(changed_files: str | None = None, merge_base_schema_files: str | None = None):
    global DRY_RUN

    pr_number = os.environ.get("GITHUB_PR_NUMBER")
    comment_file_path = os.environ.get("CI_COMMENT_FILE")

    if not comment_file_path:
        print("CI_COMMENT_FILE not set")
        sys.exit(1)

    if not pr_number:
        DRY_RUN = True

    if changed_files:
        DRY_RUN = True

        # read lines from file... this is useful for local testing
        with open(changed_files) as f:
            pr_changed_files = f.read().splitlines()

        with open(merge_base_schema_files) as f:
            og_json_schema_files = sort_json_schema_files(f.read().splitlines())

    else:
        if not is_ci():
            print("Not in CI")
            sys.exit(1)

        if not pr_number:
            print("Not a PR")
            sys.exit(1)

        pr_changed_files = get_pr_changed_files(pr_number)
        # since we are running this in the context of the pull_request_target event, the checkout is the merge base...
        # that is, the main branch of the original repo, NOT the branch in the forked repo (or the branch in the target
        # repo for non-forked PRs). This means we just need to list the currently checked-out files to get a sense of
        # the changes before a merge.
        og_json_schema_files = list_json_schema_files()

    pr_json_schema_files = filter_to_schema_files(pr_changed_files)

    # print("schema files in pr: ", summarize_schema_files(pr_json_schema_files))
    # print("og schema files: ", summarize_schema_files(og_json_schema_files))

    if not og_json_schema_files:
        print("No schema files found in merge base")
        sys.exit(1)

    # pr_json_schema_files = the set of PR files that are added, removed, or changed
    new_schema_files = set(pr_json_schema_files) - set(og_json_schema_files)
    removed_or_modified_schema_files = set(pr_json_schema_files) - set(new_schema_files)

    print("new schemas: ", summarize_schema_files(new_schema_files))
    print("removed or modified schemas:", summarize_schema_files(removed_or_modified_schema_files))

    # if there is a new or modified schema, we should add the "json-schema" label to the PR...
    if new_schema_files or removed_or_modified_schema_files:
        print("\nAdding json-schema label...")
        add_label(pr_number, "json-schema")
    else:
        remove_label(pr_number, "json-schema")

    # new schema files should be scrutinized, comparing the latest and added versions to see if it's a breaking
    # change (major version bump). Warn about it on the PR by adding a breaking-change label...
    if is_breaking_change(new_schema_files, og_json_schema_files[-1]):
        print("\nBreaking change detected...")
        add_label(pr_number, "breaking-change")
    else:
        remove_label(pr_number, "breaking-change")

    # modifying an existing schema could be a breaking change, so we should warn about it on the PR via a comment...
    # removing schema files should never be allowed, so we should warn about it on the PR via a comment...
    if removed_or_modified_schema_files:
        print("\nRemoved or modified schema detected...")
        schemas = sort_json_schema_files(list(removed_or_modified_schema_files))
        schemas_str = "\n".join([f" - {schema}" for schema in schemas])
        add_comment(comment_file_path, f"Detected modification or removal of existing json schemas:\n{schemas_str}", warning=True)


def add_comment(comment_file_path: str, comment: str, warning: bool = False, important: bool = False):
    if warning or important:
        comment_lines = comment.splitlines()
        comment = "\n".join([f"> {line}" for line in comment_lines])

        if warning:
            comment = f"> [!WARNING]\n{comment}"
        elif important:
            comment = f"> [!IMPORTANT]\n{comment}"

    # create any parent directories if they don't exist
    os.makedirs(os.path.dirname(comment_file_path), exist_ok=True)

    with open(comment_file_path, "w") as f:
        f.write(comment)

    print(f"Comment file contents: {comment_file_path}")
    print(comment)


def add_label(pr_number: str, label: str):
    # run "gh pr edit --add-label <label>"
    result = run(f"gh pr edit {pr_number} --add-label {label}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0:
        print(f"Unable to add {label!r} label to PR")
        print(str(result.stderr))
        sys.exit(1)


def remove_label(pr_number: str, label: str):
    # run "gh pr edit --remove-label <label>"
    result = run(f"gh pr edit {pr_number} --remove-label {label}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0:
        print(f"Unable to remove {label!r} label from PR")
        print(str(result.stderr))
        sys.exit(1)


def major_version(semver: str) -> int:
    return int(semver.split(".")[0])


def is_breaking_change(new_schema_files: set[str], latest_schema_file: str) -> bool:
    latest_major_version = major_version(get_semver(latest_schema_file))
    for file in new_schema_files:
        change_major_version = major_version(get_semver(file))
        if change_major_version > latest_major_version:
            return True
    return False


def summarize_schema_files(files: list[str]) -> list[str]:
    return [get_semver(file) for file in files]


def is_ci() -> bool:
    return "CI" in os.environ


def get_pr_changed_files(pr_number: str) -> list[str]:
    result = run(f"gh pr view {pr_number} --json files --jq '.files.[].path'", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.returncode != 0:
        print("Unable to get list of changed files in PR")
        print(str(result.stderr))
        sys.exit(1)

    list_of_files = result.stdout.splitlines()
    return list_of_files


def filter_to_schema_files(list_of_files: list[str]) -> list[str]:
    # get files matching "schema/json/schema-*.json"
    files = []
    for file in list_of_files:
        if re.match(r"^schema/json/schema-\d+\.\d+\.\d+\.json$", file):
            files.append(file)
    return sort_json_schema_files(files)


def list_json_schema_files() -> list[str]:
    # list files in the "schema/json" directory matching the pattern "schema-*.json"
    return sort_json_schema_files(list(glob.glob("schema/json/schema-*.json")))


def run(command: str, **kwargs) -> subprocess.CompletedProcess:
    if DRY_RUN:
        print(f"[DRY RUN] {command}")
        return subprocess.CompletedProcess(args=[command], returncode=0)
    print(f"[RUN] {command}")
    return subprocess.run(command, **kwargs)


def get_semver(input_file: str) -> str:
    return input_file.split("-")[1].split(".json")[0]


def sort_json_schema_files(files: list[str]) -> list[str]:
    # sort files by schema version, where the input looks like "schema/json/schema-1.12.1.json";
    # we should sort by the semantic version embedded within the basename, not the raw string,
    # so that "schema/json/schema-1.2.1.json" comes before "schema/json/schema-1.12.1.json".
    versions = [get_semver(file) for file in files if file]

    versions = sorted(versions, key=lambda s: [int(u) for u in s.split('.')])

    return [f"schema/json/schema-{version}.json" for version in versions]


# allow for test files that have a line-by-line list of files:
#
#   .binny.yaml
#   .github/actions/bootstrap/action.yaml
#   .github/scripts/goreleaser-install.sh
#   .github/workflows/release.yaml
#   .github/workflows/update-bootstrap-tools.yml
#   .github/workflows/update-cpe-dictionary-index.yml
#   .github/workflows/update-stereoscope-release.yml
#   .github/workflows/validations.yaml
#   .gitignore
#   .goreleaser.yaml
#   Makefile
#   Taskfile.yaml
#   schema/cyclonedx/Makefile

if __name__ == "__main__":
    # these are variables for a single file name that contains a list of files (line separated)
    changed_files = None
    merge_base_schema_files = None

    if len(sys.argv) > 2:
        changed_files = sys.argv[1]
        merge_base_schema_files = sys.argv[2]

    main(changed_files, merge_base_schema_files)
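As an aside on the local-testing path above: a minimal sketch of a local dry run, illustrative only and not part of the diff. The list-file names are hypothetical; only CI_COMMENT_FILE must be set, and leaving GITHUB_PR_NUMBER unset keeps every gh command in dry-run mode.

    # hypothetical local smoke test for .github/scripts/labeler.py (run from the repo root)
    import os
    import subprocess

    # the labeler exits early unless CI_COMMENT_FILE is set; the path mirrors the workflow's value
    os.environ["CI_COMMENT_FILE"] = ".tmp/labeler-comment.txt"
    # with no PR number in the environment, all `gh` commands are printed as "[DRY RUN] ..."
    os.environ.pop("GITHUB_PR_NUMBER", None)

    # line-separated file lists; the names are illustrative only
    with open("changed-files.txt", "w") as f:
        f.write("schema/json/schema-2.0.0.json\nMakefile\n")
    with open("merge-base-schemas.txt", "w") as f:
        f.write("schema/json/schema-1.2.1.json\nschema/json/schema-1.12.1.json\n")

    # schema-2.0.0.json is new and bumps the major version past 1.12.1,
    # so this prints the breaking-change path without calling gh for real
    subprocess.run(["python", ".github/scripts/labeler.py", "changed-files.txt", "merge-base-schemas.txt"], check=True)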
.github/scripts/labeler_test.py (vendored, new file, 65 lines added)
@@ -0,0 +1,65 @@
import unittest
from unittest.mock import patch
import subprocess

import labeler


class Labeler(unittest.TestCase):

    def test_major_version(self):
        self.assertEqual(labeler.major_version("1.2.3"), 1)
        self.assertEqual(labeler.major_version("2.0.0"), 2)

    def test_is_breaking_change(self):
        new_schema_files = ["schema/json/schema-2.0.0.json"]
        latest_schema_file = "schema/json/schema-1.2.0.json"
        self.assertTrue(labeler.is_breaking_change(new_schema_files, latest_schema_file))

        new_schema_files = ["schema/json/schema-1.3.0.json"]
        latest_schema_file = "schema/json/schema-1.2.0.json"
        self.assertFalse(labeler.is_breaking_change(new_schema_files, latest_schema_file))

    def test_summarize_schema_files(self):
        files = ["schema/json/schema-1.0.0.json", "schema/json/schema-2.0.0.json"]
        expected = ["1.0.0", "2.0.0"]
        self.assertEqual(labeler.summarize_schema_files(files), expected)

    def test_is_ci(self):
        # mock os.environ to simulate a CI environment
        with patch.dict("os.environ", {"CI": "true"}):
            self.assertTrue(labeler.is_ci())

    def test_get_pr_changed_files(self):
        expected_command = "gh pr view 123 --json files --jq '.files.[].path'"
        expected_output = "file1.json\nfile2.json\n"

        subprocess.CompletedProcess.returncode = 0
        subprocess.CompletedProcess.stdout = expected_output
        with patch("labeler.run", return_value=subprocess.CompletedProcess) as mock_run:
            result = labeler.get_pr_changed_files("123")
            mock_run.assert_called_with(expected_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
            self.assertEqual(result, ["file1.json", "file2.json"])

    def test_filter_to_schema_files(self):
        input_files = ["schema/json/schema-1.0.0.json", "not_schema.txt", "schema/json/schema-2.0.0.json"]
        expected_files = ["schema/json/schema-1.0.0.json", "schema/json/schema-2.0.0.json"]
        self.assertEqual(labeler.filter_to_schema_files(input_files), expected_files)

        # we should be strict about which files are allowed to be processed
        input_files = ["schema/json/schema-1.0.0extracontent.json", "schema/json/schema-1.0.0.md", "schema/json/schema-1.0.0.json.extracontent"]
        expected_files = []
        self.assertEqual(labeler.filter_to_schema_files(input_files), expected_files)

    def test_get_semver(self):
        input_file = "schema/json/schema-1.0.0.json"
        expected_semver = "1.0.0"
        self.assertEqual(labeler.get_semver(input_file), expected_semver)

    def test_sort_json_schema_files(self):
        files = ["schema/json/schema-1.12.1.json", "schema/json/schema-1.2.1.json"]
        expected_sorted_files = ["schema/json/schema-1.2.1.json", "schema/json/schema-1.12.1.json"]
        self.assertEqual(labeler.sort_json_schema_files(files), expected_sorted_files)


if __name__ == "__main__":
    unittest.main()
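A note on test_get_pr_changed_files above: it stubs labeler.run by assigning class attributes on subprocess.CompletedProcess. An equivalent sketch, illustrative only and not part of the diff, builds a throwaway instance instead and makes the same assertions without class-level mutation:

    # sketch only: the same stubbing with a concrete CompletedProcess instance
    import subprocess
    from unittest.mock import patch

    import labeler

    fake = subprocess.CompletedProcess(args=["gh"], returncode=0, stdout="file1.json\nfile2.json\n")
    with patch("labeler.run", return_value=fake) as mock_run:
        assert labeler.get_pr_changed_files("123") == ["file1.json", "file2.json"]
        mock_run.assert_called_once()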
.github/workflows/labeler.yaml (vendored, new file, 54 lines added)
@@ -0,0 +1,54 @@
name: "Detect schema changes"

on:
  # IMPORTANT! This workflow is triggered by the `pull_request_target` event,
  # which means that forked PRs will run with access to secrets from the repo
  # they are forked from (the "target" repo).
  #
  # For this reason we NEVER checkout the code from the pull request
  # (e.g. "ref: ${{ github.event.pull_request.head.sha }}") to prevent
  # accidentally running potentially untrusted code.
  #
  # By default the checkout will be:
  #   - GITHUB_SHA: Last commit on the PR base branch
  #   - GITHUB_REF: PR base branch
  #
  # ...unlike a typical PR where:
  #   - GITHUB_SHA: Last merge commit on the GITHUB_REF branch
  #   - GITHUB_REF: PR merge branch refs/pull/:prNumber/merge
  pull_request_target:

env:
  # note: this is used within hashFiles() so must be within the GITHUB_WORKSPACE path (or it will silently fail)
  CI_COMMENT_FILE: .tmp/labeler-comment.txt
  # needs to be any string that uniquely identifies the comment on a PR across multiple runs
  COMMENT_HEADER: "label-commentary"

jobs:
  label:
    name: "Label changes"
    runs-on: ubuntu-22.04
    steps:

      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 #v4.1.1

      - run: python .github/scripts/labeler.py
        env:
          # note: this token has write access to the repo
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_PR_NUMBER: ${{ github.event.number }}

      - name: Delete existing comment
        if: ${{ hashFiles( env.CI_COMMENT_FILE ) == '' }}
        uses: marocchino/sticky-pull-request-comment@efaaab3fd41a9c3de579aba759d2552635e590fd #v2.8.0
        with:
          header: ${{ env.COMMENT_HEADER }}
          hide: true
          hide_classify: "OUTDATED"

      - name: Add comment
        if: ${{ hashFiles( env.CI_COMMENT_FILE ) != '' }}
        uses: marocchino/sticky-pull-request-comment@efaaab3fd41a9c3de579aba759d2552635e590fd #v2.8.0
        with:
          header: ${{ env.COMMENT_HEADER }}
          path: ${{ env.CI_COMMENT_FILE }}
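For context on the two hashFiles() conditions above: the sticky-comment steps fire only when the labeler script actually wrote something into CI_COMMENT_FILE. A small sketch, illustrative only and not part of the diff (run from the repository root; the schema name is made up), of what that file holds after a modified or removed schema is detected:

    # labeler.add_comment() writes a GitHub "[!WARNING]" admonition into CI_COMMENT_FILE,
    # so "Add comment" runs and "Delete existing comment" is skipped on that run
    import sys

    sys.path.insert(0, ".github/scripts")  # assumes execution from the repository root
    import labeler

    labeler.add_comment(
        ".tmp/labeler-comment.txt",  # same path as the workflow's CI_COMMENT_FILE
        "Detected modification or removal of existing json schemas:\n - schema/json/schema-1.2.0.json",
        warning=True,
    )
    # the file now begins with "> [!WARNING]" followed by the quoted message lines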
.gitignore (vendored, 5 lines added)
@@ -64,3 +64,8 @@ test/integration/test-fixtures/**/go.sum
 # attestation
 cosign.key
 cosign.pub
+
+# Byte-compiled object files for python
+__pycache__/
+*.py[cod]
+*$py.class
@@ -6,6 +6,7 @@ In order to test and develop in this repo you will need the following dependencies
 - Golang
 - docker
 - make
+- Python (>= 3.9)

 ### Docker settings for getting started

 Make sure you've updated your docker settings so the default docker socket path is available.
Makefile (6 lines changed)
@@ -70,7 +70,7 @@ all: static-analysis test ## Run all linux-based checks (linting, license check,
 static-analysis: check-go-mod-tidy lint check-licenses check-json-schema-drift ## Run all static analysis checks

 .PHONY: test
-test: unit integration validate-cyclonedx-schema benchmark cli ## Run all tests (currently unit, integration, linux compare, and cli tests)
+test: unit integration validate-cyclonedx-schema benchmark test-utils cli ## Run all tests (currently unit, integration, linux compare, and cli tests)


 ## Bootstrapping targets #################################

@@ -167,6 +167,10 @@ cli: $(SNAPSHOT_DIR) ## Run CLI tests
 	SYFT_BINARY_LOCATION='$(SNAPSHOT_BIN)' \
 	go test -count=1 -timeout=15m -v ./test/cli

+.PHONY: test-utils
+test-utils:
+	python .github/scripts/labeler_test.py
+

 ## Benchmark test targets #################################