syft/.github/scripts/find_cache_paths.py
Alex Goodman d61af0abab
Port to go-make (#4923)
* port to go-make

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* refresh fixtures on running unit tests

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* address refresh cache issues with old now-gitignored files

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2026-05-18 11:59:55 -04:00

155 lines
4.5 KiB
Python
Executable File

#!/usr/bin/env python3
from __future__ import annotations
import glob
import hashlib
import json
import os
import sys
# Path prefixes whose .fingerprint files are skipped during discovery. Prefixes
# are compared against the raw glob results, i.e. before the paths are made
# relative to the current working directory.
IGNORED_PREFIXES = []


def find_fingerprints_and_check_dirs(base_dir):
    """Find ``*.fingerprint`` files under *base_dir* and classify their paired paths.

    A fingerprint file ``<path>.fingerprint`` is paired with the content at
    ``<path>``. Only fingerprints located somewhere below a ``test*`` directory
    are considered.

    Args:
        base_dir: directory to search recursively.

    Returns:
        A 4-tuple ``(valid_paths, empty_content, orphan_fingerprints,
        fingerprint_contents)``:

        * ``valid_paths`` -- sorted list of paired paths that are either a
          non-directory or a non-empty directory.
        * ``empty_content`` -- paired paths that are existing but empty
          directories.
        * ``orphan_fingerprints`` -- fingerprint files whose paired path does
          not exist.
        * ``fingerprint_contents`` -- ``(fingerprint_file, stripped_text)``
          tuples for fingerprints whose paired path is a directory.

        All paths are relative to the current working directory.

    Raises:
        SystemExit: with status 1 when no fingerprint file is found.
    """
    suffix = ".fingerprint"
    ignored = tuple(IGNORED_PREFIXES)
    pattern = os.path.join(base_dir, "**", "test*", "**", "*" + suffix)

    all_fingerprints = {
        os.path.relpath(fp)
        for fp in glob.glob(pattern, recursive=True)
        if not fp.startswith(ignored)
    }

    if not all_fingerprints:
        show("No .fingerprint files or cache directories found.")
        # sys.exit rather than the interactive-only `exit` helper from `site`
        sys.exit(1)

    orphan_fingerprints = []
    empty_content = []
    valid_paths = set()
    fingerprint_contents = []

    # sorted iteration keeps the returned lists deterministic between runs
    for fingerprint in sorted(all_fingerprints):
        # strip only the trailing suffix: str.replace would also mangle any
        # parent directory whose name happens to contain ".fingerprint"
        path = fingerprint[: -len(suffix)]

        if not os.path.exists(path):
            # paired content path is entirely missing — the .fingerprint is likely
            # leftover from a moved/deleted source (testdata trees are git-ignored,
            # so they persist locally across rename refactors)
            orphan_fingerprints.append(fingerprint)
            continue

        if not os.path.isdir(path):
            valid_paths.add(path)
            continue

        if os.listdir(path):
            valid_paths.add(path)
        else:
            empty_content.append(path)

        # NOTE(review): contents are only recorded for directory-backed paths
        # (plain-file paths take the `continue` above) — confirm this is intended
        # before changing it, since it feeds the overall digest.
        with open(fingerprint, "r") as f:
            content = f.read().strip()
        fingerprint_contents.append((fingerprint, content))

    return sorted(valid_paths), empty_content, orphan_fingerprints, fingerprint_contents
def parse_fingerprint_contents(fingerprint_content):
    """Parse fingerprint text into a ``{path: digest}`` mapping.

    Every non-blank line must hold a digest, whitespace, and then a path. The
    line is split on the first run of whitespace only, so paths that contain
    spaces are kept intact. Blank lines are ignored.

    Args:
        fingerprint_content: full text of a fingerprint file.

    Returns:
        dict mapping each path to its digest; a later line for the same path
        overrides an earlier one.

    Raises:
        ValueError: if a non-blank line contains a digest but no path.
    """
    input_map = {}
    for line_number, raw_line in enumerate(fingerprint_content.splitlines(), start=1):
        line = raw_line.strip()
        if not line:
            continue

        fields = line.split(maxsplit=1)
        if len(fields) != 2:
            raise ValueError(
                f"malformed fingerprint line {line_number}: {raw_line!r}"
            )

        digest, path = fields
        input_map[path] = digest
    return input_map
def calculate_sha256(fingerprint_contents):
    """Return one SHA-256 hex digest covering all fingerprint contents.

    Args:
        fingerprint_contents: iterable of ``(fingerprint_file, content)``
            tuples.

    Returns:
        Hex digest of the contents, taken in order of fingerprint file name
        and hashed back-to-back with no separator between them.
    """
    hasher = hashlib.sha256()
    # order by file name so the digest does not depend on discovery order
    for _, body in sorted(fingerprint_contents, key=lambda pair: pair[0]):
        hasher.update(body.encode())
    return hasher.hexdigest()
def calculate_file_sha256(file_path):
    """Return the SHA-256 hex digest of the file at *file_path*.

    The file is read in binary mode in 4096-byte chunks so its size does not
    affect memory use.
    """
    hasher = hashlib.sha256()
    with open(file_path, "rb") as stream:
        while True:
            chunk = stream.read(4096)
            if not chunk:
                # an empty read means end of file
                break
            hasher.update(chunk)
    return hasher.hexdigest()
def show(*s: str):
    """Print the given values to stderr, space-separated, followed by a newline."""
    stream = sys.stderr
    print(*s, file=stream)
def main(file_path: str | None):
    """Report the fingerprinted cache paths under the current directory as JSON.

    The JSON document holds an overall ``digest`` plus, for every valid path,
    the path itself, the SHA-256 of its ``.fingerprint`` file and the parsed
    ``{input path: digest}`` map from that file. It is always printed to
    stdout; warnings about empty or orphaned entries go through ``show``.

    Args:
        file_path: when truthy, the JSON document is also written to this file.
    """
    valid_paths, empty_content, orphan_fingerprints, fingerprint_contents = (
        find_fingerprints_and_check_dirs(".")
    )

    # Empty cache directories are reported but not treated as an error: a newly
    # added cache directory cannot be populated until the tests have run, and
    # this step is a prerequisite for running the tests.
    if empty_content:
        show(
            "The following paths exist but are empty, and have corresponding .fingerprint files:"
        )
        for empty_path in sorted(empty_content):
            show(f"- {empty_path}")

    if orphan_fingerprints:
        show(
            "The following .fingerprint files reference paths that no longer exist "
            "(likely leftover from a moved/deleted cataloger — safe to delete, "
            "or run `task prune-orphan-fingerprints`):"
        )
        for orphan in sorted(orphan_fingerprints):
            show(f"- {orphan}")

    overall_digest = calculate_sha256(fingerprint_contents)

    entries = []
    for cache_path in sorted(valid_paths):
        fingerprint_file = f"{cache_path}.fingerprint"
        try:
            if not os.path.exists(fingerprint_file):
                continue

            fingerprint_digest = calculate_file_sha256(fingerprint_file)

            # read the fingerprint file again as text to get its digest/path pairs
            with open(fingerprint_file, "r") as handle:
                raw_text = handle.read().strip()
                parsed_inputs = parse_fingerprint_contents(raw_text)

            entries.append(
                {"path": cache_path, "digest": fingerprint_digest, "input": parsed_inputs}
            )
        except Exception as e:
            # name the offending file before letting the error propagate
            show(f"Error processing {fingerprint_file}: {e}")
            raise e

    content = json.dumps(
        {"digest": overall_digest, "paths": entries}, indent=2, sort_keys=True
    )

    if file_path:
        with open(file_path, "w") as out:
            out.write(content)

    print(content)
if __name__ == "__main__":
    # the optional first command-line argument is a file to also write the JSON to
    file_path = sys.argv[1] if len(sys.argv) > 1 else None
    main(file_path)