diff --git a/app.py b/app.py index 07312ec..15f0f0a 100644 --- a/app.py +++ b/app.py @@ -32,17 +32,17 @@ def _verify_signature(payload: bytes, signature_header: str | None) -> bool: return hmac.compare_digest(expected, signature_header.strip()) -def _handle_push(owner: str, repo: str, changed_files: list[str]) -> None: +def _handle_push(owner: str, repo: str, branch: str, changed_files: list[str]) -> None: """Process all changed markdown files in a push event.""" for file_path in changed_files: if not file_path.endswith(".md"): continue - logger.info("Checking %s/%s: %s", owner, repo, file_path) + logger.info("Checking %s/%s@%s: %s", owner, repo, branch, file_path) try: - content, sha = gitea_client.get_file(owner, repo, file_path) + content, sha = gitea_client.get_file(owner, repo, file_path, branch) updated, changed = processor.process_document(content) if changed: - gitea_client.update_file(owner, repo, file_path, updated, sha) + gitea_client.update_file(owner, repo, file_path, updated, sha, branch) logger.info("Updated %s", file_path) else: logger.info("No BLIGHT triggers found in %s", file_path) @@ -65,6 +65,7 @@ def webhook(): data = json.loads(payload) owner = data["repository"]["owner"]["login"] repo = data["repository"]["name"] + branch = data.get("ref", "").removeprefix("refs/heads/") # Collect unique file paths from all commits in the push seen: set[str] = set() @@ -81,7 +82,7 @@ def webhook(): # Process in background so we return 200 to Gitea immediately thread = threading.Thread( target=_handle_push, - args=(owner, repo, changed_files), + args=(owner, repo, branch, changed_files), daemon=True, ) thread.start() diff --git a/gitea_client.py b/gitea_client.py index a9a1993..ad21eda 100644 --- a/gitea_client.py +++ b/gitea_client.py @@ -10,7 +10,7 @@ def _headers() -> dict: } -def get_file(owner: str, repo: str, path: str) -> tuple[str, str]: +def get_file(owner: str, repo: str, path: str, branch: str) -> tuple[str, str]: """Fetch a file's decoded content 
and its SHA from Gitea. Returns: @@ -18,7 +18,7 @@ def get_file(owner: str, repo: str, path: str) -> tuple[str, str]: required for the subsequent update call. """ url = f"{config.GITEA_URL}/api/v1/repos/{owner}/{repo}/contents/{path}" - response = requests.get(url, headers=_headers(), timeout=30) + response = requests.get(url, headers=_headers(), params={"ref": branch}, timeout=30) response.raise_for_status() data = response.json() content = base64.b64decode(data["content"]).decode("utf-8") @@ -31,14 +31,16 @@ def update_file( path: str, content: str, sha: str, - commit_message: str = "BLIGHT: process triggers", + branch: str, + commit_message: str | None = None, ) -> None: """Write updated file content back to Gitea.""" url = f"{config.GITEA_URL}/api/v1/repos/{owner}/{repo}/contents/{path}" payload = { - "message": commit_message, + "message": commit_message or f"BLIGHT: process triggers in {path}", "content": base64.b64encode(content.encode("utf-8")).decode("ascii"), "sha": sha, + "branch": branch, } response = requests.put(url, headers=_headers(), json=payload, timeout=30) response.raise_for_status() diff --git a/processor.py b/processor.py index c41ec8b..ee6eb42 100644 --- a/processor.py +++ b/processor.py @@ -12,6 +12,9 @@ DOCUMENT_PATTERN = re.compile(r"^BLIGHT::\s+(.+)$", re.MULTILINE | re.IGNORECASE FAILED_TEMPLATE = "\n" +# Matches any BLIGHT: trigger in AI output that could cause a processing loop. 
+_SANITIZE_PATTERN = re.compile(r"BLIGHT:", re.IGNORECASE)
+
 _MAX_RETRIES = 3
 _RETRY_DELAYS = [1, 2, 4]  # seconds between attempts
 
@@ -94,8 +97,8 @@ def _call_with_retry(document: str, instruction: str, *, document_scope: bool) -
     for attempt in range(_MAX_RETRIES):
         try:
             if document_scope:
-                return _provider.complete_document(document, instruction)
-            return _provider.complete(document, instruction)
+                return _sanitize(_provider.complete_document(document, instruction))
+            return _sanitize(_provider.complete(document, instruction))
         except Exception as exc:
             last_error = exc
             if attempt < _MAX_RETRIES - 1:
@@ -117,3 +120,16 @@ def _call_with_retry(document: str, instruction: str, *, document_scope: bool) -
         last_error,
     )
     return FAILED_TEMPLATE.format(instruction=instruction, error=last_error)
+
+
+def _sanitize(text: str) -> str:
+    """Defuse BLIGHT: trigger patterns in AI output to prevent loop re-processing.
+
+    Every case-insensitive ``BLIGHT:`` match is rewritten as ``BLIGHT&#58;``
+    so the returned text no longer contains the literal trigger prefix.
+    """
+    # A replacement of "BLIGHT:" would put the trigger straight back, so the
+    # colon is emitted as a character reference instead.
+    # NOTE(review): assumes the documents are rendered as Markdown/HTML, where
+    # "&#58;" displays as ":" -- confirm.
+    return _SANITIZE_PATTERN.sub("BLIGHT&#58;", text)