Updated .gitignore

Fix loop re-processing, branch awareness, and commit message clarity
- Sanitize AI responses by replacing `BLIGHT:` with `BLIGHT&#58;` to prevent the service's own commits from triggering another processing cycle
- Pass branch (extracted from refs/heads/<branch>) through to Gitea get/update calls so pushes to non-default branches are read and written correctly
- Commit message now includes the file path: "BLIGHT: process triggers in <path>"

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-16 12:40:29 -05:00 · 2026-03-16 12:39:05 -05:00 · 2026-03-16 12:36:32 -05:00
6 changed files with 120 additions and 32 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 .venv/
 __pycache__/
 *.pyc
+.claude/
--- a/ai/base.py
+++ b/ai/base.py
@@ -4,13 +4,14 @@ from abc import ABC, abstractmethod
 class AIProvider(ABC):
    """Base class for all AI provider implementations.

-    To add a new provider, subclass this and implement `complete`, then
-    instantiate your provider in `processor.py` instead of GeminiProvider.
+    To add a new provider, subclass this and implement `complete` and
+    `complete_document`, then instantiate your provider in `processor.py`
+    instead of GeminiProvider.
    """

    @abstractmethod
    def complete(self, document: str, instruction: str) -> str:
-        """Process an instruction in the context of a full document.
+        """Process an inline instruction in the context of a full document.

        Args:
            document: The full markdown document text (for context).
@@ -19,3 +20,15 @@ class AIProvider(ABC):
        Returns:
            The text to insert in place of the trigger line.
        """
+
+    @abstractmethod
+    def complete_document(self, document: str, instruction: str) -> str:
+        """Apply a document-scope instruction and return the full rewritten document.
+
+        Args:
+            document: The full markdown document text.
+            instruction: The BLIGHT:: instruction extracted from the trigger line.
+
+        Returns:
+            The full rewritten document as a string.
+        """
--- a/ai/gemini.py
+++ b/ai/gemini.py
@@ -2,7 +2,7 @@ import google.generativeai as genai
 import config
 from .base import AIProvider

-_SYSTEM_PROMPT = (
+_INLINE_SYSTEM_PROMPT = (
    "You are an inline document assistant. "
    "The user will provide a markdown document and a specific instruction. "
    "Your response must contain ONLY the text to be inserted into the document — "
@@ -11,13 +11,26 @@ _SYSTEM_PROMPT = (
    "Respond as if your output will be dropped directly into the middle of a document."
 )

+_DOCUMENT_SYSTEM_PROMPT = (
+    "You are a document editing assistant. "
+    "The user will provide a markdown document and a specific instruction. "
+    "Apply the instruction to the entire document and return the full rewritten document. "
+    "Your response must contain ONLY the rewritten document — "
+    "no preamble, no explanation, no meta-commentary, no markdown code fences. "
+    "Preserve the document's structure and formatting unless the instruction says otherwise."
+)
+

 class GeminiProvider(AIProvider):
    def __init__(self) -> None:
        genai.configure(api_key=config.GEMINI_API_KEY)
-        self._model = genai.GenerativeModel(
+        self._inline_model = genai.GenerativeModel(
            model_name="gemini-2.5-flash-lite",
-            system_instruction=_SYSTEM_PROMPT,
+            system_instruction=_INLINE_SYSTEM_PROMPT,
+        )
+        self._document_model = genai.GenerativeModel(
+            model_name="gemini-2.5-flash-lite",
+            system_instruction=_DOCUMENT_SYSTEM_PROMPT,
        )

    def complete(self, document: str, instruction: str) -> str:
@@ -25,5 +38,13 @@ class GeminiProvider(AIProvider):
            f"DOCUMENT:\n\n{document}\n\n"
            f"INSTRUCTION: {instruction}"
        )
-        response = self._model.generate_content(prompt)
+        response = self._inline_model.generate_content(prompt)
+        return response.text.strip()
+
+    def complete_document(self, document: str, instruction: str) -> str:
+        prompt = (
+            f"DOCUMENT:\n\n{document}\n\n"
+            f"INSTRUCTION: {instruction}"
+        )
+        response = self._document_model.generate_content(prompt)
        return response.text.strip()
--- a/app.py
+++ b/app.py
@@ -32,17 +32,17 @@ def _verify_signature(payload: bytes, signature_header: str | None) -> bool:
    return hmac.compare_digest(expected, signature_header.strip())


-def _handle_push(owner: str, repo: str, changed_files: list[str]) -> None:
+def _handle_push(owner: str, repo: str, branch: str, changed_files: list[str]) -> None:
    """Process all changed markdown files in a push event."""
    for file_path in changed_files:
        if not file_path.endswith(".md"):
            continue
-        logger.info("Checking %s/%s: %s", owner, repo, file_path)
+        logger.info("Checking %s/%s@%s: %s", owner, repo, branch, file_path)
        try:
-            content, sha = gitea_client.get_file(owner, repo, file_path)
+            content, sha = gitea_client.get_file(owner, repo, file_path, branch)
            updated, changed = processor.process_document(content)
            if changed:
-                gitea_client.update_file(owner, repo, file_path, updated, sha)
+                gitea_client.update_file(owner, repo, file_path, updated, sha, branch)
                logger.info("Updated %s", file_path)
            else:
                logger.info("No BLIGHT triggers found in %s", file_path)
@@ -65,6 +65,7 @@ def webhook():
    data = json.loads(payload)
    owner = data["repository"]["owner"]["login"]
    repo = data["repository"]["name"]
+    branch = data.get("ref", "").removeprefix("refs/heads/")

    # Collect unique file paths from all commits in the push
    seen: set[str] = set()
@@ -81,7 +82,7 @@ def webhook():
    # Process in background so we return 200 to Gitea immediately
    thread = threading.Thread(
        target=_handle_push,
-        args=(owner, repo, changed_files),
+        args=(owner, repo, branch, changed_files),
        daemon=True,
    )
    thread.start()
--- a/gitea_client.py
+++ b/gitea_client.py
@@ -10,7 +10,7 @@ def _headers() -> dict:
    }


-def get_file(owner: str, repo: str, path: str) -> tuple[str, str]:
+def get_file(owner: str, repo: str, path: str, branch: str) -> tuple[str, str]:
    """Fetch a file's decoded content and its SHA from Gitea.

    Returns:
@@ -18,7 +18,7 @@ def get_file(owner: str, repo: str, path: str) -> tuple[str, str]:
        required for the subsequent update call.
    """
    url = f"{config.GITEA_URL}/api/v1/repos/{owner}/{repo}/contents/{path}"
-    response = requests.get(url, headers=_headers(), timeout=30)
+    response = requests.get(url, headers=_headers(), params={"ref": branch}, timeout=30)
    response.raise_for_status()
    data = response.json()
    content = base64.b64decode(data["content"]).decode("utf-8")
@@ -31,14 +31,16 @@ def update_file(
    path: str,
    content: str,
    sha: str,
-    commit_message: str = "BLIGHT: process triggers",
+    branch: str,
+    commit_message: str | None = None,
 ) -> None:
    """Write updated file content back to Gitea."""
    url = f"{config.GITEA_URL}/api/v1/repos/{owner}/{repo}/contents/{path}"
    payload = {
-        "message": commit_message,
+        "message": commit_message or f"BLIGHT: process triggers in {path}",
        "content": base64.b64encode(content.encode("utf-8")).decode("ascii"),
        "sha": sha,
+        "branch": branch,
    }
    response = requests.put(url, headers=_headers(), json=payload, timeout=30)
    response.raise_for_status()
--- a/processor.py
+++ b/processor.py
@@ -5,8 +5,15 @@ from ai import GeminiProvider

 logger = logging.getLogger(__name__)

-TRIGGER_PATTERN = re.compile(r"^BLIGHT:\s+(.+)$", re.MULTILINE)
-FAILED_TEMPLATE = "<!-- BLIGHT_FAILED: {instruction} -->"
+# Inline trigger: BLIGHT: <instruction>  (single colon, case-insensitive)
+INLINE_PATTERN = re.compile(r"^BLIGHT:(?!:)\s+(.+)$", re.MULTILINE | re.IGNORECASE)
+# Document-scope trigger: BLIGHT:: <instruction>  (double colon, case-insensitive)
+DOCUMENT_PATTERN = re.compile(r"^BLIGHT::\s+(.+)$", re.MULTILINE | re.IGNORECASE)
+
+FAILED_TEMPLATE = "<!-- BLIGHT_FAILED: {instruction} -->\n<!-- BLIGHT_ERROR: {error} -->"
+
+# Matches any BLIGHT: trigger in AI output that could cause a processing loop.
+_SANITIZE_PATTERN = re.compile(r"BLIGHT:", re.IGNORECASE)

 _MAX_RETRIES = 3
 _RETRY_DELAYS = [1, 2, 4]  # seconds between attempts
@@ -17,43 +24,81 @@ _provider = GeminiProvider()
 def process_document(content: str) -> tuple[str, bool]:
    """Scan content for BLIGHT triggers and process each one.

+    Inline triggers (BLIGHT:) are processed first in document order, each
+    replacing only the trigger line. Document-scope triggers (BLIGHT::) are
+    processed next in document order, each replacing the entire file content
+    and operating on the result of the previous.
+
    Returns:
        (updated_content, changed) where changed is True if any triggers
        were found and the content was modified.
    """
-    triggers = list(TRIGGER_PATTERN.finditer(content))
-    if not triggers:
+    has_inline = bool(INLINE_PATTERN.search(content))
+    has_document = bool(DOCUMENT_PATTERN.search(content))
+    if not has_inline and not has_document:
        return content, False

-    # Process triggers one by one. After each replacement the string length
-    # may change, so we re-search on the updated content each iteration.
    changed = False
-    for _ in range(len(triggers)):
-        match = TRIGGER_PATTERN.search(content)
+
+    # --- Pass 1: inline triggers ---
+    # Re-search after each replacement since string length may change.
+    inline_count = len(INLINE_PATTERN.findall(content))
+    for _ in range(inline_count):
+        match = INLINE_PATTERN.search(content)
        if not match:
            break

        instruction = match.group(1).strip()
-        trigger_line = match.group(0)
-        logger.info("Processing trigger: %s", instruction)
+        logger.info("Processing inline trigger: %s", instruction)

-        replacement = _call_with_retry(content, instruction)
+        replacement = _call_with_retry(content, instruction, document_scope=False)
        content = content[:match.start()] + replacement + content[match.end():]
        changed = True

+    # --- Pass 2: document-scope triggers ---
+    # Each trigger operates on the result of the previous.
+    doc_count = len(DOCUMENT_PATTERN.findall(content))
+    for _ in range(doc_count):
+        match = DOCUMENT_PATTERN.search(content)
+        if not match:
+            break
+
+        instruction = match.group(1).strip()
+        logger.info("Processing document-scope trigger: %s", instruction)
+
+        # Remove the trigger line before passing to AI so it doesn't appear
+        # in the rewritten document. Also consume the trailing newline that
+        # follows the trigger line, if present.
+        trigger_start, trigger_end = match.start(), match.end()
+        if trigger_end < len(content) and content[trigger_end] == "\n":
+            trigger_end += 1
+        content_without_trigger = content[:trigger_start] + content[trigger_end:]
+
+        result = _call_with_retry(content_without_trigger, instruction, document_scope=True)
+
+        if result.startswith("<!-- BLIGHT_FAILED:"):
+            # On failure, restore the trigger line and insert the failure comment.
+            content = content[:trigger_start] + result + content[trigger_end:]
+        else:
+            content = result
+
+        changed = True
+
    return content, changed


-def _call_with_retry(document: str, instruction: str) -> str:
+def _call_with_retry(document: str, instruction: str, *, document_scope: bool) -> str:
    """Call the AI provider with up to _MAX_RETRIES attempts.

-    Returns the AI response on success, or a BLIGHT_FAILED comment on
-    exhausted retries.
+    Returns the AI response on success, or BLIGHT_FAILED/BLIGHT_ERROR comments
+    on exhausted retries.
    """
    last_error: Exception | None = None
    for attempt in range(_MAX_RETRIES):
        try:
-            return _provider.complete(document, instruction)
+            if document_scope:
+                return _sanitize(_provider.complete_document(document, instruction))
+            return _sanitize(_provider.complete(document, instruction))
        except Exception as exc:
            last_error = exc
            if attempt < _MAX_RETRIES - 1:
@@ -74,4 +119,9 @@ def _call_with_retry(document: str, instruction: str) -> str:
        instruction,
        last_error,
    )
-    return FAILED_TEMPLATE.format(instruction=instruction)
+    return FAILED_TEMPLATE.format(instruction=instruction, error=last_error)
+
+
+def _sanitize(text: str) -> str:
+    """Defuse any BLIGHT: trigger patterns in AI output to prevent loop re-processing."""
+    return _SANITIZE_PATTERN.sub("BLIGHT&#58;", text)