# BLIGHT--CUE/processor.py

import re
import time
import logging
from ai import GeminiProvider

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Inline trigger: BLIGHT: <instruction>  (single colon, case-insensitive).
# The separator is limited to same-line whitespace and the instruction must
# start with a non-blank character, so a bare "BLIGHT:" line can neither pull
# text from a following line in as its instruction nor fire with an empty one.
INLINE_PATTERN = re.compile(
    r"^BLIGHT:(?!:)[^\S\r\n]+(\S.*)$", re.MULTILINE | re.IGNORECASE
)
# Document-scope trigger: BLIGHT:: <instruction>  (double colon,
# case-insensitive). Same separator and instruction rules as the inline form.
DOCUMENT_PATTERN = re.compile(
    r"^BLIGHT::[^\S\r\n]+(\S.*)$", re.MULTILINE | re.IGNORECASE
)

# Two HTML comments written in place of a trigger whose AI call failed: the
# first records the instruction, the second the error text.
FAILED_TEMPLATE = (
    "<!-- BLIGHT_FAILED: {instruction} -->\n"
    "<!-- BLIGHT_ERROR: {error} -->"
)

# Finds every "BLIGHT:" (any letter case) so AI output can be defused before
# it is written back, where it could otherwise cause a processing loop.
_SANITIZE_PATTERN = re.compile(r"BLIGHT:", flags=re.IGNORECASE)

# Retry policy: total number of attempts, and the pause in seconds that
# follows each failed attempt (indexed by zero-based attempt number).
_MAX_RETRIES = 3
_RETRY_DELAYS = [1, 2, 4]

# Shared provider instance, constructed once when this module is imported.
_provider = GeminiProvider()


def process_document(content: str) -> tuple[str, bool]:
    """Scan content for BLIGHT triggers and process each one.

    Inline triggers (BLIGHT:) are processed first, in document order; each
    AI response replaces only its trigger line. Document-scope triggers
    (BLIGHT::) are processed next, in document order; each AI response
    replaces the entire content, and each trigger operates on the result of
    the previous one.

    A trigger whose AI call fails is replaced by the BLIGHT_FAILED /
    BLIGHT_ERROR comments returned by _call_with_retry. For a document-scope
    trigger the comments take the place of the trigger line, unless an
    earlier document-scope trigger has already rewritten the content; the
    original position no longer exists then, so the comments are placed at
    the top of the content instead.

    Args:
        content: Full text of the document to scan.

    Returns:
        (updated_content, changed) where changed is True if at least one
        trigger was found and processed.
    """
    changed = False

    # --- Pass 1: inline triggers ---
    # The number of triggers is fixed up front so the loop is bounded, but
    # each one is located with a fresh search because every replacement
    # shifts the offsets of the text that follows it.
    inline_count = sum(1 for _ in INLINE_PATTERN.finditer(content))
    for _ in range(inline_count):
        match = INLINE_PATTERN.search(content)
        if match is None:
            break

        instruction = match.group(1).strip()
        logger.info("Processing inline trigger: %s", instruction)

        replacement = _call_with_retry(content, instruction, document_scope=False)
        content = content[:match.start()] + replacement + content[match.end():]
        changed = True

    # --- Pass 2: document-scope triggers ---
    # Every trigger line is removed before the first AI call. A successful
    # call replaces the whole content with sanitized output, so a trigger
    # line left in place would be defused (or dropped) by the first rewrite
    # and never run. Carrying the instructions in a list lets each one
    # operate on the result of the previous one.
    content, doc_triggers = _extract_document_triggers(content)

    failure_prefix = "<!-- BLIGHT_FAILED:"
    rewritten = False  # True once a document-scope call has replaced content
    inserted = 0  # total length of failure comments inserted so far

    for instruction, offset, had_newline in doc_triggers:
        logger.info("Processing document-scope trigger: %s", instruction)

        result = _call_with_retry(content, instruction, document_scope=True)

        if not result.startswith(failure_prefix):
            content = result
            rewritten = True
        elif rewritten:
            # The recorded offset referred to text that has since been
            # replaced wholesale; report the failure at the top instead.
            content = result + "\n" + content
        else:
            # Put the failure comments where the trigger line was, giving
            # back the newline that was removed together with the line so
            # the comment is not glued to the following text.
            note = result + ("\n" if had_newline else "")
            position = offset + inserted
            content = content[:position] + note + content[position:]
            inserted += len(note)

        changed = True

    return content, changed


def _extract_document_triggers(content: str) -> tuple[str, list[tuple[str, int, bool]]]:
    """Remove every document-scope trigger line from content.

    Returns:
        (stripped, triggers) where stripped is content without any BLIGHT::
        trigger line and triggers holds, in document order, one
        (instruction, offset, had_newline) tuple per removed line. offset is
        the position in stripped where the line used to start; had_newline
        tells whether a newline directly after the line was removed with it.
    """
    kept: list[str] = []
    triggers: list[tuple[str, int, bool]] = []
    cursor = 0  # start of the not-yet-copied part of content
    offset = 0  # length of the text kept so far

    for match in DOCUMENT_PATTERN.finditer(content):
        before = content[cursor:match.start()]
        kept.append(before)
        offset += len(before)

        line_end = match.end()
        had_newline = content.startswith("\n", line_end)
        if had_newline:
            line_end += 1

        triggers.append((match.group(1).strip(), offset, had_newline))
        cursor = line_end

    kept.append(content[cursor:])
    return "".join(kept), triggers


def _call_with_retry(document: str, instruction: str, *, document_scope: bool) -> str:
    """Call the AI provider with up to _MAX_RETRIES attempts.

    Args:
        document: Document text handed to the provider.
        instruction: Instruction taken from the trigger line.
        document_scope: When True the provider's complete_document method is
            called, otherwise its complete method.

    Returns:
        The sanitized AI response on success, or FAILED_TEMPLATE filled in
        with the instruction and the last error once every attempt has
        failed. Exceptions raised during an attempt are not propagated.
    """
    last_error: Exception | None = None
    for attempt in range(_MAX_RETRIES):
        try:
            if document_scope:
                response = _provider.complete_document(document, instruction)
            else:
                response = _provider.complete(document, instruction)
            return _sanitize(response)
        except Exception as exc:
            # Deliberately broad: any failure of an attempt is retried and,
            # if it persists, reported inside the document instead of raised.
            last_error = exc
            if attempt >= _MAX_RETRIES - 1:
                break  # no pause after the final attempt

            # Reuse the last configured delay when there are more retries
            # than delay entries, rather than failing with an IndexError.
            if _RETRY_DELAYS:
                delay = _RETRY_DELAYS[min(attempt, len(_RETRY_DELAYS) - 1)]
            else:
                delay = 0
            logger.warning(
                "Attempt %d/%d failed for instruction %r: %s — retrying in %ds",
                attempt + 1,
                _MAX_RETRIES,
                instruction,
                exc,
                delay,
            )
            time.sleep(delay)

    logger.error(
        "All %d attempts failed for instruction %r: %s",
        _MAX_RETRIES,
        instruction,
        last_error,
    )
    # The error text is written into the document, so defuse any trigger text
    # it contains (a multi-line message could otherwise start a line with
    # "BLIGHT:" and be picked up as a new trigger).
    return FAILED_TEMPLATE.format(
        instruction=instruction,
        error=_sanitize(str(last_error)),
    )


def _sanitize(text: str) -> str:
    """Return text with every BLIGHT: occurrence rewritten as BLIGHT&#58;.

    Matching is case-insensitive and the replacement is always the
    upper-case form, so AI output written back into a document is not
    picked up as a new trigger.
    """
    defused = _SANITIZE_PATTERN.sub("BLIGHT&#58;", text)
    return defused