diff --git a/ai/base.py b/ai/base.py index 311ea1e..e3aa379 100644 --- a/ai/base.py +++ b/ai/base.py @@ -4,13 +4,14 @@ from abc import ABC, abstractmethod class AIProvider(ABC): """Base class for all AI provider implementations. - To add a new provider, subclass this and implement `complete`, then - instantiate your provider in `processor.py` instead of GeminiProvider. + To add a new provider, subclass this and implement `complete` and + `complete_document`, then instantiate your provider in `processor.py` + instead of GeminiProvider. """ @abstractmethod def complete(self, document: str, instruction: str) -> str: - """Process an instruction in the context of a full document. + """Process an inline instruction in the context of a full document. Args: document: The full markdown document text (for context). @@ -19,3 +20,15 @@ class AIProvider(ABC): Returns: The text to insert in place of the trigger line. """ + + @abstractmethod + def complete_document(self, document: str, instruction: str) -> str: + """Apply a document-scope instruction and return the full rewritten document. + + Args: + document: The full markdown document text. + instruction: The BLIGHT:: instruction extracted from the trigger line. + + Returns: + The full rewritten document as a string. + """ diff --git a/ai/gemini.py b/ai/gemini.py index 2bb2b62..c06b3f6 100644 --- a/ai/gemini.py +++ b/ai/gemini.py @@ -2,7 +2,7 @@ import google.generativeai as genai import config from .base import AIProvider -_SYSTEM_PROMPT = ( +_INLINE_SYSTEM_PROMPT = ( "You are an inline document assistant. " "The user will provide a markdown document and a specific instruction. " "Your response must contain ONLY the text to be inserted into the document — " @@ -11,13 +11,26 @@ _SYSTEM_PROMPT = ( "Respond as if your output will be dropped directly into the middle of a document." ) +_DOCUMENT_SYSTEM_PROMPT = ( + "You are a document editing assistant. " + "The user will provide a markdown document and a specific instruction. " + "Apply the instruction to the entire document and return the full rewritten document. " + "Your response must contain ONLY the rewritten document — " + "no preamble, no explanation, no meta-commentary, no markdown code fences. " + "Preserve the document's structure and formatting unless the instruction says otherwise." +) + class GeminiProvider(AIProvider): def __init__(self) -> None: genai.configure(api_key=config.GEMINI_API_KEY) - self._model = genai.GenerativeModel( + self._inline_model = genai.GenerativeModel( model_name="gemini-2.5-flash-lite", - system_instruction=_SYSTEM_PROMPT, + system_instruction=_INLINE_SYSTEM_PROMPT, + ) + self._document_model = genai.GenerativeModel( + model_name="gemini-2.5-flash-lite", + system_instruction=_DOCUMENT_SYSTEM_PROMPT, ) def complete(self, document: str, instruction: str) -> str: @@ -25,5 +38,13 @@ class GeminiProvider(AIProvider): f"DOCUMENT:\n\n{document}\n\n" f"INSTRUCTION: {instruction}" ) - response = self._model.generate_content(prompt) + response = self._inline_model.generate_content(prompt) + return response.text.strip() + + def complete_document(self, document: str, instruction: str) -> str: + prompt = ( + f"DOCUMENT:\n\n{document}\n\n" + f"INSTRUCTION: {instruction}" + ) + response = self._document_model.generate_content(prompt) return response.text.strip() diff --git a/processor.py b/processor.py index 9360e48..c41ec8b 100644 --- a/processor.py +++ b/processor.py @@ -5,8 +5,12 @@ from ai import GeminiProvider logger = logging.getLogger(__name__) -TRIGGER_PATTERN = re.compile(r"^BLIGHT:\s+(.+)$", re.MULTILINE) -FAILED_TEMPLATE = "" +# Inline trigger: BLIGHT: (single colon, case-insensitive) +INLINE_PATTERN = re.compile(r"^BLIGHT:(?!:)\s+(.+)$", re.MULTILINE | re.IGNORECASE) +# Document-scope trigger: BLIGHT:: (double colon, case-insensitive) +DOCUMENT_PATTERN = re.compile(r"^BLIGHT::\s+(.+)$", re.MULTILINE | re.IGNORECASE) + +FAILED_TEMPLATE = "\n" _MAX_RETRIES = 3 _RETRY_DELAYS = [1, 2, 4] # seconds between attempts @@ -17,42 +21,80 @@ _provider = GeminiProvider() def process_document(content: str) -> tuple[str, bool]: """Scan content for BLIGHT triggers and process each one. + Inline triggers (BLIGHT:) are processed first in document order, each + replacing only the trigger line. Document-scope triggers (BLIGHT::) are + processed next in document order, each replacing the entire file content + and operating on the result of the previous. + Returns: (updated_content, changed) where changed is True if any triggers were found and the content was modified. """ - triggers = list(TRIGGER_PATTERN.finditer(content)) - if not triggers: + has_inline = bool(INLINE_PATTERN.search(content)) + has_document = bool(DOCUMENT_PATTERN.search(content)) + if not has_inline and not has_document: return content, False - # Process triggers one by one. After each replacement the string length - # may change, so we re-search on the updated content each iteration. changed = False - for _ in range(len(triggers)): - match = TRIGGER_PATTERN.search(content) + + # --- Pass 1: inline triggers --- + # Re-search after each replacement since string length may change. + inline_count = len(INLINE_PATTERN.findall(content)) + for _ in range(inline_count): + match = INLINE_PATTERN.search(content) if not match: break instruction = match.group(1).strip() - trigger_line = match.group(0) - logger.info("Processing trigger: %s", instruction) + logger.info("Processing inline trigger: %s", instruction) - replacement = _call_with_retry(content, instruction) + replacement = _call_with_retry(content, instruction, document_scope=False) content = content[:match.start()] + replacement + content[match.end():] changed = True + # --- Pass 2: document-scope triggers --- + # Each trigger operates on the result of the previous. + doc_count = len(DOCUMENT_PATTERN.findall(content)) + for _ in range(doc_count): + match = DOCUMENT_PATTERN.search(content) + if not match: + break + + instruction = match.group(1).strip() + logger.info("Processing document-scope trigger: %s", instruction) + + # Remove the trigger line before passing to AI so it doesn't appear + # in the rewritten document. Also consume the trailing newline that + # follows the trigger line, if present. + trigger_start, trigger_end = match.start(), match.end() + if trigger_end < len(content) and content[trigger_end] == "\n": + trigger_end += 1 + content_without_trigger = content[:trigger_start] + content[trigger_end:] + + result = _call_with_retry(content_without_trigger, instruction, document_scope=True) + + if result.startswith("