From 4c9fecda167e64cb6e0d4e2d41523737b98da861 Mon Sep 17 00:00:00 2001 From: Spencer Date: Sat, 14 Mar 2026 20:17:31 -0500 Subject: [PATCH] Initial commit: BLIGHT: CUE Webhook listener that monitors Gitea repos for BLIGHT: triggers in markdown files, processes them via Gemini 2.5 Flash-Lite, and writes results back in-place. Co-Authored-By: Claude Sonnet 4.6 --- .env.example | 15 ++++ .gitignore | 4 + README.md | 185 +++++++++++++++++++++++++++++++++++++++++++++++ SpecSheet.md | 34 +++++++++ ai/__init__.py | 4 + ai/base.py | 21 ++++++ ai/gemini.py | 29 ++++++++ app.py | 96 ++++++++++++++++++++++++ config.py | 10 +++ gitea_client.py | 44 +++++++++++ processor.py | 77 ++++++++++++++++++++ requirements.txt | 4 + 12 files changed, 523 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 README.md create mode 100644 SpecSheet.md create mode 100644 ai/__init__.py create mode 100644 ai/base.py create mode 100644 ai/gemini.py create mode 100644 app.py create mode 100644 config.py create mode 100644 gitea_client.py create mode 100644 processor.py create mode 100644 requirements.txt diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..e9a28fe --- /dev/null +++ b/.env.example @@ -0,0 +1,15 @@ +# Gitea instance base URL (no trailing slash) +GITEA_URL=https://gitea.bunny-wyvern.ts.net + +# Gitea personal access token (Settings → Applications → Generate Token) +# Needs read/write access to repository contents +GITEA_TOKEN=your_gitea_token_here + +# Google Gemini API key (https://aistudio.google.com/apikey) +GEMINI_API_KEY=your_gemini_api_key_here + +# Secret shared with the Gitea webhook (set this when registering the webhook in Gitea) +WEBHOOK_SECRET=your_webhook_secret_here + +# Port for the webhook listener to bind on (default: 5010) +WEBHOOK_PORT=5010 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..69cac2e --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.env +.venv/ +__pycache__/ 
+*.pyc diff --git a/README.md b/README.md new file mode 100644 index 0000000..015bf07 --- /dev/null +++ b/README.md @@ -0,0 +1,185 @@ +# BLIGHT: CUE + +A module in the **BLIGHT** ecosystem. + +BLIGHT: CUE monitors Gitea repositories containing markdown files. When a push is received, it scans changed files for `BLIGHT:` trigger lines, sends the surrounding document to an AI model along with the instruction, and writes the result back to the file in-place — fully automated. + +--- + +## How It Works + +1. You write a `BLIGHT:` trigger anywhere in a markdown file and push it to Gitea. +2. Gitea sends a webhook POST to this server. +3. The server fetches the file, finds all `BLIGHT:` triggers, and processes them one by one. +4. Each trigger is replaced with the AI's response at the exact position of the trigger line. +5. The updated file is committed back to the repo automatically. + +### Trigger Syntax + +``` +BLIGHT: <your instruction> +``` + +Examples: + +```markdown +This paragraph discusses は vs が in Japanese grammar. + +BLIGHT: Explain the key differences between は and が based on the paragraph above. + +## Next Section +``` + +```markdown +BLIGHT: Spell check this entire document and list any errors found. +``` + +```markdown +BLIGHT: Write a conclusion paragraph for this document. +``` + +### Failure Behavior + +If the AI call fails after 3 attempts, the trigger is replaced with: + +```html +<!-- BLIGHT_FAILED: your original instruction --> +``` + +You can re-trigger processing by editing the file to restore the original `BLIGHT:` line and pushing again. + +--- + +## Prerequisites + +- Python 3.10+ +- Access to your Gitea instance (via Tailscale or local network) +- A [Google Gemini API key](https://aistudio.google.com/apikey) +- A Gitea personal access token with **repository read/write** permissions + +--- + +## Installation + +```bash +# 1. Clone this repo onto the machine that will run the listener +git clone <repository-url> +cd Blight_Reader + +# 2. 
Create and activate a virtual environment +python3 -m venv .venv +source .venv/bin/activate + +# 3. Install dependencies +pip install -r requirements.txt + +# 4. Copy the example env file and fill in your values +cp .env.example .env +``` + +--- + +## Configuration + +Edit `.env` with your values: + +| Variable | Description | +|---|---| +| `GITEA_URL` | Base URL of your Gitea instance, no trailing slash | +| `GITEA_TOKEN` | Personal access token from Gitea → Settings → Applications | +| `GEMINI_API_KEY` | API key from [Google AI Studio](https://aistudio.google.com/apikey) | +| `WEBHOOK_SECRET` | A secret string you choose — must match what you set in Gitea | +| `WEBHOOK_PORT` | Port the listener binds to (default: `5010`) | + +--- + +## Running + +```bash +python app.py +``` + +The server binds to `0.0.0.0:5010` (or your configured port). Keep it running as a service or in a screen/tmux session. + +For production use, consider running it with a process manager: + +```bash +# With systemd (example unit file) +# /etc/systemd/system/blight-cue.service +[Unit] +Description=BLIGHT: CUE webhook listener +After=network.target + +[Service] +WorkingDirectory=/path/to/Blight_Reader +ExecStart=/path/to/Blight_Reader/.venv/bin/python app.py +Restart=always + +[Install] +WantedBy=multi-user.target +``` + +> Replace `/path/to/Blight_Reader` with the actual path. The venv Python is at `.venv/bin/python` relative to the project root — create it with `python3 -m venv .venv && .venv/bin/pip install -r requirements.txt`. + +--- + +## Registering the Webhook in Gitea + +Do this for **each repository** you want BLIGHT: CUE to watch. + +1. Open the repository in Gitea. +2. Go to **Settings** → **Webhooks** → **Add Webhook** → **Gitea**. +3. Set the fields: + - **Target URL**: `http://<listener-host>:5010/webhook` (the address of the machine running `app.py`, as reachable from Gitea) + - **HTTP Method**: POST + - **Content Type**: `application/json` + - **Secret**: the same value as `WEBHOOK_SECRET` in your `.env` + - **Trigger On**: Push events only +4. 
Click **Add Webhook**, then use **Test Delivery** to verify connectivity. + +--- + +## AI Provider + +BLIGHT: CUE currently uses **Google Gemini 2.5 Flash-Lite** — the most cost-effective stable Gemini model (~$0.10/$0.40 per million tokens input/output). + +### Adding a New Provider + +All AI providers implement the `AIProvider` abstract base class in `ai/base.py`: + +```python +from abc import ABC, abstractmethod + +class AIProvider(ABC): + def complete(self, document: str, instruction: str) -> str: + ... +``` + +To add a new provider (e.g. OpenRouter): + +1. Create `ai/openrouter.py` and implement `AIProvider`. +2. In `processor.py`, replace `GeminiProvider()` with your new class. + +--- + +## Project Structure + +``` +Blight_Reader/ +├── app.py # Flask webhook server +├── processor.py # Trigger scanning and replacement logic +├── gitea_client.py # Gitea REST API wrapper +├── config.py # Environment config loader +├── ai/ +│ ├── base.py # AIProvider abstract base class +│ └── gemini.py # Gemini 2.5 Flash-Lite implementation +├── requirements.txt +├── .env.example +└── README.md +``` + +--- + +## Part of the BLIGHT Ecosystem + +BLIGHT: CUE is one module in a larger modular system called **BLIGHT**. Each module is independently deployable and communicates through Gitea repositories as the shared data layer. diff --git a/SpecSheet.md b/SpecSheet.md new file mode 100644 index 0000000..c73ef32 --- /dev/null +++ b/SpecSheet.md @@ -0,0 +1,34 @@ +# Name: + +TBD. Will need help thinking of a name. + +# Objective: + +Create a script/tool/project that can monitor a Gitea repo full of markdown files, look for a specific text trigger, and then follow the instructions listed after that trigger. + +For example, let's say the trigger text is "BLIGHT: " + +Example triggers could be: + +"BLIGHT: Explain the differences between the two grammar points mentioned in the above paragraph." 
+ +"BLIGHT: Please format this document" + +"BLIGHT: Can you spell check this page" + +"BLIGHT: Look at the rest of the doc and write a conclusion paragraph please" + +I'm currently thinking to use Gemini with an API key, but if openrouter seems like the better (or cheaper) option, we can go with that. + +# Process + +1. Script checks for text trigger +2. Script process instructions after trigger +3. Script carries out instructions after trigger +4. Script replaces the trigger and instructions with result of aforementioned instructions. + +# Additional Notes + +- As mentioned, the files that need to be watched are currently on a gitea server. We can pull them, change them, then push them, or if there's some better way, I'm open to suggestions. + +- This will be part of a much bigger ecosystem, simply known as "BLIGHT". diff --git a/ai/__init__.py b/ai/__init__.py new file mode 100644 index 0000000..16af1c6 --- /dev/null +++ b/ai/__init__.py @@ -0,0 +1,4 @@ +from .base import AIProvider +from .gemini import GeminiProvider + +__all__ = ["AIProvider", "GeminiProvider"] diff --git a/ai/base.py b/ai/base.py new file mode 100644 index 0000000..311ea1e --- /dev/null +++ b/ai/base.py @@ -0,0 +1,21 @@ +from abc import ABC, abstractmethod + + +class AIProvider(ABC): + """Base class for all AI provider implementations. + + To add a new provider, subclass this and implement `complete`, then + instantiate your provider in `processor.py` instead of GeminiProvider. + """ + + @abstractmethod + def complete(self, document: str, instruction: str) -> str: + """Process an instruction in the context of a full document. + + Args: + document: The full markdown document text (for context). + instruction: The BLIGHT instruction extracted from the trigger line. + + Returns: + The text to insert in place of the trigger line. 
+ """ diff --git a/ai/gemini.py b/ai/gemini.py new file mode 100644 index 0000000..2bb2b62 --- /dev/null +++ b/ai/gemini.py @@ -0,0 +1,29 @@ +import google.generativeai as genai +import config +from .base import AIProvider + +_SYSTEM_PROMPT = ( + "You are an inline document assistant. " + "The user will provide a markdown document and a specific instruction. " + "Your response must contain ONLY the text to be inserted into the document — " + "no preamble, no explanation, no meta-commentary, no markdown code fences unless " + "the instruction specifically asks for them. " + "Respond as if your output will be dropped directly into the middle of a document." +) + + +class GeminiProvider(AIProvider): + def __init__(self) -> None: + genai.configure(api_key=config.GEMINI_API_KEY) + self._model = genai.GenerativeModel( + model_name="gemini-2.5-flash-lite", + system_instruction=_SYSTEM_PROMPT, + ) + + def complete(self, document: str, instruction: str) -> str: + prompt = ( + f"DOCUMENT:\n\n{document}\n\n" + f"INSTRUCTION: {instruction}" + ) + response = self._model.generate_content(prompt) + return response.text.strip() diff --git a/app.py b/app.py new file mode 100644 index 0000000..889e828 --- /dev/null +++ b/app.py @@ -0,0 +1,96 @@ +import hashlib +import hmac +import json +import logging +import threading +from flask import Flask, request, abort + +import config +import gitea_client +import processor + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", +) +logger = logging.getLogger(__name__) + +app = Flask(__name__) + + +def _verify_signature(payload: bytes, signature_header: str | None) -> bool: + """Validate the Gitea webhook HMAC-SHA256 signature.""" + if not signature_header: + return False + try: + scheme, provided_digest = signature_header.split("=", 1) + except ValueError: + return False + if scheme != "sha256": + return False + expected = hmac.new( + config.WEBHOOK_SECRET.encode(), payload, hashlib.sha256 
+ ).hexdigest() + return hmac.compare_digest(expected, provided_digest) + + +def _handle_push(owner: str, repo: str, changed_files: list[str]) -> None: + """Process all changed markdown files in a push event.""" + for file_path in changed_files: + if not file_path.endswith(".md"): + continue + logger.info("Checking %s/%s: %s", owner, repo, file_path) + try: + content, sha = gitea_client.get_file(owner, repo, file_path) + updated, changed = processor.process_document(content) + if changed: + gitea_client.update_file(owner, repo, file_path, updated, sha) + logger.info("Updated %s", file_path) + else: + logger.info("No BLIGHT triggers found in %s", file_path) + except Exception as exc: + logger.error("Failed processing %s: %s", file_path, exc) + + +@app.post("/webhook") +def webhook(): + payload = request.get_data() + + if not _verify_signature(payload, request.headers.get("X-Gitea-Signature")): + logger.warning("Rejected webhook: invalid signature") + abort(403) + + event = request.headers.get("X-Gitea-Event") + if event != "push": + return {"status": "ignored", "event": event}, 200 + + data = json.loads(payload) + owner = data["repository"]["owner"]["login"] + repo = data["repository"]["name"] + + # Collect unique file paths from all commits in the push + seen: set[str] = set() + changed_files: list[str] = [] + for commit in data.get("commits", []): + for path in commit.get("added", []) + commit.get("modified", []): + if path not in seen: + seen.add(path) + changed_files.append(path) + + if not changed_files: + return {"status": "no files"}, 200 + + # Process in background so we return 200 to Gitea immediately + thread = threading.Thread( + target=_handle_push, + args=(owner, repo, changed_files), + daemon=True, + ) + thread.start() + + return {"status": "processing", "files": len(changed_files)}, 200 + + +if __name__ == "__main__": + logger.info("BLIGHT: CUE starting on port %d", config.WEBHOOK_PORT) + app.run(host="0.0.0.0", port=config.WEBHOOK_PORT) diff --git 
def _headers() -> dict:
    """Return the auth and content-type headers sent with every Gitea call."""
    return {
        "Authorization": f"token {config.GITEA_TOKEN}",
        "Content-Type": "application/json",
    }


def _contents_url(base_url: str, owner: str, repo: str, path: str) -> str:
    """Build the repository-contents API URL, percent-encoding each part.

    Without escaping, a file name containing ``#`` or ``?`` is cut off at
    that character (parsed as fragment / query string) and the request hits
    the wrong resource. Slashes in ``path`` are kept as separators.
    """
    # Local import so this block does not depend on the file's import header.
    from urllib.parse import quote

    return (
        f"{base_url}/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}"
        f"/contents/{quote(path, safe='/')}"
    )


def get_file(owner: str, repo: str, path: str) -> tuple[str, str]:
    """Fetch a file's decoded content and its SHA from Gitea.

    Args:
        owner: Repository owner (user or organisation login).
        repo: Repository name.
        path: File path inside the repository, ``/``-separated.

    Returns:
        ``(content, sha)`` where ``content`` is the base64 payload decoded as
        UTF-8 and ``sha`` is the value `update_file` needs for its write.

    Raises:
        requests.HTTPError: If Gitea answers with a non-2xx status.
    """
    url = _contents_url(config.GITEA_URL, owner, repo, path)
    response = requests.get(url, headers=_headers(), timeout=30)
    response.raise_for_status()
    data = response.json()
    content = base64.b64decode(data["content"]).decode("utf-8")
    return content, data["sha"]


def update_file(
    owner: str,
    repo: str,
    path: str,
    content: str,
    sha: str,
    commit_message: str = "BLIGHT: process triggers",
) -> None:
    """Write updated file content back to Gitea as a new commit.

    Args:
        owner: Repository owner (user or organisation login).
        repo: Repository name.
        path: File path inside the repository, ``/``-separated.
        content: New file text; sent UTF-8 encoded and base64 wrapped.
        sha: SHA returned by `get_file` for the version being replaced.
        commit_message: Message for the commit Gitea creates.

    Raises:
        requests.HTTPError: If Gitea answers with a non-2xx status.
    """
    url = _contents_url(config.GITEA_URL, owner, repo, path)
    payload = {
        "message": commit_message,
        "content": base64.b64encode(content.encode("utf-8")).decode("ascii"),
        "sha": sha,
    }
    response = requests.put(url, headers=_headers(), json=payload, timeout=30)
    response.raise_for_status()
GeminiProvider + +logger = logging.getLogger(__name__) + +TRIGGER_PATTERN = re.compile(r"^BLIGHT:\s+(.+)$", re.MULTILINE) +FAILED_TEMPLATE = "" + +_MAX_RETRIES = 3 +_RETRY_DELAYS = [1, 2, 4] # seconds between attempts + +_provider = GeminiProvider() + + +def process_document(content: str) -> tuple[str, bool]: + """Scan content for BLIGHT triggers and process each one. + + Returns: + (updated_content, changed) where changed is True if any triggers + were found and the content was modified. + """ + triggers = list(TRIGGER_PATTERN.finditer(content)) + if not triggers: + return content, False + + # Process triggers one by one. After each replacement the string length + # may change, so we re-search on the updated content each iteration. + changed = False + for _ in range(len(triggers)): + match = TRIGGER_PATTERN.search(content) + if not match: + break + + instruction = match.group(1).strip() + trigger_line = match.group(0) + logger.info("Processing trigger: %s", instruction) + + replacement = _call_with_retry(content, instruction) + content = content[:match.start()] + replacement + content[match.end():] + changed = True + + return content, changed + + +def _call_with_retry(document: str, instruction: str) -> str: + """Call the AI provider with up to _MAX_RETRIES attempts. + + Returns the AI response on success, or a BLIGHT_FAILED comment on + exhausted retries. 
+ """ + last_error: Exception | None = None + for attempt in range(_MAX_RETRIES): + try: + return _provider.complete(document, instruction) + except Exception as exc: + last_error = exc + if attempt < _MAX_RETRIES - 1: + delay = _RETRY_DELAYS[attempt] + logger.warning( + "Attempt %d/%d failed for instruction %r: %s — retrying in %ds", + attempt + 1, + _MAX_RETRIES, + instruction, + exc, + delay, + ) + time.sleep(delay) + + logger.error( + "All %d attempts failed for instruction %r: %s", + _MAX_RETRIES, + instruction, + last_error, + ) + return FAILED_TEMPLATE.format(instruction=instruction) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..db0eb09 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +flask>=3.0.0 +python-dotenv>=1.0.0 +requests>=2.31.0 +google-generativeai>=0.8.0