From 9917d44f5d0b7b1d8a411304cd05f76e2f04b280 Mon Sep 17 00:00:00 2001
From: Spencer <toamidan@gmail.com>
Date: Thu, 26 Feb 2026 15:56:09 -0600
Subject: [PATCH] docs: add HuggingFace cache troubleshooting to README

- Document HF_HOME environment variable for writable cache
- Add systemd service permission guidance for /tmp paths
- Troubleshooting steps for read-only file system errors
---
 .env.example                                  |   0
 .env.testing                                  |   3 +
 .gitignore                                    |   0
 README.md                                     |  21 +++
 audio_effects.py                              |   0
 audio_preprocessor.py                         |   0
 bot.py                                        |   0
 config.py                                     |   0
 launch.sh                                     |   4 +
 media/Subnautica/CyclopsEngineOff.oga         | Bin
 media/Subnautica/CyclopsEngineOn.oga          | Bin
 media/Subnautica/CyclopsOverheat.oga          | Bin
 media/Subnautica/Cyclops_Welcome.oga          | Bin
 media/Subnautica/Cyclops_Welcome2.oga         | Bin
 .../Ronin/diag_gs_titanRonin_embark_03.wav    | Bin
 .../Ronin/diag_gs_titanRonin_embark_05.wav    | Bin
 .../Ronin/diag_gs_titanRonin_embark_06.wav    | Bin
 .../Ronin/diag_gs_titanRonin_embark_08.wav    | Bin
 .../Ronin/diag_gs_titanRonin_embark_09.wav    | Bin
 .../Ronin/diag_gs_titanRonin_embark_10.wav    | Bin
 .../Ronin/diag_gs_titanRonin_embark_11.wav    | Bin
 numba_config.py                               |   0
 pockettts.service                             |   0
 requirements.txt                              |   0
 research/overview.md                          | 140 ++++++++++++++++++
 voice_manager.py                              |   0
 voices/ChoGath.wav                            | Bin
 voices/Estinien.wav                           | Bin
 voices/Gaius.wav                              | Bin
 voices/Gibralter_funny.wav                    | Bin
 voices/Gibralter_good.wav                     | Bin
 voices/HankHill.wav                           | Bin
 voices/Johnny.wav                             | Bin
 voices/MasterChief.wav                        | Bin
 voices/SelfHelpSingh.wav                      | Bin
 voices/Trump.wav                              | Bin
 36 files changed, 168 insertions(+)
 mode change 100644 => 100755 .env.example
 mode change 100644 => 100755 .env.testing
 mode change 100644 => 100755 .gitignore
 mode change 100644 => 100755 README.md
 mode change 100644 => 100755 audio_effects.py
 mode change 100644 => 100755 audio_preprocessor.py
 mode change 100644 => 100755 bot.py
 mode change 100644 => 100755 config.py
 create mode 100755 launch.sh
 mode change 100644 => 100755 media/Subnautica/CyclopsEngineOff.oga
 mode change 100644 => 100755 media/Subnautica/CyclopsEngineOn.oga
 mode change 100644 => 100755 media/Subnautica/CyclopsOverheat.oga
 mode change 100644 => 100755 media/Subnautica/Cyclops_Welcome.oga
 mode change 100644 => 100755 media/Subnautica/Cyclops_Welcome2.oga
 mode change 100644 => 100755 media/TF2/Ronin/diag_gs_titanRonin_embark_03.wav
 mode change 100644 => 100755 media/TF2/Ronin/diag_gs_titanRonin_embark_05.wav
 mode change 100644 => 100755 media/TF2/Ronin/diag_gs_titanRonin_embark_06.wav
 mode change 100644 => 100755 media/TF2/Ronin/diag_gs_titanRonin_embark_08.wav
 mode change 100644 => 100755 media/TF2/Ronin/diag_gs_titanRonin_embark_09.wav
 mode change 100644 => 100755 media/TF2/Ronin/diag_gs_titanRonin_embark_10.wav
 mode change 100644 => 100755 media/TF2/Ronin/diag_gs_titanRonin_embark_11.wav
 mode change 100644 => 100755 numba_config.py
 mode change 100644 => 100755 pockettts.service
 mode change 100644 => 100755 requirements.txt
 create mode 100755 research/overview.md
 mode change 100644 => 100755 voice_manager.py
 mode change 100644 => 100755 voices/ChoGath.wav
 mode change 100644 => 100755 voices/Estinien.wav
 mode change 100644 => 100755 voices/Gaius.wav
 mode change 100644 => 100755 voices/Gibralter_funny.wav
 mode change 100644 => 100755 voices/Gibralter_good.wav
 mode change 100644 => 100755 voices/HankHill.wav
 mode change 100644 => 100755 voices/Johnny.wav
 mode change 100644 => 100755 voices/MasterChief.wav
 mode change 100644 => 100755 voices/SelfHelpSingh.wav
 mode change 100644 => 100755 voices/Trump.wav

diff --git a/.env.example b/.env.example
old mode 100644
new mode 100755
diff --git a/.env.testing b/.env.testing
old mode 100644
new mode 100755
index 668b268..63f1c83
--- a/.env.testing
+++ b/.env.testing
@@ -16,3 +16,6 @@ VOICES_DIR=./voices
 # Default voice name (optional - uses first found voice if not set)
 # This should match the filename without .wav extension (case-insensitive)
 # DEFAULT_VOICE=masterchief
+
+# HuggingFace cache directory (must be writable)
+HF_HOME=/tmp/huggingface
diff --git a/.gitignore b/.gitignore
old mode 100644
new mode 100755
diff --git a/README.md b/README.md
old mode 100644
new mode 100755
index 67a6968..d28d8d0
--- a/README.md
+++ b/README.md
@@ -253,6 +253,27 @@ Test any combination of voice and effects before committing:
 - Ensure the reference audio is clear with minimal background noise
 - Try a longer reference clip (5-10 seconds)
 
+### HuggingFace cache read-only error
+If you see errors like `OSError: [Errno 30] Read-only file system` when the bot tries to download the TTS model:
+
+1. **Set a writable cache directory**: Add to your `.env` file:
+   ```env
+   HF_HOME=/tmp/huggingface
+   ```
+
+2. **Create and set permissions** on the directory:
+   ```bash
+   sudo mkdir -p /tmp/huggingface
+   sudo chown -R $USER:$USER /tmp/huggingface
+   ```
+
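+3. **If using systemd service**: Ensure the service has write access to the chosen cache directory. If the unit sets `ProtectSystem=strict`, add `ReadWritePaths=/tmp/huggingface` to the service file. If it sets `ProtectHome=read-only`, the default cache under `~/.cache/huggingface` is read-only, so either keep `HF_HOME` pointed outside your home directory or remove that setting. Note that with `PrivateTmp=true` the service sees its own private `/tmp`, so a directory created on the host under `/tmp` is not visible to it; choose a cache path outside `/tmp` in that case.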
+
+4. **Restart the bot** (substitute your installed unit name if it differs, e.g. `pockettts.service`):
+   ```bash
+   sudo systemctl restart vox.service
+   ```
+
 ## Linux Server Deployment
 
 To run the bot as a service on a Linux server:
diff --git a/audio_effects.py b/audio_effects.py
old mode 100644
new mode 100755
diff --git a/audio_preprocessor.py b/audio_preprocessor.py
old mode 100644
new mode 100755
diff --git a/bot.py b/bot.py
old mode 100644
new mode 100755
diff --git a/config.py b/config.py
old mode 100644
new mode 100755
diff --git a/launch.sh b/launch.sh
new file mode 100755
index 0000000..3ad6ea9
--- /dev/null
+++ b/launch.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+cd /home/artanis/Documents/Vox/ || exit 1  # abort instead of launching from the wrong directory
+source venv/bin/activate || exit 1         # the bot's dependencies live in this virtualenv
+exec python bot.py "$@"                    # forward arguments (e.g. "testing"); exec so signals reach the bot directly
diff --git a/media/Subnautica/CyclopsEngineOff.oga b/media/Subnautica/CyclopsEngineOff.oga
old mode 100644
new mode 100755
diff --git a/media/Subnautica/CyclopsEngineOn.oga b/media/Subnautica/CyclopsEngineOn.oga
old mode 100644
new mode 100755
diff --git a/media/Subnautica/CyclopsOverheat.oga b/media/Subnautica/CyclopsOverheat.oga
old mode 100644
new mode 100755
diff --git a/media/Subnautica/Cyclops_Welcome.oga b/media/Subnautica/Cyclops_Welcome.oga
old mode 100644
new mode 100755
diff --git a/media/Subnautica/Cyclops_Welcome2.oga b/media/Subnautica/Cyclops_Welcome2.oga
old mode 100644
new mode 100755
diff --git a/media/TF2/Ronin/diag_gs_titanRonin_embark_03.wav b/media/TF2/Ronin/diag_gs_titanRonin_embark_03.wav
old mode 100644
new mode 100755
diff --git a/media/TF2/Ronin/diag_gs_titanRonin_embark_05.wav b/media/TF2/Ronin/diag_gs_titanRonin_embark_05.wav
old mode 100644
new mode 100755
diff --git a/media/TF2/Ronin/diag_gs_titanRonin_embark_06.wav b/media/TF2/Ronin/diag_gs_titanRonin_embark_06.wav
old mode 100644
new mode 100755
diff --git a/media/TF2/Ronin/diag_gs_titanRonin_embark_08.wav b/media/TF2/Ronin/diag_gs_titanRonin_embark_08.wav
old mode 100644
new mode 100755
diff --git a/media/TF2/Ronin/diag_gs_titanRonin_embark_09.wav b/media/TF2/Ronin/diag_gs_titanRonin_embark_09.wav
old mode 100644
new mode 100755
diff --git a/media/TF2/Ronin/diag_gs_titanRonin_embark_10.wav b/media/TF2/Ronin/diag_gs_titanRonin_embark_10.wav
old mode 100644
new mode 100755
diff --git a/media/TF2/Ronin/diag_gs_titanRonin_embark_11.wav b/media/TF2/Ronin/diag_gs_titanRonin_embark_11.wav
old mode 100644
new mode 100755
diff --git a/numba_config.py b/numba_config.py
old mode 100644
new mode 100755
diff --git a/pockettts.service b/pockettts.service
old mode 100644
new mode 100755
diff --git a/requirements.txt b/requirements.txt
old mode 100644
new mode 100755
diff --git a/research/overview.md b/research/overview.md
new file mode 100755
index 0000000..73ffb8b
--- /dev/null
+++ b/research/overview.md
@@ -0,0 +1,140 @@
+# Vox - Discord Text-to-Speech Bot
+
+A Python-based Discord bot that generates neural text-to-speech using voice cloning from reference WAV files.
+
+## Project Structure
+
+```
+Vox/
+├── bot.py                 # Main entry point, Discord bot implementation
+├── config.py              # Configuration management using environment variables
+├── voice_manager.py       # Voice discovery, loading, and user preferences
+├── audio_effects.py       # Audio post-processing effects (7 effects)
+├── audio_preprocessor.py  # Audio preprocessing for voice cloning
+├── numba_config.py        # Numba JIT compiler cache configuration
+├── requirements.txt       # Python dependencies
+├── launch.sh              # Shell script to start the bot
+├── pockettts.service      # Systemd service file for Linux deployment
+├── README.md              # Comprehensive documentation
+├── .env                   # Production environment configuration
+├── .env.testing           # Testing environment configuration
+├── .env.example           # Environment configuration template
+└── voices/                # Directory for voice WAV files
+    ├── preferences.json   # User voice/effect preferences (auto-generated)
+    └── *.wav              # Voice reference files
+```
+
+## Core Functionality
+
+### TTS Implementation
+- **Engine**: Pocket TTS (`pocket-tts` library) for neural text-to-speech synthesis
+- **Voice Cloning**: Uses reference WAV files to clone voices via `model.get_state_for_audio_prompt()`
+- **On-demand Loading**: Voices are loaded only when first needed, then cached
+
+### Discord Integration
+- Monitors a configured text channel for messages
+- Joins the user's voice channel when they speak
+- Uses `discord.FFmpegPCMAudio` with piped WAV data for streaming
+
+### Audio Processing Pipeline
+```
+Text Message → Pocket TTS → Audio Effects → Normalize → FFmpeg → Discord VC
+```
+
+## Dependencies
+
+| Library | Purpose |
+|---------|---------|
+| `discord.py[voice]>=2.3.0` | Discord bot API with voice support |
+| `pocket-tts>=0.1.0` | Neural TTS engine with voice cloning |
+| `scipy>=1.10.0` | Scientific computing (audio I/O) |
+| `numpy>=1.24.0` | Numerical computing |
+| `librosa>=0.10.0` | Audio analysis and effects |
+| `noisereduce>=3.0.0` | Noise reduction preprocessing |
+| `soundfile>=0.12.0` | Audio file I/O |
+| `python-dotenv>=1.0.0` | Environment variable loading |
+
+**System Requirements**: Python 3.10+, FFmpeg
+
+## Key Modules
+
+### `TTSBot` (bot.py)
+Main Discord bot class that extends `commands.Bot`. Handles:
+- Message processing and TTS queue
+- Voice channel connections
+- Slash command registration
+- Startup initialization (loads TTS model, discovers voices)
+
+### `VoiceManager` (voice_manager.py)
+Manages voice files and user preferences:
+- Discovers voices from WAV files in `voices/` directory
+- On-demand voice loading with caching
+- Per-user voice selection and effect preferences
+- Preferences persistence to JSON
+
+### `AudioEffects` (audio_effects.py)
+Provides 7 post-processing effects:
+1. **Pitch** (-12 to +12 semitones)
+2. **Speed** (0.5x to 2.0x)
+3. **Echo** (0-100%)
+4. **Robot** (0-100%) - Ring modulation
+5. **Chorus** (0-100%) - Multiple voice layering
+6. **Tremolo Depth** (0.0-1.0)
+7. **Tremolo Rate** (0.0-10.0 Hz)
+
+### `AudioPreprocessor` (audio_preprocessor.py)
+Prepares voice reference files for cloning:
+1. Load and resample to 22050 Hz
+2. Normalize volume
+3. Trim silence
+4. Noise reduction
+5. Limit length (default 15 seconds)
+
+### `Config` (config.py)
+Centralized configuration management with environment-aware loading and validation.
+
+## Slash Commands
+
+| Command | Description |
+|---------|-------------|
+| `/voice list` | Show available voices |
+| `/voice set <name>` | Select your voice |
+| `/voice current` | Show current voice |
+| `/voice refresh` | Rescan for new voices |
+| `/voice preview <name>` | Preview before committing |
+| `/effects list` | Show your effect settings |
+| `/effects set <effect> <value>` | Adjust effects |
+| `/effects reset` | Reset to defaults |
+
+## Features
+
+- **Voice Cloning**: Add new voices by placing `.wav` files in `voices/` directory
+- **Per-User Customization**: Each user can have their own voice and effect preferences
+- **Hot-Reload**: Rescan for new voices without restart (`/voice refresh`)
+- **Message Queue**: Queues messages for sequential playback
+- **Inactivity Management**: Disconnects after 10 minutes of inactivity
+- **Testing Support**: Separate `.env.testing` configuration for safe development
+
+## Configuration (.env)
+
+```env
+DISCORD_TOKEN=your_bot_token
+TEXT_CHANNEL_ID=channel_id_to_monitor
+VOICES_DIR=./voices
+DEFAULT_VOICE=optional_default_voice_name
+```
+
+## Running the Bot
+
+```bash
+# Production
+python bot.py
+
+# Testing (uses .env.testing)
+python bot.py testing
+
+# Or use the launch script
+./launch.sh
+```
+
+For production deployment on Linux, a systemd service file (`pockettts.service`) is included.
diff --git a/voice_manager.py b/voice_manager.py
old mode 100644
new mode 100755
diff --git a/voices/ChoGath.wav b/voices/ChoGath.wav
old mode 100644
new mode 100755
diff --git a/voices/Estinien.wav b/voices/Estinien.wav
old mode 100644
new mode 100755
diff --git a/voices/Gaius.wav b/voices/Gaius.wav
old mode 100644
new mode 100755
diff --git a/voices/Gibralter_funny.wav b/voices/Gibralter_funny.wav
old mode 100644
new mode 100755
diff --git a/voices/Gibralter_good.wav b/voices/Gibralter_good.wav
old mode 100644
new mode 100755
diff --git a/voices/HankHill.wav b/voices/HankHill.wav
old mode 100644
new mode 100755
diff --git a/voices/Johnny.wav b/voices/Johnny.wav
old mode 100644
new mode 100755
diff --git a/voices/MasterChief.wav b/voices/MasterChief.wav
old mode 100644
new mode 100755
diff --git a/voices/SelfHelpSingh.wav b/voices/SelfHelpSingh.wav
old mode 100644
new mode 100755
diff --git a/voices/Trump.wav b/voices/Trump.wav
old mode 100644
new mode 100755