From 9917d44f5d0b7b1d8a411304cd05f76e2f04b280 Mon Sep 17 00:00:00 2001 From: Spencer Date: Thu, 26 Feb 2026 15:56:09 -0600 Subject: [PATCH] docs: add HuggingFace cache troubleshooting to README - Document HF_HOME environment variable for writable cache - Add systemd service permission guidance for /tmp paths - Troubleshooting steps for read-only file system errors --- .env.example | 0 .env.testing | 3 + .gitignore | 0 README.md | 21 +++ audio_effects.py | 0 audio_preprocessor.py | 0 bot.py | 0 config.py | 0 launch.sh | 4 + media/Subnautica/CyclopsEngineOff.oga | Bin media/Subnautica/CyclopsEngineOn.oga | Bin media/Subnautica/CyclopsOverheat.oga | Bin media/Subnautica/Cyclops_Welcome.oga | Bin media/Subnautica/Cyclops_Welcome2.oga | Bin .../Ronin/diag_gs_titanRonin_embark_03.wav | Bin .../Ronin/diag_gs_titanRonin_embark_05.wav | Bin .../Ronin/diag_gs_titanRonin_embark_06.wav | Bin .../Ronin/diag_gs_titanRonin_embark_08.wav | Bin .../Ronin/diag_gs_titanRonin_embark_09.wav | Bin .../Ronin/diag_gs_titanRonin_embark_10.wav | Bin .../Ronin/diag_gs_titanRonin_embark_11.wav | Bin numba_config.py | 0 pockettts.service | 0 requirements.txt | 0 research/overview.md | 140 ++++++++++++++++++ voice_manager.py | 0 voices/ChoGath.wav | Bin voices/Estinien.wav | Bin voices/Gaius.wav | Bin voices/Gibralter_funny.wav | Bin voices/Gibralter_good.wav | Bin voices/HankHill.wav | Bin voices/Johnny.wav | Bin voices/MasterChief.wav | Bin voices/SelfHelpSingh.wav | Bin voices/Trump.wav | Bin 36 files changed, 168 insertions(+) mode change 100644 => 100755 .env.example mode change 100644 => 100755 .env.testing mode change 100644 => 100755 .gitignore mode change 100644 => 100755 README.md mode change 100644 => 100755 audio_effects.py mode change 100644 => 100755 audio_preprocessor.py mode change 100644 => 100755 bot.py mode change 100644 => 100755 config.py create mode 100755 launch.sh mode change 100644 => 100755 media/Subnautica/CyclopsEngineOff.oga mode change 100644 => 100755 
media/Subnautica/CyclopsEngineOn.oga mode change 100644 => 100755 media/Subnautica/CyclopsOverheat.oga mode change 100644 => 100755 media/Subnautica/Cyclops_Welcome.oga mode change 100644 => 100755 media/Subnautica/Cyclops_Welcome2.oga mode change 100644 => 100755 media/TF2/Ronin/diag_gs_titanRonin_embark_03.wav mode change 100644 => 100755 media/TF2/Ronin/diag_gs_titanRonin_embark_05.wav mode change 100644 => 100755 media/TF2/Ronin/diag_gs_titanRonin_embark_06.wav mode change 100644 => 100755 media/TF2/Ronin/diag_gs_titanRonin_embark_08.wav mode change 100644 => 100755 media/TF2/Ronin/diag_gs_titanRonin_embark_09.wav mode change 100644 => 100755 media/TF2/Ronin/diag_gs_titanRonin_embark_10.wav mode change 100644 => 100755 media/TF2/Ronin/diag_gs_titanRonin_embark_11.wav mode change 100644 => 100755 numba_config.py mode change 100644 => 100755 pockettts.service mode change 100644 => 100755 requirements.txt create mode 100755 research/overview.md mode change 100644 => 100755 voice_manager.py mode change 100644 => 100755 voices/ChoGath.wav mode change 100644 => 100755 voices/Estinien.wav mode change 100644 => 100755 voices/Gaius.wav mode change 100644 => 100755 voices/Gibralter_funny.wav mode change 100644 => 100755 voices/Gibralter_good.wav mode change 100644 => 100755 voices/HankHill.wav mode change 100644 => 100755 voices/Johnny.wav mode change 100644 => 100755 voices/MasterChief.wav mode change 100644 => 100755 voices/SelfHelpSingh.wav mode change 100644 => 100755 voices/Trump.wav diff --git a/.env.example b/.env.example old mode 100644 new mode 100755 diff --git a/.env.testing b/.env.testing old mode 100644 new mode 100755 index 668b268..63f1c83 --- a/.env.testing +++ b/.env.testing @@ -16,3 +16,6 @@ VOICES_DIR=./voices # Default voice name (optional - uses first found voice if not set) # This should match the filename without .wav extension (case-insensitive) # DEFAULT_VOICE=masterchief + +# HuggingFace cache directory (must be writable) 
+HF_HOME=/tmp/huggingface diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 67a6968..d28d8d0 --- a/README.md +++ b/README.md @@ -253,6 +253,27 @@ Test any combination of voice and effects before committing: - Ensure the reference audio is clear with minimal background noise - Try a longer reference clip (5-10 seconds) +### HuggingFace cache read-only error +If you see errors like `OSError: [Errno 30] Read-only file system` when the bot tries to download the TTS model: + +1. **Set a writable cache directory**: Add to your `.env` file: + ```env + HF_HOME=/tmp/huggingface + ``` + +2. **Create and set permissions** on the directory: + ```bash + sudo mkdir -p /tmp/huggingface + sudo chown -R $USER:$USER /tmp/huggingface + ``` + +3. **If using a systemd service**: Ensure the service has write access to `/tmp` or the chosen cache directory. You may need to add `ReadWritePaths=/tmp/huggingface` to the service file or remove `ProtectHome=read-only`. + +4. 
**Restart the bot**: + ```bash + sudo systemctl restart pockettts.service + ``` + ## Linux Server Deployment To run the bot as a service on a Linux server: diff --git a/audio_effects.py b/audio_effects.py old mode 100644 new mode 100755 diff --git a/audio_preprocessor.py b/audio_preprocessor.py old mode 100644 new mode 100755 diff --git a/bot.py b/bot.py old mode 100644 new mode 100755 diff --git a/config.py b/config.py old mode 100644 new mode 100755 diff --git a/launch.sh b/launch.sh new file mode 100755 index 0000000..3ad6ea9 --- /dev/null +++ b/launch.sh @@ -0,0 +1,4 @@ +#!/bin/bash +cd /home/artanis/Documents/Vox/ +source venv/bin/activate +python bot.py diff --git a/media/Subnautica/CyclopsEngineOff.oga b/media/Subnautica/CyclopsEngineOff.oga old mode 100644 new mode 100755 diff --git a/media/Subnautica/CyclopsEngineOn.oga b/media/Subnautica/CyclopsEngineOn.oga old mode 100644 new mode 100755 diff --git a/media/Subnautica/CyclopsOverheat.oga b/media/Subnautica/CyclopsOverheat.oga old mode 100644 new mode 100755 diff --git a/media/Subnautica/Cyclops_Welcome.oga b/media/Subnautica/Cyclops_Welcome.oga old mode 100644 new mode 100755 diff --git a/media/Subnautica/Cyclops_Welcome2.oga b/media/Subnautica/Cyclops_Welcome2.oga old mode 100644 new mode 100755 diff --git a/media/TF2/Ronin/diag_gs_titanRonin_embark_03.wav b/media/TF2/Ronin/diag_gs_titanRonin_embark_03.wav old mode 100644 new mode 100755 diff --git a/media/TF2/Ronin/diag_gs_titanRonin_embark_05.wav b/media/TF2/Ronin/diag_gs_titanRonin_embark_05.wav old mode 100644 new mode 100755 diff --git a/media/TF2/Ronin/diag_gs_titanRonin_embark_06.wav b/media/TF2/Ronin/diag_gs_titanRonin_embark_06.wav old mode 100644 new mode 100755 diff --git a/media/TF2/Ronin/diag_gs_titanRonin_embark_08.wav b/media/TF2/Ronin/diag_gs_titanRonin_embark_08.wav old mode 100644 new mode 100755 diff --git a/media/TF2/Ronin/diag_gs_titanRonin_embark_09.wav b/media/TF2/Ronin/diag_gs_titanRonin_embark_09.wav old mode 100644 new mode 100755 
diff --git a/media/TF2/Ronin/diag_gs_titanRonin_embark_10.wav b/media/TF2/Ronin/diag_gs_titanRonin_embark_10.wav old mode 100644 new mode 100755 diff --git a/media/TF2/Ronin/diag_gs_titanRonin_embark_11.wav b/media/TF2/Ronin/diag_gs_titanRonin_embark_11.wav old mode 100644 new mode 100755 diff --git a/numba_config.py b/numba_config.py old mode 100644 new mode 100755 diff --git a/pockettts.service b/pockettts.service old mode 100644 new mode 100755 diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 diff --git a/research/overview.md b/research/overview.md new file mode 100755 index 0000000..73ffb8b --- /dev/null +++ b/research/overview.md @@ -0,0 +1,140 @@ +# Vox - Discord Text-to-Speech Bot + +A Python-based Discord bot that generates neural text-to-speech using voice cloning from reference WAV files. + +## Project Structure + +``` +Vox/ +├── bot.py # Main entry point, Discord bot implementation +├── config.py # Configuration management using environment variables +├── voice_manager.py # Voice discovery, loading, and user preferences +├── audio_effects.py # Audio post-processing effects (7 effects) +├── audio_preprocessor.py # Audio preprocessing for voice cloning +├── numba_config.py # Numba JIT compiler cache configuration +├── requirements.txt # Python dependencies +├── launch.sh # Shell script to start the bot +├── pockettts.service # Systemd service file for Linux deployment +├── README.md # Comprehensive documentation +├── .env # Production environment configuration +├── .env.testing # Testing environment configuration +├── .env.example # Environment configuration template +└── voices/ # Directory for voice WAV files + ├── preferences.json # User voice/effect preferences (auto-generated) + └── *.wav # Voice reference files +``` + +## Core Functionality + +### TTS Implementation +- **Engine**: Pocket TTS (`pocket-tts` library) for neural text-to-speech synthesis +- **Voice Cloning**: Uses reference WAV files to clone voices via 
`model.get_state_for_audio_prompt()` +- **On-demand Loading**: Voices are loaded only when first needed, then cached + +### Discord Integration +- Monitors a configured text channel for messages +- Joins the user's voice channel when they speak +- Uses `discord.FFmpegPCMAudio` with piped WAV data for streaming + +### Audio Processing Pipeline +``` +Text Message → Pocket TTS → Audio Effects → Normalize → FFmpeg → Discord VC +``` + +## Dependencies + +| Library | Purpose | +|---------|---------| +| `discord.py[voice]>=2.3.0` | Discord bot API with voice support | +| `pocket-tts>=0.1.0` | Neural TTS engine with voice cloning | +| `scipy>=1.10.0` | Scientific computing (audio I/O) | +| `numpy>=1.24.0` | Numerical computing | +| `librosa>=0.10.0` | Audio analysis and effects | +| `noisereduce>=3.0.0` | Noise reduction preprocessing | +| `soundfile>=0.12.0` | Audio file I/O | +| `python-dotenv>=1.0.0` | Environment variable loading | + +**System Requirements**: Python 3.10+, FFmpeg + +## Key Modules + +### `TTSBot` (bot.py) +Main Discord bot class that extends `commands.Bot`. Handles: +- Message processing and TTS queue +- Voice channel connections +- Slash command registration +- Startup initialization (loads TTS model, discovers voices) + +### `VoiceManager` (voice_manager.py) +Manages voice files and user preferences: +- Discovers voices from WAV files in `voices/` directory +- On-demand voice loading with caching +- Per-user voice selection and effect preferences +- Preferences persistence to JSON + +### `AudioEffects` (audio_effects.py) +Provides 7 post-processing effects: +1. **Pitch** (-12 to +12 semitones) +2. **Speed** (0.5x to 2.0x) +3. **Echo** (0-100%) +4. **Robot** (0-100%) - Ring modulation +5. **Chorus** (0-100%) - Multiple voice layering +6. **Tremolo Depth** (0.0-1.0) +7. **Tremolo Rate** (0.0-10.0 Hz) + +### `AudioPreprocessor` (audio_preprocessor.py) +Prepares voice reference files for cloning: +1. Load and resample to 22050 Hz +2. Normalize volume +3. 
Trim silence +4. Noise reduction +5. Limit length (default 15 seconds) + +### `Config` (config.py) +Centralized configuration management with environment-aware loading and validation. + +## Slash Commands + +| Command | Description | +|---------|-------------| +| `/voice list` | Show available voices | +| `/voice set <name>` | Select your voice | +| `/voice current` | Show current voice | +| `/voice refresh` | Rescan for new voices | +| `/voice preview <name>` | Preview before committing | +| `/effects list` | Show your effect settings | +| `/effects set <effect> <value>` | Adjust effects | +| `/effects reset` | Reset to defaults | + +## Features + +- **Voice Cloning**: Add new voices by placing `.wav` files in `voices/` directory +- **Per-User Customization**: Each user can have their own voice and effect preferences +- **Hot-Reload**: Rescan for new voices without restart (`/voice refresh`) +- **Message Queue**: Queues messages for sequential playback +- **Inactivity Management**: Disconnects after 10 minutes of inactivity +- **Testing Support**: Separate `.env.testing` configuration for safe development + +## Configuration (.env) + +```env +DISCORD_TOKEN=your_bot_token +TEXT_CHANNEL_ID=channel_id_to_monitor +VOICES_DIR=./voices +DEFAULT_VOICE=optional_default_voice_name +``` + +## Running the Bot + +```bash +# Production +python bot.py + +# Testing (uses .env.testing) +python bot.py testing + +# Or use the launch script +./launch.sh +``` + +For production deployment on Linux, a systemd service file (`pockettts.service`) is included. 
diff --git a/voice_manager.py b/voice_manager.py old mode 100644 new mode 100755 diff --git a/voices/ChoGath.wav b/voices/ChoGath.wav old mode 100644 new mode 100755 diff --git a/voices/Estinien.wav b/voices/Estinien.wav old mode 100644 new mode 100755 diff --git a/voices/Gaius.wav b/voices/Gaius.wav old mode 100644 new mode 100755 diff --git a/voices/Gibralter_funny.wav b/voices/Gibralter_funny.wav old mode 100644 new mode 100755 diff --git a/voices/Gibralter_good.wav b/voices/Gibralter_good.wav old mode 100644 new mode 100755 diff --git a/voices/HankHill.wav b/voices/HankHill.wav old mode 100644 new mode 100755 diff --git a/voices/Johnny.wav b/voices/Johnny.wav old mode 100644 new mode 100755 diff --git a/voices/MasterChief.wav b/voices/MasterChief.wav old mode 100644 new mode 100755 diff --git a/voices/SelfHelpSingh.wav b/voices/SelfHelpSingh.wav old mode 100644 new mode 100755 diff --git a/voices/Trump.wav b/voices/Trump.wav old mode 100644 new mode 100755