feat: add audio effects (pitch and speed control)

- Added new audio_effects.py module with pitch shift and speed change
- Pitch range: -12 to +12 semitones (higher = chipmunk, lower = deeper)
- Speed range: 0.5 to 2.0x (higher = faster, lower = slower)
- Maximum 2 active effects per user (performance optimization)
- Added /effects command group:
  - /effects list - Shows current effects with descriptions
  - /effects set pitch|speed <value> - Apply effects
  - /effects reset - Confirmation UI to clear all effects
- Effects persist across restarts in preferences.json
- Updated /voice preview to support optional pitch/speed parameters
- Effects applied in _generate_wav_bytes using librosa
- Added performance warnings when processing takes >1 second
- Updated README with effects documentation
2026-01-31 15:43:29 -06:00
parent 4a2d72517f
commit 9f14e8c745
4 changed files with 527 additions and 29 deletions
--- a/voice_manager.py
+++ b/voice_manager.py
@@ -6,6 +6,7 @@ from typing import Any

 from pocket_tts import TTSModel

+from audio_effects import AudioEffects
 from audio_preprocessor import (
    AudioPreprocessor,
    PreprocessingConfig,
@@ -26,6 +27,8 @@ class VoiceManager:
        self._voice_states: dict[str, Any] = {}
        # Per-user voice preferences: user_id -> voice_name
        self._user_voices: dict[int, str] = {}
+        # Per-user audio effects: user_id -> {"pitch": int, "speed": float}
+        self._user_effects: dict[int, dict[str, Any]] = {}
        # Available voices: voice_name -> file_path
        self._available_voices: dict[str, Path] = {}

@@ -179,12 +182,97 @@ class VoiceManager:
        try:
            # Ensure directory exists
            self.preferences_file.parent.mkdir(parents=True, exist_ok=True)
-            
+
            data = {
-                "user_voices": {str(k): v for k, v in self._user_voices.items()}
+                "user_voices": {str(k): v for k, v in self._user_voices.items()},
+                "user_effects": {str(k): v for k, v in self._user_effects.items()},
            }
-            
+
            with open(self.preferences_file, "w") as f:
                json.dump(data, f, indent=2)
        except Exception as e:
            print(f"Warning: Failed to save preferences: {e}")
+
+    # Effects management methods
+
+    def get_user_effects(self, user_id: int) -> dict[str, Any]:
+        """Return the pitch and speed settings stored for ``user_id``.
+
+        A setting the user has not stored is reported as the matching
+        ``AudioEffects`` default, so the result always has both keys.
+        """
+        stored = self._user_effects.get(user_id, {})
+        pitch = stored.get("pitch", AudioEffects.PITCH_DEFAULT)
+        speed = stored.get("speed", AudioEffects.SPEED_DEFAULT)
+        return {"pitch": pitch, "speed": speed}
+
+    def set_user_effect(self, user_id: int, effect_name: str, value: Any) -> tuple[bool, str]:
+        """
+        Set an audio effect for a user and persist it.
+
+        The value is checked with ``AudioEffects.validate_effect`` and then
+        stored in normalized form (``int`` for pitch, ``float`` for speed), so
+        the saved value is the same one used for the active-effect count.
+
+        Reaching ``AudioEffects.MAX_ACTIVE_EFFECTS`` does not reject the
+        change; it only adds a warning to the returned message.
+
+        Returns:
+            Tuple of (success, message)
+        """
+        # Validate the effect
+        is_valid, error_msg = AudioEffects.validate_effect(effect_name, value)
+        if not is_valid:
+            return False, error_msg
+
+        # Normalize before touching any state, so a failed conversion leaves
+        # the user's stored effects untouched.
+        normalized = value
+        if effect_name == "pitch":
+            normalized = int(value)
+        elif effect_name == "speed":
+            normalized = float(value)
+
+        # Count active effects as they will be once this change is applied.
+        prospective = dict(self._user_effects.get(user_id, {}))
+        prospective[effect_name] = normalized
+        active_count = AudioEffects.count_active_effects(
+            prospective.get("pitch", AudioEffects.PITCH_DEFAULT),
+            prospective.get("speed", AudioEffects.SPEED_DEFAULT),
+        )
+
+        # Save the effect
+        self._user_effects.setdefault(user_id, {})[effect_name] = normalized
+        self._save_preferences()
+
+        if active_count >= AudioEffects.MAX_ACTIVE_EFFECTS:
+            return True, f"Effect applied! ⚠️ You now have {active_count} active effects (max {AudioEffects.MAX_ACTIVE_EFFECTS}). More effects = slower processing."
+        else:
+            return True, "Effect applied successfully!"
+
+    def reset_user_effects(self, user_id: int) -> None:
+        """Drop every stored effect for ``user_id`` and save the change.
+
+        A user with no stored effects is left alone and nothing is written.
+        """
+        if user_id not in self._user_effects:
+            return
+        self._user_effects.pop(user_id)
+        self._save_preferences()
+
+    def count_active_effects(self, user_id: int) -> int:
+        """Return the number of active effects for ``user_id``.
+
+        The user's pitch and speed (defaults included) are passed to
+        ``AudioEffects.count_active_effects``, which does the counting.
+        """
+        current = self.get_user_effects(user_id)
+        pitch, speed = current["pitch"], current["speed"]
+        return AudioEffects.count_active_effects(pitch, speed)
+
+    def _load_preferences(self) -> None:
+        """Load user voice and effect preferences from the JSON file.
+
+        Does nothing when the preferences file does not exist. Voice entries
+        are kept only if the voice is still in ``self._available_voices``;
+        effect entries are kept only if they are JSON objects. Any error
+        while reading or parsing is printed as a warning, not raised.
+        """
+        if not self.preferences_file.exists():
+            return
+
+        try:
+            with open(self.preferences_file, "r") as f:
+                data = json.load(f)
+
+            # Load user preferences (convert string keys back to int)
+            for user_id_str, voice_name in data.get("user_voices", {}).items():
+                user_id = int(user_id_str)
+                # Only load if voice still exists
+                if voice_name.lower() in self._available_voices:
+                    self._user_voices[user_id] = voice_name.lower()
+
+            # Load user effects (convert string keys back to int)
+            for user_id_str, effects in data.get("user_effects", {}).items():
+                # Skip malformed entries: get_user_effects calls .get() on the
+                # stored value, so anything but a dict would fail later.
+                if not isinstance(effects, dict):
+                    continue
+                self._user_effects[int(user_id_str)] = dict(effects)
+
+            print(f"  Loaded {len(self._user_voices)} user voice preferences")
+            print(f"  Loaded {len(self._user_effects)} user effect preferences")
+        except Exception as e:
+            print(f"  Warning: Failed to load preferences: {e}")