diff --git a/audio_effects.py b/audio_effects.py index aba9e51..bbf01df 100644 --- a/audio_effects.py +++ b/audio_effects.py @@ -10,9 +10,10 @@ import numpy as np class AudioEffects: """Apply post-processing effects to TTS audio.""" - MAX_ACTIVE_EFFECTS = 2 + # No limit on effects, but warnings shown when > 2 active + MAX_ACTIVE_EFFECTS = None - # Effect ranges + # Effect ranges and defaults PITCH_MIN = -12 PITCH_MAX = 12 PITCH_DEFAULT = 0 @@ -21,6 +22,26 @@ class AudioEffects: SPEED_MAX = 2.0 SPEED_DEFAULT = 1.0 + ECHO_MIN = 0 + ECHO_MAX = 100 + ECHO_DEFAULT = 0 + + ROBOT_MIN = 0 + ROBOT_MAX = 100 + ROBOT_DEFAULT = 0 + + CHORUS_MIN = 0 + CHORUS_MAX = 100 + CHORUS_DEFAULT = 0 + + TREMOLO_DEPTH_MIN = 0.0 + TREMOLO_DEPTH_MAX = 1.0 + TREMOLO_DEPTH_DEFAULT = 0.0 + + TREMOLO_RATE_MIN = 0.0 + TREMOLO_RATE_MAX = 10.0 + TREMOLO_RATE_DEFAULT = 0.0 + @classmethod def apply_effects( cls, @@ -28,15 +49,25 @@ class AudioEffects: sr: int, pitch: int = PITCH_DEFAULT, speed: float = SPEED_DEFAULT, + echo: int = ECHO_DEFAULT, + robot: int = ROBOT_DEFAULT, + chorus: int = CHORUS_DEFAULT, + tremolo_depth: float = TREMOLO_DEPTH_DEFAULT, + tremolo_rate: float = TREMOLO_RATE_DEFAULT, ) -> tuple[np.ndarray, bool]: """ - Apply effects to audio. 
+ Apply effects to audio in order: pitch → speed → echo → chorus → tremolo → robot Args: - audio: Input audio array + audio: Input audio array (1D) sr: Sample rate pitch: Pitch shift in semitones (-12 to +12, 0 = no shift) speed: Speed multiplier (0.5 to 2.0, 1.0 = normal) + echo: Echo intensity (0-100, 0 = no echo) + robot: Robot voice intensity (0-100, 0 = no robot) + chorus: Chorus intensity (0-100, 0 = no chorus) + tremolo_depth: Tremolo depth (0.0-1.0, 0.0 = no tremolo) + tremolo_rate: Tremolo rate in Hz (0.0-10.0) Returns: Tuple of (processed_audio, show_processing_message) @@ -48,24 +79,43 @@ class AudioEffects: # Validate inputs pitch = max(cls.PITCH_MIN, min(cls.PITCH_MAX, pitch)) speed = max(cls.SPEED_MIN, min(cls.SPEED_MAX, speed)) + echo = max(cls.ECHO_MIN, min(cls.ECHO_MAX, echo)) + robot = max(cls.ROBOT_MIN, min(cls.ROBOT_MAX, robot)) + chorus = max(cls.CHORUS_MIN, min(cls.CHORUS_MAX, chorus)) + tremolo_depth = max(cls.TREMOLO_DEPTH_MIN, min(cls.TREMOLO_DEPTH_MAX, tremolo_depth)) + tremolo_rate = max(cls.TREMOLO_RATE_MIN, min(cls.TREMOLO_RATE_MAX, tremolo_rate)) - print(f"Applying effects - Pitch: {pitch:+d}, Speed: {speed:.1f}x") - - # Apply pitch shift first (if not default) + # Apply pitch shift first if pitch != cls.PITCH_DEFAULT: print(f" Applying pitch shift: {pitch:+d} semitones...") audio = librosa.effects.pitch_shift( audio, sr=sr, n_steps=pitch, bins_per_octave=12 ) - # Apply speed change second (if not default) + # Apply speed change second if speed != cls.SPEED_DEFAULT: print(f" Applying speed change: {speed:.1f}x...") audio = librosa.effects.time_stretch(audio, rate=speed) - # Stretching changes length, so we need to resample to maintain duration - # Actually, for TTS we want the new speed, so we don't resample back - # The audio will be shorter or longer based on speed + # Apply echo third + if echo > 0: + print(f" Applying echo: {echo}%...") + audio = cls._apply_echo(audio, sr, echo) + + # Apply chorus fourth + if chorus > 0: + print(f" 
@classmethod
def _apply_echo(cls, audio: np.ndarray, sr: int, intensity: int) -> np.ndarray:
    """Mix one delayed, attenuated copy of the signal back into itself.

    Args:
        audio: Input samples (1D array).
        sr: Sample rate in Hz.
        intensity: Echo strength, 0-100. Scales the delay from 50 ms to
            300 ms and the gain of the delayed copy from 0.3 to 0.7.

    Returns:
        A new array rescaled so its peak equals the input peak. The input
        is returned unchanged when ``intensity`` is 0 or ``audio`` is empty.
    """
    # Empty input would make the peak computation below raise ValueError.
    if intensity == 0 or len(audio) == 0:
        return audio

    delay_ms = 50 + (intensity / 100) * 250
    delay_samples = int((delay_ms / 1000) * sr)
    decay = 0.3 + (intensity / 100) * 0.4  # gain of the delayed copy

    output = np.copy(audio)

    # A zero delay must be excluded: ``audio[:-0]`` is an empty slice and
    # would not line up with ``output[0:]``.
    if 0 < delay_samples < len(audio):
        output[delay_samples:] += audio[:-delay_samples] * decay

    # Rescale so the echo does not raise the overall peak level.
    max_val = np.max(np.abs(output))
    if max_val > 0:
        output = output / max_val * np.max(np.abs(audio))

    return output


@classmethod
def _apply_chorus(cls, audio: np.ndarray, sr: int, intensity: int) -> np.ndarray:
    """Layer delayed, slightly time-stretched copies over the signal.

    Args:
        audio: Input samples (1D array).
        sr: Sample rate in Hz.
        intensity: Chorus strength, 0-100. Selects 1-3 extra voices and a
            base delay between 15 ms and 30 ms.

    Returns:
        A new array normalized to a peak of 0.95. The input is returned
        unchanged when ``intensity`` is 0 or ``audio`` is empty.
    """
    # Empty input would make the peak computation below raise ValueError.
    if intensity == 0 or len(audio) == 0:
        return audio

    num_voices = 1 + int((intensity / 100) * 2)

    base_delay_ms = 15 + (intensity / 100) * 15
    base_delay_samples = int((base_delay_ms / 1000) * sr)

    # Attenuate the dry signal to leave headroom for the added voices.
    output = np.copy(audio) * 0.6

    for i in range(num_voices):
        # Voices use stretch rates 0.97, 1.00, 1.03. time_stretch changes
        # each copy's duration, so the copies drift against the dry signal.
        stretch_rate = 1.0 + (0.03 * (i - 1))
        try:
            voice = librosa.effects.time_stretch(audio, rate=stretch_rate)
        except Exception as e:
            # Best effort: a failed voice is skipped, the rest still mix.
            print(f" Warning: Chorus voice {i+1} failed: {e}")
            continue

        # Each successive voice starts 5 ms later than the previous one.
        delay_samples = base_delay_samples + int((i * 5 / 1000) * sr)

        # Clip the voice to the room left in the buffer; a non-positive
        # length means the delay starts past the end of the audio.
        voice_len = min(len(voice), len(output) - delay_samples)
        if voice_len > 0:
            output[delay_samples:delay_samples + voice_len] += voice[:voice_len] * 0.2

    max_val = np.max(np.abs(output))
    if max_val > 0:
        output = output / max_val * 0.95

    return output


@classmethod
def _apply_tremolo(cls, audio: np.ndarray, sr: int, depth: float, rate: float) -> np.ndarray:
    """Apply sinusoidal amplitude modulation (tremolo).

    Args:
        audio: Input samples (1D array).
        sr: Sample rate in Hz.
        depth: Modulation depth; the gain swings between ``1 - depth``
            and 1.
        rate: Modulation frequency in Hz.

    Returns:
        The modulated samples. The input is returned unchanged when
        ``depth`` or ``rate`` is 0 or ``audio`` is empty.
    """
    if depth == 0 or rate == 0 or len(audio) == 0:
        return audio

    # Sample k sits at exactly k / sr seconds. An endpoint-inclusive
    # linspace over the duration would stretch the spacing to
    # duration / (N - 1) and shift the modulation frequency.
    t = np.arange(len(audio)) / sr

    modulation = 1.0 - depth * 0.5 * (1 - np.sin(2 * np.pi * rate * t))

    return audio * modulation


@classmethod
def _apply_robot(cls, audio: np.ndarray, sr: int, intensity: int) -> np.ndarray:
    """Blend the signal with a ring-modulated copy of itself.

    Args:
        audio: Input samples (1D array).
        sr: Sample rate in Hz.
        intensity: Effect strength, 0-100. Sets the sine carrier between
            80 Hz and 300 Hz and the wet share of the mix up to 0.7.

    Returns:
        A new array normalized to a peak of 0.95. The input is returned
        unchanged when ``intensity`` is 0 or ``audio`` is empty.
    """
    # Empty input would make the peak computation below raise ValueError.
    if intensity == 0 or len(audio) == 0:
        return audio

    carrier_freq = 80 + (intensity / 100) * 220

    # Exact per-sample timestamps (k / sr) keep the carrier at carrier_freq.
    t = np.arange(len(audio)) / sr
    carrier = np.sin(2 * np.pi * carrier_freq * t)

    # Ring modulation is the sample-wise product of signal and carrier.
    mix = intensity / 100
    robot_signal = audio * carrier
    output = audio * (1 - mix * 0.7) + robot_signal * mix * 0.7

    max_val = np.max(np.abs(output))
    if max_val > 0:
        output = output / max_val * 0.95

    return output
@classmethod
def count_active_effects(cls, pitch=None, speed=None, **effects) -> int:
    """Count how many effects are set to a non-default value.

    Accepts both call styles: ``pitch`` and ``speed`` positionally, or
    every effect by keyword.

    Args:
        pitch: Pitch shift; active when it differs from ``PITCH_DEFAULT``.
        speed: Speed multiplier; active when it differs from
            ``SPEED_DEFAULT``.
        **effects: Optional ``echo``, ``robot``, ``chorus``,
            ``tremolo_depth`` and ``tremolo_rate`` values. A missing or
            ``None`` value is treated as that effect's default.

    Returns:
        Number of active effects. Tremolo counts once, and only when its
        depth is above the default. If ``tremolo_rate`` is supplied it
        must be above its default too; when it is omitted, depth alone
        decides.
    """
    def level(name, default):
        # Missing and None both mean "not configured".
        value = effects.get(name)
        return default if value is None else value

    pitch = cls.PITCH_DEFAULT if pitch is None else pitch
    speed = cls.SPEED_DEFAULT if speed is None else speed

    count = 0
    if pitch != cls.PITCH_DEFAULT:
        count += 1
    if speed != cls.SPEED_DEFAULT:
        count += 1

    intensity_defaults = (
        ("echo", cls.ECHO_DEFAULT),
        ("robot", cls.ROBOT_DEFAULT),
        ("chorus", cls.CHORUS_DEFAULT),
    )
    count += sum(1 for name, default in intensity_defaults if level(name, default) > default)

    depth_active = level("tremolo_depth", cls.TREMOLO_DEPTH_DEFAULT) > cls.TREMOLO_DEPTH_DEFAULT
    # An explicit rate at or below the default means no tremolo is audible,
    # so it must not count. An omitted rate keeps the depth-only rule.
    rate = effects.get("tremolo_rate")
    rate_disables = rate is not None and rate <= cls.TREMOLO_RATE_DEFAULT
    if depth_active and not rate_disables:
        count += 1

    return count
Higher = faster warble.", } return descriptions.get(effect_name, "Unknown effect") @@ -143,4 +309,29 @@ class AudioEffects: return "1.0x (normal)" direction = "faster" if speed > 1.0 else "slower" return f"{speed:.1f}x ({direction})" + elif effect_name == "echo": + echo = int(value) + if echo == 0: + return "0% (off)" + return f"{echo}%" + elif effect_name == "robot": + robot = int(value) + if robot == 0: + return "0% (off)" + return f"{robot}%" + elif effect_name == "chorus": + chorus = int(value) + if chorus == 0: + return "0% (off)" + return f"{chorus}%" + elif effect_name == "tremolo_depth": + depth = float(value) + if depth == 0.0: + return "0.0 (off)" + return f"{depth:.1f}" + elif effect_name == "tremolo_rate": + rate = float(value) + if rate == 0.0: + return "0.0 Hz (off)" + return f"{rate:.1f} Hz" return str(value) diff --git a/bot.py b/bot.py index c63b02b..23df3cd 100644 --- a/bot.py +++ b/bot.py @@ -141,6 +141,11 @@ class TTSBot(commands.Bot): @app_commands.choices(effect_name=[ app_commands.Choice(name="pitch", value="pitch"), app_commands.Choice(name="speed", value="speed"), + app_commands.Choice(name="echo", value="echo"), + app_commands.Choice(name="robot", value="robot"), + app_commands.Choice(name="chorus", value="chorus"), + app_commands.Choice(name="tremolo_depth", value="tremolo_depth"), + app_commands.Choice(name="tremolo_rate", value="tremolo_rate"), ]) async def effects_command( interaction: discord.Interaction, @@ -174,12 +179,42 @@ class TTSBot(commands.Bot): lines.append(f"⚡ **Speed**: {speed_val}") lines.append(f" {speed_desc}\n") + # Echo + echo_desc = AudioEffects.get_effect_description("echo") + echo_val = AudioEffects.format_effect_value("echo", effects["echo"]) + lines.append(f"🔊 **Echo**: {echo_val}") + lines.append(f" {echo_desc}\n") + + # Robot + robot_desc = AudioEffects.get_effect_description("robot") + robot_val = AudioEffects.format_effect_value("robot", effects["robot"]) + lines.append(f"🤖 **Robot**: {robot_val}") + 
lines.append(f" {robot_desc}\n") + + # Chorus + chorus_desc = AudioEffects.get_effect_description("chorus") + chorus_val = AudioEffects.format_effect_value("chorus", effects["chorus"]) + lines.append(f"🎶 **Chorus**: {chorus_val}") + lines.append(f" {chorus_desc}\n") + + # Tremolo Depth + tremolo_depth_desc = AudioEffects.get_effect_description("tremolo_depth") + tremolo_depth_val = AudioEffects.format_effect_value("tremolo_depth", effects["tremolo_depth"]) + lines.append(f"〰️ **Tremolo Depth**: {tremolo_depth_val}") + lines.append(f" {tremolo_depth_desc}\n") + + # Tremolo Rate + tremolo_rate_desc = AudioEffects.get_effect_description("tremolo_rate") + tremolo_rate_val = AudioEffects.format_effect_value("tremolo_rate", effects["tremolo_rate"]) + lines.append(f"📳 **Tremolo Rate**: {tremolo_rate_val}") + lines.append(f" {tremolo_rate_desc}\n") + # Active count warning - lines.append(f"**Active Effects**: {active_count}/{AudioEffects.MAX_ACTIVE_EFFECTS}") - if active_count >= AudioEffects.MAX_ACTIVE_EFFECTS: - lines.append("⚠️ Max effects reached. More effects = slower processing time.") + lines.append(f"**Active Effects**: {active_count}") + if active_count > 2: + lines.append("⚠️ You have more than 2 active effects. 
Processing may be slower!") elif active_count > 0: - lines.append(f"ℹ️ You can add {AudioEffects.MAX_ACTIVE_EFFECTS - active_count} more effect(s).") + lines.append("ℹ️ Add more effects for fun variations (may slow processing)") lines.append(f"\n*Use `/effects set ` to change settings*") lines.append(f"*Use `/effects reset` to clear all effects*") diff --git a/voice_manager.py b/voice_manager.py index 438f2b6..c2e7a6a 100644 --- a/voice_manager.py +++ b/voice_manager.py @@ -201,9 +201,20 @@ class VoiceManager: # Convert to proper types (JSON stores them as strings) pitch = effects.get("pitch", AudioEffects.PITCH_DEFAULT) speed = effects.get("speed", AudioEffects.SPEED_DEFAULT) + echo = effects.get("echo", AudioEffects.ECHO_DEFAULT) + robot = effects.get("robot", AudioEffects.ROBOT_DEFAULT) + chorus = effects.get("chorus", AudioEffects.CHORUS_DEFAULT) + tremolo_depth = effects.get("tremolo_depth", AudioEffects.TREMOLO_DEPTH_DEFAULT) + tremolo_rate = effects.get("tremolo_rate", AudioEffects.TREMOLO_RATE_DEFAULT) + return { "pitch": int(pitch) if pitch is not None else AudioEffects.PITCH_DEFAULT, "speed": float(speed) if speed is not None else AudioEffects.SPEED_DEFAULT, + "echo": int(echo) if echo is not None else AudioEffects.ECHO_DEFAULT, + "robot": int(robot) if robot is not None else AudioEffects.ROBOT_DEFAULT, + "chorus": int(chorus) if chorus is not None else AudioEffects.CHORUS_DEFAULT, + "tremolo_depth": float(tremolo_depth) if tremolo_depth is not None else AudioEffects.TREMOLO_DEPTH_DEFAULT, + "tremolo_rate": float(tremolo_rate) if tremolo_rate is not None else AudioEffects.TREMOLO_RATE_DEFAULT, } def set_user_effect(self, user_id: int, effect_name: str, value: Any) -> tuple[bool, str]: @@ -222,24 +233,37 @@ class VoiceManager: if user_id not in self._user_effects: self._user_effects[user_id] = {} - # Check if this would exceed max effects + # Save the effect current_effects = self._user_effects[user_id].copy() if effect_name == "pitch": 
current_effects["pitch"] = int(value) elif effect_name == "speed": current_effects["speed"] = float(value) + elif effect_name == "echo": + current_effects["echo"] = int(value) + elif effect_name == "robot": + current_effects["robot"] = int(value) + elif effect_name == "chorus": + current_effects["chorus"] = int(value) + elif effect_name == "tremolo_depth": + current_effects["tremolo_depth"] = float(value) + elif effect_name == "tremolo_rate": + current_effects["tremolo_rate"] = float(value) + # Count active effects and show warning if > 2 active_count = AudioEffects.count_active_effects( - current_effects.get("pitch", AudioEffects.PITCH_DEFAULT), - current_effects.get("speed", AudioEffects.SPEED_DEFAULT), + pitch=current_effects.get("pitch", AudioEffects.PITCH_DEFAULT), + speed=current_effects.get("speed", AudioEffects.SPEED_DEFAULT), + echo=current_effects.get("echo", AudioEffects.ECHO_DEFAULT), + robot=current_effects.get("robot", AudioEffects.ROBOT_DEFAULT), + chorus=current_effects.get("chorus", AudioEffects.CHORUS_DEFAULT), + tremolo_depth=current_effects.get("tremolo_depth", AudioEffects.TREMOLO_DEPTH_DEFAULT), ) - - # Save the effect self._user_effects[user_id][effect_name] = value self._save_preferences() - if active_count >= AudioEffects.MAX_ACTIVE_EFFECTS: - return True, f"Effect applied! ⚠️ You now have {active_count} active effects (max {AudioEffects.MAX_ACTIVE_EFFECTS}). More effects = slower processing." + if active_count > 2: + return True, f"Effect applied! ⚠️ You have {active_count} active effects. Performance may be slower with more effects." else: return True, "Effect applied successfully!" 
def count_active_effects(self, user_id: int) -> int:
    """Return the number of active effects stored for ``user_id``.

    Reads the user's settings through ``get_user_effects`` and forwards
    pitch, speed, echo, robot, chorus and tremolo depth to
    ``AudioEffects.count_active_effects``. Tremolo rate is not forwarded.
    """
    settings = self.get_user_effects(user_id)
    counted = ("pitch", "speed", "echo", "robot", "chorus", "tremolo_depth")
    return AudioEffects.count_active_effects(
        **{name: settings[name] for name in counted}
    )