feat: add 4 new voice effects (echo, robot, chorus, tremolo)

- Removed MAX_ACTIVE_EFFECTS limit (effects unlimited)
- Added echo effect (0-100%): spatial delay/reverb
- Added robot effect (0-100%): ring modulation voice
- Added chorus effect (0-100%): multiple voices effect
- Added tremolo depth (0.0-1.0) and rate (0.0-10.0 Hz): amplitude modulation
- Effects apply in order: pitch → speed → echo → chorus → tremolo → robot
- Updated /effects command with all 7 effect choices
- Updated /effects list to display all 7 effects with emojis
- Updated warning system: warns when > 2 active effects
- Added validation and formatting for all new effects
- Updated voice_manager.py to handle all 7 effect storage/loading

Note: Cancel button for processing >10s not yet implemented
Note: Queue system needs updating to handle all effect parameters
This commit is contained in:
2026-01-31 17:10:19 -06:00
parent 8d4ac59f73
commit 795d5087e9
3 changed files with 306 additions and 49 deletions

View File

@@ -10,9 +10,10 @@ import numpy as np
class AudioEffects:
"""Apply post-processing effects to TTS audio."""
MAX_ACTIVE_EFFECTS = 2
# No limit on effects, but warnings shown when > 2 active
MAX_ACTIVE_EFFECTS = None
# Effect ranges
# Effect ranges and defaults
PITCH_MIN = -12
PITCH_MAX = 12
PITCH_DEFAULT = 0
@@ -21,6 +22,26 @@ class AudioEffects:
SPEED_MAX = 2.0
SPEED_DEFAULT = 1.0
ECHO_MIN = 0
ECHO_MAX = 100
ECHO_DEFAULT = 0
ROBOT_MIN = 0
ROBOT_MAX = 100
ROBOT_DEFAULT = 0
CHORUS_MIN = 0
CHORUS_MAX = 100
CHORUS_DEFAULT = 0
TREMOLO_DEPTH_MIN = 0.0
TREMOLO_DEPTH_MAX = 1.0
TREMOLO_DEPTH_DEFAULT = 0.0
TREMOLO_RATE_MIN = 0.0
TREMOLO_RATE_MAX = 10.0
TREMOLO_RATE_DEFAULT = 0.0
@classmethod
def apply_effects(
cls,
@@ -28,15 +49,25 @@ class AudioEffects:
sr: int,
pitch: int = PITCH_DEFAULT,
speed: float = SPEED_DEFAULT,
echo: int = ECHO_DEFAULT,
robot: int = ROBOT_DEFAULT,
chorus: int = CHORUS_DEFAULT,
tremolo_depth: float = TREMOLO_DEPTH_DEFAULT,
tremolo_rate: float = TREMOLO_RATE_DEFAULT,
) -> tuple[np.ndarray, bool]:
"""
Apply effects to audio.
Apply effects to audio in order: pitch → speed → echo → chorus → tremolo → robot
Args:
audio: Input audio array
audio: Input audio array (1D)
sr: Sample rate
pitch: Pitch shift in semitones (-12 to +12, 0 = no shift)
speed: Speed multiplier (0.5 to 2.0, 1.0 = normal)
echo: Echo intensity (0-100, 0 = no echo)
robot: Robot voice intensity (0-100, 0 = no robot)
chorus: Chorus intensity (0-100, 0 = no chorus)
tremolo_depth: Tremolo depth (0.0-1.0, 0.0 = no tremolo)
tremolo_rate: Tremolo rate in Hz (0.0-10.0)
Returns:
Tuple of (processed_audio, show_processing_message)
@@ -48,24 +79,43 @@ class AudioEffects:
# Validate inputs
pitch = max(cls.PITCH_MIN, min(cls.PITCH_MAX, pitch))
speed = max(cls.SPEED_MIN, min(cls.SPEED_MAX, speed))
echo = max(cls.ECHO_MIN, min(cls.ECHO_MAX, echo))
robot = max(cls.ROBOT_MIN, min(cls.ROBOT_MAX, robot))
chorus = max(cls.CHORUS_MIN, min(cls.CHORUS_MAX, chorus))
tremolo_depth = max(cls.TREMOLO_DEPTH_MIN, min(cls.TREMOLO_DEPTH_MAX, tremolo_depth))
tremolo_rate = max(cls.TREMOLO_RATE_MIN, min(cls.TREMOLO_RATE_MAX, tremolo_rate))
print(f"Applying effects - Pitch: {pitch:+d}, Speed: {speed:.1f}x")
# Apply pitch shift first (if not default)
# Apply pitch shift first
if pitch != cls.PITCH_DEFAULT:
print(f" Applying pitch shift: {pitch:+d} semitones...")
audio = librosa.effects.pitch_shift(
audio, sr=sr, n_steps=pitch, bins_per_octave=12
)
# Apply speed change second (if not default)
# Apply speed change second
if speed != cls.SPEED_DEFAULT:
print(f" Applying speed change: {speed:.1f}x...")
audio = librosa.effects.time_stretch(audio, rate=speed)
# Stretching changes length, so we need to resample to maintain duration
# Actually, for TTS we want the new speed, so we don't resample back
# The audio will be shorter or longer based on speed
# Apply echo third
if echo > 0:
print(f" Applying echo: {echo}%...")
audio = cls._apply_echo(audio, sr, echo)
# Apply chorus fourth
if chorus > 0:
print(f" Applying chorus: {chorus}%...")
audio = cls._apply_chorus(audio, sr, chorus)
# Apply tremolo fifth
if tremolo_depth > 0 and tremolo_rate > 0:
print(f" Applying tremolo: depth={tremolo_depth:.1f}, rate={tremolo_rate:.1f}Hz...")
audio = cls._apply_tremolo(audio, sr, tremolo_depth, tremolo_rate)
# Apply robot voice last
if robot > 0:
print(f" Applying robot effect: {robot}%...")
audio = cls._apply_robot(audio, sr, robot)
processing_time = time.time() - start_time
print(f" Effects applied in {processing_time:.2f}s")
@@ -75,6 +125,110 @@ class AudioEffects:
return audio, show_message
@classmethod
def _apply_echo(cls, audio: np.ndarray, sr: int, intensity: int) -> np.ndarray:
"""Apply simple echo/reverb effect."""
if intensity == 0:
return audio
# Calculate delay in samples (50-300ms based on intensity)
delay_ms = 50 + (intensity / 100) * 250
delay_samples = int((delay_ms / 1000) * sr)
# Create output array
output = np.copy(audio)
# Add delayed copy with decay
decay = 0.3 + (intensity / 100) * 0.4 # 0.3-0.7 decay factor
if delay_samples < len(audio):
output[delay_samples:] += audio[:-delay_samples] * decay
# Normalize
max_val = np.max(np.abs(output))
if max_val > 0:
output = output / max_val * np.max(np.abs(audio))
return output
@classmethod
def _apply_chorus(cls, audio: np.ndarray, sr: int, intensity: int) -> np.ndarray:
"""Apply chorus effect using multiple delayed voices."""
if intensity == 0:
return audio
# Number of voices based on intensity (1-3)
num_voices = 1 + int((intensity / 100) * 2)
# Base delay (15-30ms)
base_delay_ms = 15 + (intensity / 100) * 15
base_delay_samples = int((base_delay_ms / 1000) * sr)
output = np.copy(audio) * 0.6 # Reduce original to make room for voices
for i in range(num_voices):
# Slight pitch variation for each voice (±3%)
pitch_var = 1.0 + (0.03 * (i - 1))
try:
voice = librosa.effects.time_stretch(audio, rate=pitch_var)
# Slight delay variation
delay_samples = base_delay_samples + int((i * 5 / 1000) * sr)
# Mix voice into output
voice_len = min(len(voice), len(output) - delay_samples)
if voice_len > 0:
output[delay_samples:delay_samples + voice_len] += voice[:voice_len] * 0.2
except Exception as e:
print(f" Warning: Chorus voice {i+1} failed: {e}")
# Normalize
max_val = np.max(np.abs(output))
if max_val > 0:
output = output / max_val * 0.95
return output
@classmethod
def _apply_tremolo(cls, audio: np.ndarray, sr: int, depth: float, rate: float) -> np.ndarray:
"""Apply tremolo effect (amplitude modulation)."""
if depth == 0 or rate == 0:
return audio
# Create modulation signal
duration = len(audio) / sr
t = np.linspace(0, duration, len(audio))
# Sine wave modulation at specified rate
modulation = 1.0 - depth * 0.5 * (1 - np.sin(2 * np.pi * rate * t))
return audio * modulation
@classmethod
def _apply_robot(cls, audio: np.ndarray, sr: int, intensity: int) -> np.ndarray:
"""Apply robot voice effect using ring modulation."""
if intensity == 0:
return audio
# Carrier frequency based on intensity (80-300 Hz)
carrier_freq = 80 + (intensity / 100) * 220
# Create carrier signal
duration = len(audio) / sr
t = np.linspace(0, duration, len(audio))
carrier = np.sin(2 * np.pi * carrier_freq * t)
# Mix original with ring-modulated version based on intensity
mix = intensity / 100
robot_signal = audio * carrier
output = audio * (1 - mix * 0.7) + robot_signal * mix * 0.7
# Normalize
max_val = np.max(np.abs(output))
if max_val > 0:
output = output / max_val * 0.95
return output
@classmethod
def validate_effect(cls, effect_name: str, value: Any) -> tuple[bool, str]:
"""
@@ -83,40 +237,47 @@ class AudioEffects:
Returns:
Tuple of (is_valid, error_message)
"""
if effect_name == "pitch":
try:
pitch = int(value)
if cls.PITCH_MIN <= pitch <= cls.PITCH_MAX:
return True, ""
return (
False,
f"Pitch must be between {cls.PITCH_MIN} and {cls.PITCH_MAX} semitones",
)
except (ValueError, TypeError):
return False, "Pitch must be a whole number"
validators = {
"pitch": (int, cls.PITCH_MIN, cls.PITCH_MAX, "Pitch must be a whole number", "semitones"),
"speed": (float, cls.SPEED_MIN, cls.SPEED_MAX, "Speed must be a number", "x"),
"echo": (int, cls.ECHO_MIN, cls.ECHO_MAX, "Echo must be a whole number", "%"),
"robot": (int, cls.ROBOT_MIN, cls.ROBOT_MAX, "Robot must be a whole number", "%"),
"chorus": (int, cls.CHORUS_MIN, cls.CHORUS_MAX, "Chorus must be a whole number", "%"),
"tremolo_depth": (float, cls.TREMOLO_DEPTH_MIN, cls.TREMOLO_DEPTH_MAX, "Tremolo depth must be a number", ""),
"tremolo_rate": (float, cls.TREMOLO_RATE_MIN, cls.TREMOLO_RATE_MAX, "Tremolo rate must be a number", "Hz"),
}
elif effect_name == "speed":
try:
speed = float(value)
if cls.SPEED_MIN <= speed <= cls.SPEED_MAX:
return True, ""
return (
False,
f"Speed must be between {cls.SPEED_MIN} and {cls.SPEED_MAX}",
)
except (ValueError, TypeError):
return False, "Speed must be a number"
if effect_name not in validators:
return False, f"Unknown effect: {effect_name}"
return False, f"Unknown effect: {effect_name}"
type_func, min_val, max_val, error_msg, unit = validators[effect_name]
try:
val = type_func(value)
if min_val <= val <= max_val:
return True, ""
unit_str = f" {unit}" if unit else ""
return False, f"{effect_name.replace('_', ' ').title()} must be between {min_val} and {max_val}{unit_str}"
except (ValueError, TypeError):
return False, error_msg
@classmethod
def count_active_effects(cls, pitch: int, speed: float) -> int:
def count_active_effects(cls, **effects) -> int:
"""Count how many effects are active (non-default)."""
count = 0
if pitch != cls.PITCH_DEFAULT:
if effects.get("pitch", cls.PITCH_DEFAULT) != cls.PITCH_DEFAULT:
count += 1
if speed != cls.SPEED_DEFAULT:
if effects.get("speed", cls.SPEED_DEFAULT) != cls.SPEED_DEFAULT:
count += 1
if effects.get("echo", cls.ECHO_DEFAULT) > cls.ECHO_DEFAULT:
count += 1
if effects.get("robot", cls.ROBOT_DEFAULT) > cls.ROBOT_DEFAULT:
count += 1
if effects.get("chorus", cls.CHORUS_DEFAULT) > cls.CHORUS_DEFAULT:
count += 1
if effects.get("tremolo_depth", cls.TREMOLO_DEPTH_DEFAULT) > cls.TREMOLO_DEPTH_DEFAULT:
count += 1
# tremolo_rate only counts if depth is also active
return count
@classmethod
@@ -125,6 +286,11 @@ class AudioEffects:
descriptions = {
"pitch": f"Changes voice pitch ({cls.PITCH_MIN} to {cls.PITCH_MAX} semitones). Positive = higher/chipmunk, Negative = lower/deeper.",
"speed": f"Changes speech speed ({cls.SPEED_MIN} to {cls.SPEED_MAX}x). Higher = faster, Lower = slower.",
"echo": f"Adds echo/reverb ({cls.ECHO_MIN} to {cls.ECHO_MAX}%). Higher = more pronounced echo.",
"robot": f"Applies robot voice effect ({cls.ROBOT_MIN} to {cls.ROBOT_MAX}%). Higher = more robotic.",
"chorus": f"Adds chorus effect ({cls.CHORUS_MIN} to {cls.CHORUS_MAX}%). Higher = more voices/depth.",
"tremolo_depth": f"Tremolo amplitude modulation ({cls.TREMOLO_DEPTH_MIN} to {cls.TREMOLO_DEPTH_MAX}). Higher = more warble.",
"tremolo_rate": f"Tremolo speed ({cls.TREMOLO_RATE_MIN} to {cls.TREMOLO_RATE_MAX} Hz). Higher = faster warble.",
}
return descriptions.get(effect_name, "Unknown effect")
@@ -143,4 +309,29 @@ class AudioEffects:
return "1.0x (normal)"
direction = "faster" if speed > 1.0 else "slower"
return f"{speed:.1f}x ({direction})"
elif effect_name == "echo":
echo = int(value)
if echo == 0:
return "0% (off)"
return f"{echo}%"
elif effect_name == "robot":
robot = int(value)
if robot == 0:
return "0% (off)"
return f"{robot}%"
elif effect_name == "chorus":
chorus = int(value)
if chorus == 0:
return "0% (off)"
return f"{chorus}%"
elif effect_name == "tremolo_depth":
depth = float(value)
if depth == 0.0:
return "0.0 (off)"
return f"{depth:.1f}"
elif effect_name == "tremolo_rate":
rate = float(value)
if rate == 0.0:
return "0.0 Hz (off)"
return f"{rate:.1f} Hz"
return str(value)

43
bot.py
View File

@@ -141,6 +141,11 @@ class TTSBot(commands.Bot):
@app_commands.choices(effect_name=[
app_commands.Choice(name="pitch", value="pitch"),
app_commands.Choice(name="speed", value="speed"),
app_commands.Choice(name="echo", value="echo"),
app_commands.Choice(name="robot", value="robot"),
app_commands.Choice(name="chorus", value="chorus"),
app_commands.Choice(name="tremolo_depth", value="tremolo_depth"),
app_commands.Choice(name="tremolo_rate", value="tremolo_rate"),
])
async def effects_command(
interaction: discord.Interaction,
@@ -174,12 +179,42 @@ class TTSBot(commands.Bot):
lines.append(f"⚡ **Speed**: {speed_val}")
lines.append(f" {speed_desc}\n")
# Echo
echo_desc = AudioEffects.get_effect_description("echo")
echo_val = AudioEffects.format_effect_value("echo", effects["echo"])
lines.append(f"🔊 **Echo**: {echo_val}")
lines.append(f" {echo_desc}\n")
# Robot
robot_desc = AudioEffects.get_effect_description("robot")
robot_val = AudioEffects.format_effect_value("robot", effects["robot"])
lines.append(f"🤖 **Robot**: {robot_val}")
lines.append(f" {robot_desc}\n")
# Chorus
chorus_desc = AudioEffects.get_effect_description("chorus")
chorus_val = AudioEffects.format_effect_value("chorus", effects["chorus"])
lines.append(f"🎶 **Chorus**: {chorus_val}")
lines.append(f" {chorus_desc}\n")
# Tremolo Depth
tremolo_depth_desc = AudioEffects.get_effect_description("tremolo_depth")
tremolo_depth_val = AudioEffects.format_effect_value("tremolo_depth", effects["tremolo_depth"])
lines.append(f"〰️ **Tremolo Depth**: {tremolo_depth_val}")
lines.append(f" {tremolo_depth_desc}\n")
# Tremolo Rate
tremolo_rate_desc = AudioEffects.get_effect_description("tremolo_rate")
tremolo_rate_val = AudioEffects.format_effect_value("tremolo_rate", effects["tremolo_rate"])
lines.append(f"📳 **Tremolo Rate**: {tremolo_rate_val}")
lines.append(f" {tremolo_rate_desc}\n")
# Active count warning
lines.append(f"**Active Effects**: {active_count}/{AudioEffects.MAX_ACTIVE_EFFECTS}")
if active_count >= AudioEffects.MAX_ACTIVE_EFFECTS:
lines.append("⚠️ Max effects reached. More effects = slower processing time.")
lines.append(f"**Active Effects**: {active_count}")
if active_count > 2:
lines.append("⚠️ You have more than 2 active effects. Processing may be slower!")
elif active_count > 0:
lines.append(f" You can add {AudioEffects.MAX_ACTIVE_EFFECTS - active_count} more effect(s).")
lines.append(" Add more effects for fun variations (may slow processing)")
lines.append(f"\n*Use `/effects set <effect> <value>` to change settings*")
lines.append(f"*Use `/effects reset` to clear all effects*")

View File

@@ -201,9 +201,20 @@ class VoiceManager:
# Convert to proper types (JSON stores them as strings)
pitch = effects.get("pitch", AudioEffects.PITCH_DEFAULT)
speed = effects.get("speed", AudioEffects.SPEED_DEFAULT)
echo = effects.get("echo", AudioEffects.ECHO_DEFAULT)
robot = effects.get("robot", AudioEffects.ROBOT_DEFAULT)
chorus = effects.get("chorus", AudioEffects.CHORUS_DEFAULT)
tremolo_depth = effects.get("tremolo_depth", AudioEffects.TREMOLO_DEPTH_DEFAULT)
tremolo_rate = effects.get("tremolo_rate", AudioEffects.TREMOLO_RATE_DEFAULT)
return {
"pitch": int(pitch) if pitch is not None else AudioEffects.PITCH_DEFAULT,
"speed": float(speed) if speed is not None else AudioEffects.SPEED_DEFAULT,
"echo": int(echo) if echo is not None else AudioEffects.ECHO_DEFAULT,
"robot": int(robot) if robot is not None else AudioEffects.ROBOT_DEFAULT,
"chorus": int(chorus) if chorus is not None else AudioEffects.CHORUS_DEFAULT,
"tremolo_depth": float(tremolo_depth) if tremolo_depth is not None else AudioEffects.TREMOLO_DEPTH_DEFAULT,
"tremolo_rate": float(tremolo_rate) if tremolo_rate is not None else AudioEffects.TREMOLO_RATE_DEFAULT,
}
def set_user_effect(self, user_id: int, effect_name: str, value: Any) -> tuple[bool, str]:
@@ -222,24 +233,37 @@ class VoiceManager:
if user_id not in self._user_effects:
self._user_effects[user_id] = {}
# Check if this would exceed max effects
# Save the effect
current_effects = self._user_effects[user_id].copy()
if effect_name == "pitch":
current_effects["pitch"] = int(value)
elif effect_name == "speed":
current_effects["speed"] = float(value)
elif effect_name == "echo":
current_effects["echo"] = int(value)
elif effect_name == "robot":
current_effects["robot"] = int(value)
elif effect_name == "chorus":
current_effects["chorus"] = int(value)
elif effect_name == "tremolo_depth":
current_effects["tremolo_depth"] = float(value)
elif effect_name == "tremolo_rate":
current_effects["tremolo_rate"] = float(value)
# Count active effects and show warning if > 2
active_count = AudioEffects.count_active_effects(
current_effects.get("pitch", AudioEffects.PITCH_DEFAULT),
current_effects.get("speed", AudioEffects.SPEED_DEFAULT),
pitch=current_effects.get("pitch", AudioEffects.PITCH_DEFAULT),
speed=current_effects.get("speed", AudioEffects.SPEED_DEFAULT),
echo=current_effects.get("echo", AudioEffects.ECHO_DEFAULT),
robot=current_effects.get("robot", AudioEffects.ROBOT_DEFAULT),
chorus=current_effects.get("chorus", AudioEffects.CHORUS_DEFAULT),
tremolo_depth=current_effects.get("tremolo_depth", AudioEffects.TREMOLO_DEPTH_DEFAULT),
)
# Save the effect
self._user_effects[user_id][effect_name] = value
self._save_preferences()
if active_count >= AudioEffects.MAX_ACTIVE_EFFECTS:
return True, f"Effect applied! ⚠️ You now have {active_count} active effects (max {AudioEffects.MAX_ACTIVE_EFFECTS}). More effects = slower processing."
if active_count > 2:
return True, f"Effect applied! ⚠️ You have {active_count} active effects. Performance may be slower with more effects."
else:
return True, "Effect applied successfully!"
@@ -252,7 +276,14 @@ class VoiceManager:
def count_active_effects(self, user_id: int) -> int:
"""Count how many effects are active for a user."""
effects = self.get_user_effects(user_id)
return AudioEffects.count_active_effects(effects["pitch"], effects["speed"])
return AudioEffects.count_active_effects(
pitch=effects["pitch"],
speed=effects["speed"],
echo=effects["echo"],
robot=effects["robot"],
chorus=effects["chorus"],
tremolo_depth=effects["tremolo_depth"],
)
def _load_preferences(self) -> None:
"""Load user voice preferences from JSON file."""