feat: wire up all effects to audio processing pipeline
- Updated queue system to pass effects as dict instead of individual params
- Updated process_queue to handle effects_dict for previews
- Updated speak_message to extract all 7 effects from user settings
- Updated _generate_wav_bytes to accept effects dict and pass all params
- Updated _handle_voice_preview to use new effects dict system
- Effects now actually process the audio:
  - pitch, speed, echo, robot, chorus, tremolo_depth, tremolo_rate
- Fixed preview effect description to use preview_effects dict
This commit is contained in:
74
bot.py
74
bot.py
@@ -514,18 +514,25 @@ class TTSBot(commands.Bot):
|
|||||||
|
|
||||||
# Use user's current effects if not overridden
|
# Use user's current effects if not overridden
|
||||||
user_effects = self.voice_manager.get_user_effects(interaction.user.id)
|
user_effects = self.voice_manager.get_user_effects(interaction.user.id)
|
||||||
final_pitch = preview_pitch if preview_pitch is not None else user_effects["pitch"]
|
effect_overrides = {}
|
||||||
final_speed = preview_speed if preview_speed is not None else user_effects["speed"]
|
if preview_pitch is not None:
|
||||||
|
effect_overrides["pitch"] = preview_pitch
|
||||||
|
if preview_speed is not None:
|
||||||
|
effect_overrides["speed"] = preview_speed
|
||||||
|
|
||||||
|
# Use default effects from user settings for preview
|
||||||
|
preview_effects = user_effects.copy()
|
||||||
|
preview_effects.update(effect_overrides)
|
||||||
|
|
||||||
# Queue the preview with voice override and effects
|
# Queue the preview with voice override and effects
|
||||||
await self.message_queue.put((preview_message, preview_text, voice_name, final_pitch, final_speed))
|
await self.message_queue.put((preview_message, preview_text, voice_name, preview_effects))
|
||||||
|
|
||||||
# Build effect description
|
# Build effect description
|
||||||
effect_desc = []
|
effect_desc = []
|
||||||
if final_pitch != 0:
|
if preview_effects.get("pitch", 0) != 0:
|
||||||
effect_desc.append(f"pitch: {final_pitch:+d}")
|
effect_desc.append(f"pitch: {preview_effects['pitch']:+d}")
|
||||||
if final_speed != 1.0:
|
if preview_effects.get("speed", 1.0) != 1.0:
|
||||||
effect_desc.append(f"speed: {final_speed:.1f}x")
|
effect_desc.append(f"speed: {preview_effects['speed']:.1f}x")
|
||||||
|
|
||||||
effect_str = f" (with {', '.join(effect_desc)})" if effect_desc else ""
|
effect_str = f" (with {', '.join(effect_desc)})" if effect_desc else ""
|
||||||
|
|
||||||
@@ -622,24 +629,22 @@ class TTSBot(commands.Bot):
|
|||||||
while True:
|
while True:
|
||||||
queue_item = await self.message_queue.get()
|
queue_item = await self.message_queue.get()
|
||||||
|
|
||||||
# Handle queue items of different lengths:
|
# Handle queue items:
|
||||||
# - (message, text) - regular message
|
# - (message, text) - regular message
|
||||||
# - (message, text, voice_name) - preview with voice override
|
# - (message, text, voice_override) - preview with voice override
|
||||||
# - (message, text, voice_name, pitch, speed) - preview with effects
|
# - (message, text, voice_override, effects_dict) - preview with effect overrides
|
||||||
if len(queue_item) == 5:
|
if len(queue_item) == 4 and isinstance(queue_item[3], dict):
|
||||||
message, text, voice_override, pitch, speed = queue_item
|
message, text, voice_override, effect_overrides = queue_item
|
||||||
elif len(queue_item) == 3:
|
elif len(queue_item) == 3:
|
||||||
message, text, voice_override = queue_item
|
message, text, voice_override = queue_item
|
||||||
pitch = None
|
effect_overrides = {}
|
||||||
speed = None
|
|
||||||
else:
|
else:
|
||||||
message, text = queue_item
|
message, text = queue_item
|
||||||
voice_override = None
|
voice_override = None
|
||||||
pitch = None
|
effect_overrides = {}
|
||||||
speed = None
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await self.speak_message(message, text, voice_override, pitch, speed)
|
await self.speak_message(message, text, voice_override, effect_overrides)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error processing message: {e}")
|
print(f"Error processing message: {e}")
|
||||||
finally:
|
finally:
|
||||||
@@ -650,8 +655,7 @@ class TTSBot(commands.Bot):
|
|||||||
message: discord.Message,
|
message: discord.Message,
|
||||||
text: str,
|
text: str,
|
||||||
voice_override: str | None = None,
|
voice_override: str | None = None,
|
||||||
pitch: int | None = None,
|
effect_overrides: dict | None = None,
|
||||||
speed: float | None = None,
|
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Generate TTS and play it in the user's voice channel."""
|
"""Generate TTS and play it in the user's voice channel."""
|
||||||
if message.author.voice is None:
|
if message.author.voice is None:
|
||||||
@@ -685,16 +689,14 @@ class TTSBot(commands.Bot):
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Get user's effects if not overridden
|
# Get user's effects and apply any overrides
|
||||||
if pitch is None or speed is None:
|
user_effects = self.voice_manager.get_user_effects(message.author.id)
|
||||||
user_effects = self.voice_manager.get_user_effects(message.author.id)
|
effects = user_effects.copy()
|
||||||
if pitch is None:
|
if effect_overrides:
|
||||||
pitch = user_effects["pitch"]
|
effects.update(effect_overrides)
|
||||||
if speed is None:
|
|
||||||
speed = user_effects["speed"]
|
|
||||||
|
|
||||||
wav_bytes = await asyncio.to_thread(
|
wav_bytes = await asyncio.to_thread(
|
||||||
self._generate_wav_bytes, voice_state, text, pitch, speed
|
self._generate_wav_bytes, voice_state, text, effects
|
||||||
)
|
)
|
||||||
|
|
||||||
audio_source = discord.FFmpegPCMAudio(
|
audio_source = discord.FFmpegPCMAudio(
|
||||||
@@ -723,8 +725,7 @@ class TTSBot(commands.Bot):
|
|||||||
self,
|
self,
|
||||||
voice_state: Any,
|
voice_state: Any,
|
||||||
text: str,
|
text: str,
|
||||||
pitch: int = 0,
|
effects: dict,
|
||||||
speed: float = 1.0,
|
|
||||||
) -> bytes:
|
) -> bytes:
|
||||||
"""Generate audio and return as WAV file bytes."""
|
"""Generate audio and return as WAV file bytes."""
|
||||||
model = self.voice_manager.model
|
model = self.voice_manager.model
|
||||||
@@ -739,12 +740,21 @@ class TTSBot(commands.Bot):
|
|||||||
audio_np = audio_np.reshape(-1, 1)
|
audio_np = audio_np.reshape(-1, 1)
|
||||||
|
|
||||||
# Apply audio effects if any are active
|
# Apply audio effects if any are active
|
||||||
if pitch != 0 or speed != 1.0:
|
pitch = effects.get("pitch", AudioEffects.PITCH_DEFAULT)
|
||||||
print(f"Applying effects - Pitch: {pitch:+d}, Speed: {speed:.1f}x")
|
speed = effects.get("speed", AudioEffects.SPEED_DEFAULT)
|
||||||
|
echo = effects.get("echo", AudioEffects.ECHO_DEFAULT)
|
||||||
|
robot = effects.get("robot", AudioEffects.ROBOT_DEFAULT)
|
||||||
|
chorus = effects.get("chorus", AudioEffects.CHORUS_DEFAULT)
|
||||||
|
tremolo_depth = effects.get("tremolo_depth", AudioEffects.TREMOLO_DEPTH_DEFAULT)
|
||||||
|
tremolo_rate = effects.get("tremolo_rate", AudioEffects.TREMOLO_RATE_DEFAULT)
|
||||||
|
|
||||||
|
if any([pitch != 0, speed != 1.0, echo > 0, robot > 0, chorus > 0, tremolo_depth > 0]):
|
||||||
|
print(f"Applying {AudioEffects.count_active_effects(**effects)} effect(s)...")
|
||||||
# Squeeze to 1D for librosa effects, then reshape back
|
# Squeeze to 1D for librosa effects, then reshape back
|
||||||
audio_1d = audio_np.squeeze()
|
audio_1d = audio_np.squeeze()
|
||||||
audio_1d, show_processing = AudioEffects.apply_effects(
|
audio_1d, show_processing = AudioEffects.apply_effects(
|
||||||
audio_1d, model.sample_rate, pitch, speed
|
audio_1d, model.sample_rate,
|
||||||
|
pitch, speed, echo, robot, chorus, tremolo_depth, tremolo_rate
|
||||||
)
|
)
|
||||||
# Reshape back to 2D
|
# Reshape back to 2D
|
||||||
audio_np = audio_1d.reshape(-1, 1)
|
audio_np = audio_1d.reshape(-1, 1)
|
||||||
|
|||||||
Reference in New Issue
Block a user