| import random |
| import os |
| import numpy as np |
| import torch |
| import gradio as gr |
| import spaces |
| from chatterbox.tts_turbo import ChatterboxTurboTTS |
|
|
|
|
| MODEL = ChatterboxTurboTTS.from_pretrained("cuda" ) |
|
|
|
|
| CUSTOM_CSS = """ |
| .tag-container { |
| display: flex !important; |
| flex-wrap: wrap !important; |
| gap: 8px !important; |
| margin-top: 5px !important; |
| margin-bottom: 10px !important; |
| border: none !important; |
| background: transparent !important; |
| } |
| |
| .tag-btn { |
| min-width: fit-content !important; |
| width: auto !important; |
| height: 32px !important; |
| font-size: 13px !important; |
| background: #eef2ff !important; |
| border: 1px solid #c7d2fe !important; |
| color: #3730a3 !important; |
| border-radius: 6px !important; |
| padding: 0 10px !important; |
| margin: 0 !important; |
| box-shadow: none !important; |
| } |
| |
| .tag-btn:hover { |
| background: #c7d2fe !important; |
| transform: translateY(-1px); |
| } |
| """ |
|
|
| INSERT_TAG_JS = """ |
| (tag_val, current_text) => { |
| const textarea = document.querySelector('#main_textbox textarea'); |
| if (!textarea) return current_text + " " + tag_val; |
| |
| const start = textarea.selectionStart; |
| const end = textarea.selectionEnd; |
| |
| let prefix = " "; |
| let suffix = " "; |
| |
| if (start === 0) prefix = ""; |
| else if (current_text[start - 1] === ' ') prefix = ""; |
| |
| if (end < current_text.length && current_text[end] === ' ') suffix = ""; |
| |
| return current_text.slice(0, start) + prefix + tag_val + suffix + current_text.slice(end); |
| } |
| """ |
|
|
| def set_seed(seed: int): |
| torch.manual_seed(seed) |
| torch.cuda.manual_seed(seed) |
| torch.cuda.manual_seed_all(seed) |
| random.seed(seed) |
| np.random.seed(seed) |
|
|
| @spaces.GPU |
| def generate( |
| text, |
| audio_prompt_path, |
| temperature, |
| seed_num, |
| top_p, |
| top_k, |
| repetition_penalty, |
| norm_loudness |
| ): |
| if seed_num != 0: |
| set_seed(int(seed_num)) |
|
|
| wav = MODEL.generate( |
| text, |
| audio_prompt_path=audio_prompt_path, |
| temperature=temperature, |
| top_p=top_p, |
| top_k=int(top_k), |
| repetition_penalty=repetition_penalty, |
| norm_loudness=norm_loudness, |
| ) |
|
|
| return (MODEL.sr, wav.squeeze(0).cpu().numpy()) |
|
|
|
|
| VOICE_OPTIONS = { |
| "mic": "samples/mic_trimmed.wav", |
| "nic": "samples/nic_trimmed.wav" |
| } |
|
|
| def update_ref_audio(voice_name): |
| return VOICE_OPTIONS.get(voice_name, list(VOICE_OPTIONS.values())[0]) |
|
|
| with gr.Blocks(title="Chatterbox Turbo") as demo: |
| gr.Markdown("# ⚡ Chatterbox Turbo") |
|
|
| with gr.Row(): |
| with gr.Column(): |
| text = gr.Textbox( |
| value="København er Danmarks hovedstad og ligger på øerne Sjælland og Amager, hvor mange turister besøger de smukke kanaler og historiske bygninger.", |
| label="Text to synthesize (max chars 300)", |
| max_lines=5, |
| elem_id="main_textbox" |
| ) |
|
|
| voice = gr.Dropdown( |
| choices=list(VOICE_OPTIONS.keys()), |
| value="mic", |
| label="Voice Selection", |
| info="Choose a voice or upload your own below" |
| ) |
|
|
| ref_wav = gr.Audio( |
| sources=["upload", "microphone"], |
| type="filepath", |
| label="Reference Audio File", |
| value=VOICE_OPTIONS["mic"], |
| ) |
|
|
| run_btn = gr.Button("Generate ⚡", variant="primary") |
|
|
| with gr.Column(): |
| audio_output = gr.Audio(label="Output Audio") |
|
|
| with gr.Accordion("Advanced Options", open=False): |
| seed_num = gr.Number(value=0, label="Random seed (0 for random)") |
| temp = gr.Slider(0.05, 2.0, step=.05, label="Temperature", value=0.7) |
| top_p = gr.Slider(0.00, 1.00, step=0.01, label="Top P", value=0.95) |
| top_k = gr.Slider(0, 1000, step=10, label="Top K", value=600) |
| repetition_penalty = gr.Slider(1.00, 2.00, step=0.05, label="Repetition Penalty", value=1.2) |
| norm_loudness = gr.Checkbox(value=True, label="Normalize Loudness (-27 LUFS)") |
|
|
| voice.change( |
| fn=update_ref_audio, |
| inputs=[voice], |
| outputs=[ref_wav], |
| ) |
|
|
| run_btn.click( |
| fn=generate, |
| inputs=[ |
| text, |
| ref_wav, |
| temp, |
| seed_num, |
| top_p, |
| top_k, |
| repetition_penalty, |
| norm_loudness, |
| ], |
| outputs=audio_output, |
| ) |
|
|
| if __name__ == "__main__": |
| demo.queue().launch( |
| mcp_server=True, |
| css=CUSTOM_CSS, |
| ssr_mode=False |
| ) |
|
|