Spaces:

alexandrainst
/

roest-chatterbox-demo

Sleeping

App Files Files Community

Biorrith committed on Sep 22, 2025

Commit

0951916

1 Parent(s): f5abb2c

fixed for real this time, hopefully

Browse files

Files changed (1) hide show

app.py +3 -4

app.py CHANGED Viewed

@@ -111,7 +111,6 @@ def generate_tts_audio(
     language_id: str,
     audio_prompt_path_input: str = None,
     danish_voice_input: str = "mic",
-    exaggeration_input: float = 0.5,
     temperature_input: float = 0.8,
     seed_num_input: int = 0,
     cfgw_input: float = 0.5
@@ -128,7 +127,6 @@ def generate_tts_audio(
         text_input (str): The text to synthesize into speech (maximum 300 characters)
         language_id (str): The language code for synthesis (eg. en, fr, de, es, it, pt, hi)
         audio_prompt_path_input (str, optional): File path or URL to the reference audio file that defines the target voice style. Defaults to None.
-        exaggeration_input (float, optional): Controls speech expressiveness (0.25-2.0, neutral=0.5, extreme values may be unstable). Defaults to 0.5.
         temperature_input (float, optional): Controls randomness in generation (0.05-5.0, higher=more varied). Defaults to 0.8.
         seed_num_input (int, optional): Random seed for reproducible results (0 for random generation). Defaults to 0.
         cfgw_input (float, optional): CFG/Pace weight controlling generation guidance (0.2-1.0). Defaults to 0.5, 0 for language transfer.
@@ -137,6 +135,8 @@ def generate_tts_audio(
         tuple[int, np.ndarray]: A tuple containing the sample rate (int) and the generated audio waveform (numpy.ndarray)
     """
     current_model = get_or_load_model()
     if current_model is None:
         raise RuntimeError("TTS model is not loaded.")
@@ -150,7 +150,7 @@ def generate_tts_audio(
     chosen_prompt = resolve_audio_prompt(language_id, audio_prompt_path_input, danish_voice_input)
     generate_kwargs = {
-        "exaggeration": exaggeration_input,
         "temperature": temperature_input,
         "cfg_weight": cfgw_input,
     }
@@ -263,7 +263,6 @@ with gr.Blocks() as demo:
             language_id,
             ref_wav,
             danish_voice,
-            0.5,  # Fixed exaggeration
             temp,
             seed_num,
             cfg_weight,

     language_id: str,
     audio_prompt_path_input: str = None,
     danish_voice_input: str = "mic",
     temperature_input: float = 0.8,
     seed_num_input: int = 0,
     cfgw_input: float = 0.5
         text_input (str): The text to synthesize into speech (maximum 300 characters)
         language_id (str): The language code for synthesis (eg. en, fr, de, es, it, pt, hi)
         audio_prompt_path_input (str, optional): File path or URL to the reference audio file that defines the target voice style. Defaults to None.
         temperature_input (float, optional): Controls randomness in generation (0.05-5.0, higher=more varied). Defaults to 0.8.
         seed_num_input (int, optional): Random seed for reproducible results (0 for random generation). Defaults to 0.
         cfgw_input (float, optional): CFG/Pace weight controlling generation guidance (0.2-1.0). Defaults to 0.5, 0 for language transfer.
         tuple[int, np.ndarray]: A tuple containing the sample rate (int) and the generated audio waveform (numpy.ndarray)
     """
     current_model = get_or_load_model()
+    exaggeration: float = 0.5
     if current_model is None:
         raise RuntimeError("TTS model is not loaded.")
     chosen_prompt = resolve_audio_prompt(language_id, audio_prompt_path_input, danish_voice_input)
     generate_kwargs = {
+        "exaggeration": exaggeration,
         "temperature": temperature_input,
         "cfg_weight": cfgw_input,
     }
             language_id,
             ref_wav,
             danish_voice,
             temp,
             seed_num,
             cfg_weight,