Biorrith committed on
Commit
0951916
·
1 Parent(s): f5abb2c

fixed for real this time, hopefully

Browse files
Files changed (1) hide show
  1. app.py +3 -4
app.py CHANGED
@@ -111,7 +111,6 @@ def generate_tts_audio(
111
  language_id: str,
112
  audio_prompt_path_input: str = None,
113
  danish_voice_input: str = "mic",
114
- exaggeration_input: float = 0.5,
115
  temperature_input: float = 0.8,
116
  seed_num_input: int = 0,
117
  cfgw_input: float = 0.5
@@ -128,7 +127,6 @@ def generate_tts_audio(
128
  text_input (str): The text to synthesize into speech (maximum 300 characters)
129
  language_id (str): The language code for synthesis (eg. en, fr, de, es, it, pt, hi)
130
  audio_prompt_path_input (str, optional): File path or URL to the reference audio file that defines the target voice style. Defaults to None.
131
- exaggeration_input (float, optional): Controls speech expressiveness (0.25-2.0, neutral=0.5, extreme values may be unstable). Defaults to 0.5.
132
  temperature_input (float, optional): Controls randomness in generation (0.05-5.0, higher=more varied). Defaults to 0.8.
133
  seed_num_input (int, optional): Random seed for reproducible results (0 for random generation). Defaults to 0.
134
  cfgw_input (float, optional): CFG/Pace weight controlling generation guidance (0.2-1.0). Defaults to 0.5, 0 for language transfer.
@@ -137,6 +135,8 @@ def generate_tts_audio(
137
  tuple[int, np.ndarray]: A tuple containing the sample rate (int) and the generated audio waveform (numpy.ndarray)
138
  """
139
  current_model = get_or_load_model()
 
 
140
 
141
  if current_model is None:
142
  raise RuntimeError("TTS model is not loaded.")
@@ -150,7 +150,7 @@ def generate_tts_audio(
150
  chosen_prompt = resolve_audio_prompt(language_id, audio_prompt_path_input, danish_voice_input)
151
 
152
  generate_kwargs = {
153
- "exaggeration": exaggeration_input,
154
  "temperature": temperature_input,
155
  "cfg_weight": cfgw_input,
156
  }
@@ -263,7 +263,6 @@ with gr.Blocks() as demo:
263
  language_id,
264
  ref_wav,
265
  danish_voice,
266
- 0.5, # Fixed exaggeration
267
  temp,
268
  seed_num,
269
  cfg_weight,
 
111
  language_id: str,
112
  audio_prompt_path_input: str = None,
113
  danish_voice_input: str = "mic",
 
114
  temperature_input: float = 0.8,
115
  seed_num_input: int = 0,
116
  cfgw_input: float = 0.5
 
127
  text_input (str): The text to synthesize into speech (maximum 300 characters)
128
  language_id (str): The language code for synthesis (eg. en, fr, de, es, it, pt, hi)
129
  audio_prompt_path_input (str, optional): File path or URL to the reference audio file that defines the target voice style. Defaults to None.
 
130
  temperature_input (float, optional): Controls randomness in generation (0.05-5.0, higher=more varied). Defaults to 0.8.
131
  seed_num_input (int, optional): Random seed for reproducible results (0 for random generation). Defaults to 0.
132
  cfgw_input (float, optional): CFG/Pace weight controlling generation guidance (0.2-1.0). Defaults to 0.5, 0 for language transfer.
 
135
  tuple[int, np.ndarray]: A tuple containing the sample rate (int) and the generated audio waveform (numpy.ndarray)
136
  """
137
  current_model = get_or_load_model()
138
+ exaggeration: float = 0.5,
139
+
140
 
141
  if current_model is None:
142
  raise RuntimeError("TTS model is not loaded.")
 
150
  chosen_prompt = resolve_audio_prompt(language_id, audio_prompt_path_input, danish_voice_input)
151
 
152
  generate_kwargs = {
153
+ "exaggeration": exaggeration,
154
  "temperature": temperature_input,
155
  "cfg_weight": cfgw_input,
156
  }
 
263
  language_id,
264
  ref_wav,
265
  danish_voice,
 
266
  temp,
267
  seed_num,
268
  cfg_weight,