Biorrith committed on
Commit
18de4f2
·
1 Parent(s): 82892a4

Refactor to use local voices with Git LFS support

Browse files

- Replace online voice URLs with local files from voices/ folder
- Add Danish voice selection dropdown (mic vs nic)
- Configure Git LFS for audio file handling
- Update UI to show/hide Danish voice options dynamically

Files changed (5) hide show
  1. .gitattributes +2 -0
  2. app.py +51 -9
  3. voices/en_f1.flac +3 -0
  4. voices/mic.wav +3 -0
  5. voices/nic.wav +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.wav filter=lfs diff=lfs merge=lfs -text
37
+ *.flac filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -12,24 +12,36 @@ MODEL = None
12
 
13
  LANGUAGE_CONFIG = {
14
  "da": {
15
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/da_m1.flac",
 
 
 
 
16
  "text": "Sidste måned nåede vi en ny milepæl med to milliarder visninger på vores YouTube-kanal."
17
  },
18
  "en": {
19
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/en_f1.flac",
20
  "text": "Last month, we reached a new milestone with two billion views on our YouTube channel."
21
  },
22
  }
23
 
24
  # --- UI Helpers ---
25
- def default_audio_for_ui(lang: str) -> str | None:
26
- return LANGUAGE_CONFIG.get(lang, {}).get("audio")
 
 
 
27
 
28
 
29
  def default_text_for_ui(lang: str) -> str:
30
  return LANGUAGE_CONFIG.get(lang, {}).get("text", "")
31
 
32
 
 
 
 
 
 
33
  def get_supported_languages_display() -> str:
34
  """Generate a formatted display of all supported languages."""
35
  language_items = []
@@ -80,21 +92,23 @@ def set_seed(seed: int):
80
  random.seed(seed)
81
  np.random.seed(seed)
82
 
83
- def resolve_audio_prompt(language_id: str, provided_path: str | None) -> str | None:
84
  """
85
  Decide which audio prompt to use:
86
  - If user provided a path (upload/mic/url), use it.
87
  - Else, fall back to language-specific default (if any).
 
88
  """
89
  if provided_path and str(provided_path).strip():
90
  return provided_path
91
- return LANGUAGE_CONFIG.get(language_id, {}).get("audio")
92
 
93
 
94
  def generate_tts_audio(
95
  text_input: str,
96
  language_id: str,
97
  audio_prompt_path_input: str = None,
 
98
  exaggeration_input: float = 0.5,
99
  temperature_input: float = 0.8,
100
  seed_num_input: int = 0,
@@ -131,7 +145,7 @@ def generate_tts_audio(
131
  print(f"Generating audio for text: '{text_input[:50]}...'")
132
 
133
  # Handle optional audio prompt
134
- chosen_prompt = audio_prompt_path_input or default_audio_for_ui(language_id)
135
 
136
  generate_kwargs = {
137
  "exaggeration": exaggeration_input,
@@ -178,6 +192,14 @@ with gr.Blocks() as demo:
178
  info="Select the language for text-to-speech synthesis"
179
  )
180
 
 
 
 
 
 
 
 
 
181
  ref_wav = gr.Audio(
182
  sources=["upload", "microphone"],
183
  type="filepath",
@@ -207,12 +229,31 @@ with gr.Blocks() as demo:
207
  audio_output = gr.Audio(label="Output Audio")
208
 
209
  def on_language_change(lang, current_ref, current_text):
210
- return default_audio_for_ui(lang), default_text_for_ui(lang)
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
  language_id.change(
213
  fn=on_language_change,
214
  inputs=[language_id, ref_wav, text],
215
- outputs=[ref_wav, text],
 
 
 
 
 
 
 
216
  show_progress=False
217
  )
218
 
@@ -222,6 +263,7 @@ with gr.Blocks() as demo:
222
  text,
223
  language_id,
224
  ref_wav,
 
225
  exaggeration,
226
  temp,
227
  seed_num,
 
12
 
13
  LANGUAGE_CONFIG = {
14
  "da": {
15
+ "audio_options": {
16
+ "mic": "voices/mic.wav",
17
+ "nic": "voices/nic.wav"
18
+ },
19
+ "default_audio": "voices/mic.wav", # Default to mic
20
  "text": "Sidste måned nåede vi en ny milepæl med to milliarder visninger på vores YouTube-kanal."
21
  },
22
  "en": {
23
+ "audio": "voices/en_f1.flac",
24
  "text": "Last month, we reached a new milestone with two billion views on our YouTube channel."
25
  },
26
  }
27
 
28
  # --- UI Helpers ---
29
+ def default_audio_for_ui(lang: str, danish_voice: str = "mic") -> str | None:
30
+ config = LANGUAGE_CONFIG.get(lang, {})
31
+ if lang == "da" and "audio_options" in config:
32
+ return config["audio_options"].get(danish_voice, config.get("default_audio"))
33
+ return config.get("audio")
34
 
35
 
36
  def default_text_for_ui(lang: str) -> str:
37
  return LANGUAGE_CONFIG.get(lang, {}).get("text", "")
38
 
39
 
40
+ def get_danish_voice_options() -> list[tuple[str, str]]:
41
+ """Get the available Danish voice options for the dropdown."""
42
+ return [("Mic", "mic"), ("Nic", "nic")]
43
+
44
+
45
  def get_supported_languages_display() -> str:
46
  """Generate a formatted display of all supported languages."""
47
  language_items = []
 
92
  random.seed(seed)
93
  np.random.seed(seed)
94
 
95
+ def resolve_audio_prompt(language_id: str, provided_path: str | None, danish_voice: str = "mic") -> str | None:
96
  """
97
  Decide which audio prompt to use:
98
  - If user provided a path (upload/mic/url), use it.
99
  - Else, fall back to language-specific default (if any).
100
+ - For Danish, use the selected voice option.
101
  """
102
  if provided_path and str(provided_path).strip():
103
  return provided_path
104
+ return default_audio_for_ui(language_id, danish_voice)
105
 
106
 
107
  def generate_tts_audio(
108
  text_input: str,
109
  language_id: str,
110
  audio_prompt_path_input: str = None,
111
+ danish_voice_input: str = "mic",
112
  exaggeration_input: float = 0.5,
113
  temperature_input: float = 0.8,
114
  seed_num_input: int = 0,
 
145
  print(f"Generating audio for text: '{text_input[:50]}...'")
146
 
147
  # Handle optional audio prompt
148
+ chosen_prompt = resolve_audio_prompt(language_id, audio_prompt_path_input, danish_voice_input)
149
 
150
  generate_kwargs = {
151
  "exaggeration": exaggeration_input,
 
192
  info="Select the language for text-to-speech synthesis"
193
  )
194
 
195
+ danish_voice = gr.Dropdown(
196
+ choices=get_danish_voice_options(),
197
+ value="mic",
198
+ label="Danish Voice Selection",
199
+ info="Choose between different Danish voice options",
200
+ visible=(initial_lang == "da")
201
+ )
202
+
203
  ref_wav = gr.Audio(
204
  sources=["upload", "microphone"],
205
  type="filepath",
 
229
  audio_output = gr.Audio(label="Output Audio")
230
 
231
  def on_language_change(lang, current_ref, current_text):
232
+ is_danish = (lang == "da")
233
+ danish_voice_val = "mic" if is_danish else "mic" # Default to mic
234
+ return (
235
+ default_audio_for_ui(lang, danish_voice_val),
236
+ default_text_for_ui(lang),
237
+ gr.update(visible=is_danish), # Update Danish voice dropdown visibility
238
+ danish_voice_val
239
+ )
240
+
241
+ def on_danish_voice_change(lang, danish_voice_val):
242
+ if lang == "da":
243
+ return default_audio_for_ui(lang, danish_voice_val)
244
+ return gr.update() # No change if not Danish
245
 
246
  language_id.change(
247
  fn=on_language_change,
248
  inputs=[language_id, ref_wav, text],
249
+ outputs=[ref_wav, text, danish_voice, danish_voice],
250
+ show_progress=False
251
+ )
252
+
253
+ danish_voice.change(
254
+ fn=on_danish_voice_change,
255
+ inputs=[language_id, danish_voice],
256
+ outputs=[ref_wav],
257
  show_progress=False
258
  )
259
 
 
263
  text,
264
  language_id,
265
  ref_wav,
266
+ danish_voice,
267
  exaggeration,
268
  temp,
269
  seed_num,
voices/en_f1.flac ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e81950378b53827aed08d164ebd332a7b8c5805b8a97b4f79ab057071e359c72
3
+ size 83323
voices/mic.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40956f5a6e515ea8cb322432516a768bac4ca6bc89d22876204e74c10fbd65d8
3
+ size 1584998
voices/nic.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4406e09d3a1cdac713f54ea5153b56adb7bf4996c47d975aff27c65b034a2575
3
+ size 1700098