Spaces:
Sleeping
Sleeping
Refactor to use local voices with Git LFS support
Browse files
- Replace online voice URLs with local files from voices/ folder
- Add Danish voice selection dropdown (mic vs nic)
- Configure Git LFS for audio file handling
- Update UI to show/hide Danish voice options dynamically
- .gitattributes +2 -0
- app.py +51 -9
- voices/en_f1.flac +3 -0
- voices/mic.wav +3 -0
- voices/nic.wav +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*.flac filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -12,24 +12,36 @@ MODEL = None
|
|
| 12 |
|
| 13 |
LANGUAGE_CONFIG = {
|
| 14 |
"da": {
|
| 15 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
"text": "Sidste måned nåede vi en ny milepæl med to milliarder visninger på vores YouTube-kanal."
|
| 17 |
},
|
| 18 |
"en": {
|
| 19 |
-
"audio": "
|
| 20 |
"text": "Last month, we reached a new milestone with two billion views on our YouTube channel."
|
| 21 |
},
|
| 22 |
}
|
| 23 |
|
| 24 |
# --- UI Helpers ---
|
| 25 |
-
def default_audio_for_ui(lang: str) -> str | None:
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
def default_text_for_ui(lang: str) -> str:
|
| 30 |
return LANGUAGE_CONFIG.get(lang, {}).get("text", "")
|
| 31 |
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
def get_supported_languages_display() -> str:
|
| 34 |
"""Generate a formatted display of all supported languages."""
|
| 35 |
language_items = []
|
|
@@ -80,21 +92,23 @@ def set_seed(seed: int):
|
|
| 80 |
random.seed(seed)
|
| 81 |
np.random.seed(seed)
|
| 82 |
|
| 83 |
-
def resolve_audio_prompt(language_id: str, provided_path: str | None) -> str | None:
|
| 84 |
"""
|
| 85 |
Decide which audio prompt to use:
|
| 86 |
- If user provided a path (upload/mic/url), use it.
|
| 87 |
- Else, fall back to language-specific default (if any).
|
|
|
|
| 88 |
"""
|
| 89 |
if provided_path and str(provided_path).strip():
|
| 90 |
return provided_path
|
| 91 |
-
return
|
| 92 |
|
| 93 |
|
| 94 |
def generate_tts_audio(
|
| 95 |
text_input: str,
|
| 96 |
language_id: str,
|
| 97 |
audio_prompt_path_input: str = None,
|
|
|
|
| 98 |
exaggeration_input: float = 0.5,
|
| 99 |
temperature_input: float = 0.8,
|
| 100 |
seed_num_input: int = 0,
|
|
@@ -131,7 +145,7 @@ def generate_tts_audio(
|
|
| 131 |
print(f"Generating audio for text: '{text_input[:50]}...'")
|
| 132 |
|
| 133 |
# Handle optional audio prompt
|
| 134 |
-
chosen_prompt =
|
| 135 |
|
| 136 |
generate_kwargs = {
|
| 137 |
"exaggeration": exaggeration_input,
|
|
@@ -178,6 +192,14 @@ with gr.Blocks() as demo:
|
|
| 178 |
info="Select the language for text-to-speech synthesis"
|
| 179 |
)
|
| 180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
ref_wav = gr.Audio(
|
| 182 |
sources=["upload", "microphone"],
|
| 183 |
type="filepath",
|
|
@@ -207,12 +229,31 @@ with gr.Blocks() as demo:
|
|
| 207 |
audio_output = gr.Audio(label="Output Audio")
|
| 208 |
|
| 209 |
def on_language_change(lang, current_ref, current_text):
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
language_id.change(
|
| 213 |
fn=on_language_change,
|
| 214 |
inputs=[language_id, ref_wav, text],
|
| 215 |
-
outputs=[ref_wav, text],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
show_progress=False
|
| 217 |
)
|
| 218 |
|
|
@@ -222,6 +263,7 @@ with gr.Blocks() as demo:
|
|
| 222 |
text,
|
| 223 |
language_id,
|
| 224 |
ref_wav,
|
|
|
|
| 225 |
exaggeration,
|
| 226 |
temp,
|
| 227 |
seed_num,
|
|
|
|
| 12 |
|
| 13 |
# Per-language defaults used to pre-fill the UI.
#
# Keys read by the helpers in this file:
#   "text"          - sample sentence returned by default_text_for_ui().
#   "audio"         - single default reference-audio path, returned by
#                     default_audio_for_ui() for languages without voice options.
#   "audio_options" - (Danish only) voice id -> reference-audio path.
#   "default_audio" - (Danish only) path used when the requested voice id is
#                     not present in "audio_options".
# All audio paths point at local files under voices/.
LANGUAGE_CONFIG = {
    "da": {
        # Selectable Danish reference voices, keyed by voice id.
        "audio_options": {
            "mic": "voices/mic.wav",
            "nic": "voices/nic.wav"
        },
        # Fallback for unknown voice ids; same file as the "mic" option.
        "default_audio": "voices/mic.wav",
        "text": "Sidste måned nåede vi en ny milepæl med to milliarder visninger på vores YouTube-kanal."
    },
    "en": {
        "audio": "voices/en_f1.flac",
        "text": "Last month, we reached a new milestone with two billion views on our YouTube channel."
    },
}
|
| 27 |
|
| 28 |
# --- UI Helpers ---
|
| 29 |
+
def default_audio_for_ui(lang: str, danish_voice: str = "mic") -> str | None:
    """Return the bundled reference-audio path for ``lang``, if one is configured.

    Danish exposes several voices under ``"audio_options"``; ``danish_voice``
    selects one of them, and an unknown voice id falls back to the entry's
    ``"default_audio"``. Every other language uses its single ``"audio"``
    entry. ``None`` is returned when nothing is configured.
    """
    language_entry = LANGUAGE_CONFIG.get(lang, {})
    has_voice_choices = lang == "da" and "audio_options" in language_entry
    if not has_voice_choices:
        return language_entry.get("audio")
    fallback_path = language_entry.get("default_audio")
    return language_entry["audio_options"].get(danish_voice, fallback_path)
|
| 34 |
|
| 35 |
|
| 36 |
def default_text_for_ui(lang: str) -> str:
    """Return the sample sentence configured for ``lang``, or "" when there is none."""
    if lang not in LANGUAGE_CONFIG:
        return ""
    return LANGUAGE_CONFIG[lang].get("text", "")
|
| 38 |
|
| 39 |
|
| 40 |
+
def get_danish_voice_options() -> list[tuple[str, str]]:
    """Get the available Danish voice options for the dropdown.

    The choices are derived from ``LANGUAGE_CONFIG["da"]["audio_options"]`` so
    the dropdown cannot drift out of sync with the voices that
    ``default_audio_for_ui`` can actually resolve.

    Returns:
        A list of ``(label, value)`` pairs in configuration order, where the
        value is the voice id and the label is that id capitalized
        (e.g. ``("Mic", "mic")``). Empty when no Danish voices are configured.
    """
    voice_ids = LANGUAGE_CONFIG.get("da", {}).get("audio_options", {})
    return [(voice_id.capitalize(), voice_id) for voice_id in voice_ids]
|
| 43 |
+
|
| 44 |
+
|
| 45 |
def get_supported_languages_display() -> str:
|
| 46 |
"""Generate a formatted display of all supported languages."""
|
| 47 |
language_items = []
|
|
|
|
| 92 |
random.seed(seed)
|
| 93 |
np.random.seed(seed)
|
| 94 |
|
| 95 |
+
def resolve_audio_prompt(language_id: str, provided_path: str | None, danish_voice: str = "mic") -> str | None:
    """
    Pick the reference audio to condition generation on.

    A non-blank ``provided_path`` (something the user supplied) always wins and
    is returned unchanged. Otherwise the language's bundled default is used;
    for Danish that is the voice selected by ``danish_voice``. The result may
    be ``None`` when the language has no default.
    """
    user_supplied = bool(provided_path) and bool(str(provided_path).strip())
    if not user_supplied:
        return default_audio_for_ui(language_id, danish_voice)
    return provided_path
|
| 105 |
|
| 106 |
|
| 107 |
def generate_tts_audio(
|
| 108 |
text_input: str,
|
| 109 |
language_id: str,
|
| 110 |
audio_prompt_path_input: str = None,
|
| 111 |
+
danish_voice_input: str = "mic",
|
| 112 |
exaggeration_input: float = 0.5,
|
| 113 |
temperature_input: float = 0.8,
|
| 114 |
seed_num_input: int = 0,
|
|
|
|
| 145 |
print(f"Generating audio for text: '{text_input[:50]}...'")
|
| 146 |
|
| 147 |
# Handle optional audio prompt
|
| 148 |
+
chosen_prompt = resolve_audio_prompt(language_id, audio_prompt_path_input, danish_voice_input)
|
| 149 |
|
| 150 |
generate_kwargs = {
|
| 151 |
"exaggeration": exaggeration_input,
|
|
|
|
| 192 |
info="Select the language for text-to-speech synthesis"
|
| 193 |
)
|
| 194 |
|
| 195 |
+
danish_voice = gr.Dropdown(
|
| 196 |
+
choices=get_danish_voice_options(),
|
| 197 |
+
value="mic",
|
| 198 |
+
label="Danish Voice Selection",
|
| 199 |
+
info="Choose between different Danish voice options",
|
| 200 |
+
visible=(initial_lang == "da")
|
| 201 |
+
)
|
| 202 |
+
|
| 203 |
ref_wav = gr.Audio(
|
| 204 |
sources=["upload", "microphone"],
|
| 205 |
type="filepath",
|
|
|
|
| 229 |
audio_output = gr.Audio(label="Output Audio")
|
| 230 |
|
| 231 |
def on_language_change(lang, current_ref, current_text):
    """Refresh the language-dependent widgets after the language changes.

    Args:
        lang: Language code now selected in the language dropdown.
        current_ref: Current reference-audio value. Unused; accepted because
            the event passes it as an input.
        current_text: Current text value. Unused; accepted because the event
            passes it as an input.

    Returns:
        A 3-tuple matching the ``outputs`` of ``language_id.change``: the
        default reference audio for ``lang``, the default sample text for
        ``lang``, and one update for the Danish voice dropdown.
    """
    is_danish = lang == "da"
    # The voice picker is reset to the default voice on every language switch.
    danish_voice_val = "mic"
    return (
        default_audio_for_ui(lang, danish_voice_val),
        default_text_for_ui(lang),
        # One update carries both visibility and value, so the dropdown is
        # listed only once in the event's outputs.
        gr.update(visible=is_danish, value=danish_voice_val),
    )


def on_danish_voice_change(lang, danish_voice_val):
    """Swap the reference audio when a different Danish voice is picked.

    Returns the audio path for the chosen voice while Danish is selected;
    for any other language an empty update leaves the reference audio as is.
    """
    if lang == "da":
        return default_audio_for_ui(lang, danish_voice_val)
    return gr.update()  # No change if not Danish


language_id.change(
    fn=on_language_change,
    inputs=[language_id, ref_wav, text],
    outputs=[ref_wav, text, danish_voice],
    show_progress=False
)

danish_voice.change(
    fn=on_danish_voice_change,
    inputs=[language_id, danish_voice],
    outputs=[ref_wav],
    show_progress=False
)
|
| 259 |
|
|
|
|
| 263 |
text,
|
| 264 |
language_id,
|
| 265 |
ref_wav,
|
| 266 |
+
danish_voice,
|
| 267 |
exaggeration,
|
| 268 |
temp,
|
| 269 |
seed_num,
|
voices/en_f1.flac
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e81950378b53827aed08d164ebd332a7b8c5805b8a97b4f79ab057071e359c72
|
| 3 |
+
size 83323
|
voices/mic.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40956f5a6e515ea8cb322432516a768bac4ca6bc89d22876204e74c10fbd65d8
|
| 3 |
+
size 1584998
|
voices/nic.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4406e09d3a1cdac713f54ea5153b56adb7bf4996c47d975aff27c65b034a2575
|
| 3 |
+
size 1700098
|