Plana-Archive committed on
Commit
704defd
·
verified ·
1 Parent(s): c18c7ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -31
app.py CHANGED
@@ -4,11 +4,10 @@ import datetime
4
  from pathlib import Path
5
  import random
6
 
7
- # --- BAGIAN 1: DOWNLOAD OTOMATIS ASSET BERT ---
8
- # Menggunakan huggingface_hub untuk mengambil folder yang tidak kita upload
9
  from huggingface_hub import snapshot_download
10
 
11
- # Mengaktifkan hf_transfer untuk kecepatan download maksimal (membutuhkan hf_transfer di requirements.txt)
12
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
13
 
14
  def download_bert_assets():
@@ -17,34 +16,31 @@ def download_bert_assets():
17
  SOURCE_SUBFOLDER = "sbv2-chupa-demo/bert"
18
  DEST_FOLDER = "./bert"
19
 
20
- # Hanya download jika folder ./bert belum ada di Space
21
  if not os.path.exists(DEST_FOLDER):
22
  try:
23
  print(f"Downloading BERT assets from {REPO_ID}... Mohon tunggu sebentar.")
24
- # Download folder bert saja dari subfolder repo sumber
25
  temp_dir = snapshot_download(
26
  repo_id=REPO_ID,
27
  allow_patterns=[f"{SOURCE_SUBFOLDER}/**/*"],
28
- token=os.getenv("HF_TOKEN") # Mengambil token dari Secret Space
29
  )
30
 
31
- # Pindahkan isi dari folder hasil download ke root/bert agar terbaca sistem
32
  src_path = os.path.join(temp_dir, SOURCE_SUBFOLDER)
33
  if os.path.exists(src_path):
34
  shutil.copytree(src_path, DEST_FOLDER)
35
- print("✅ BERT assets downloaded and linked successfully.")
36
  else:
37
- print("⚠️ Folder bert tidak ditemukan di hasil download snapshot.")
38
  except Exception as e:
39
  print(f"❌ Failed to download BERT: {e}")
40
  else:
41
  print("✅ BERT assets already exist.")
42
 
43
- # Jalankan download aset sebelum melakukan import library TTS
44
  download_bert_assets()
45
 
46
 
47
- # --- BAGIAN 2: LOGIKA APLIKASI ASLI ---
48
  import gradio as gr
49
  from style_bert_vits2.constants import (
50
  DEFAULT_LENGTH,
@@ -58,7 +54,7 @@ from style_bert_vits2.models.infer import InvalidToneError
58
  from style_bert_vits2.nlp.japanese import pyopenjtalk_worker as pyopenjtalk
59
  from style_bert_vits2.tts_model import TTSModelHolder
60
 
61
- # Inisialisasi worker untuk teks Jepang
62
  pyopenjtalk.initialize_worker()
63
 
64
  example_file = "chupa_examples.txt"
@@ -66,7 +62,7 @@ initial_text = (
66
  "ちゅぱ、ちゅるる、ぢゅ、んく、れーれゅれろれろれろ、じゅぽぽぽぽぽ……ちゅううう!"
67
  )
68
 
69
- # Load contoh teks jika file tersedia
70
  if os.path.exists(example_file):
71
  with open(example_file, "r", encoding="utf-8") as f:
72
  examples = f.read().splitlines()
@@ -77,8 +73,9 @@ def get_random_text() -> str:
77
  return random.choice(examples)
78
 
79
  initial_md = """
80
- # チュパ音合成デモ (Chupa Sound Synthesis Demo)
81
- 2024-07-07: initial ver - Migrated to Hugging Face Space
 
82
  """
83
 
84
  def make_interactive():
@@ -134,6 +131,7 @@ def create_inference_app(model_holder: TTSModelHolder) -> gr.Blocks:
134
 
135
  end_time = datetime.datetime.now()
136
  duration = (end_time - start_time).total_seconds()
 
137
  message = f"Success, time: {duration} seconds."
138
  return message, (sr, audio)
139
 
@@ -142,15 +140,16 @@ def create_inference_app(model_holder: TTSModelHolder) -> gr.Blocks:
142
 
143
  model_names = model_holder.model_names
144
  if len(model_names) == 0:
145
- logger.error(f"モデルが見つかりませんでした。{model_holder.root_dir}にモデルを置いてください。")
146
  with gr.Blocks() as app:
147
- gr.Markdown(f"Error: モデルが見つかりませんでした。{model_holder.root_dir}にモデルを置いてください。")
148
  return app
149
 
150
  initial_pth_files = get_model_files(model_names[0])
151
  model = model_holder.get_model(model_names[0], initial_pth_files[0])
152
  speakers = list(model.spk2id.keys())
153
 
 
154
  with gr.Blocks(theme="ParityError/Anime") as app:
155
  gr.Markdown(initial_md)
156
  with gr.Row():
@@ -177,24 +176,50 @@ def create_inference_app(model_holder: TTSModelHolder) -> gr.Blocks:
177
  random_button.click(get_random_text, outputs=[text_input])
178
  with gr.Row():
179
  length_scale = gr.Slider(
180
- minimum=0.1, maximum=2, value=DEFAULT_LENGTH, step=0.1, label="生成音声の長さ(Length)"
 
 
 
 
181
  )
182
  sdp_ratio = gr.Slider(
183
- minimum=0, maximum=1, value=1, step=0.1, label="SDP Ratio"
 
 
 
 
184
  )
185
  line_split = gr.Checkbox(
186
- label="改行で分けて生成(分けたほうが感情が乗ります)",
187
  value=DEFAULT_LINE_SPLIT,
188
  visible=False,
189
  )
190
  split_interval = gr.Slider(
191
- minimum=0.0, maximum=2, value=DEFAULT_SPLIT_INTERVAL, step=0.1, label="改行ごとに挟む無音の長さ(秒)"
 
 
 
 
 
 
 
192
  )
193
- language = gr.Dropdown(choices=["JP"], value="JP", label="Language", visible=False)
194
  speaker = gr.Dropdown(label="話者", choices=speakers, value=speakers[0])
195
  with gr.Accordion(label="詳細設定", open=True):
196
- noise_scale = gr.Slider(minimum=0.1, maximum=2, value=DEFAULT_NOISE, step=0.1, label="Noise")
197
- noise_scale_w = gr.Slider(minimum=0.1, maximum=2, value=DEFAULT_NOISEW, step=0.1, label="Noise_W")
 
 
 
 
 
 
 
 
 
 
 
 
198
  with gr.Column():
199
  tts_button = gr.Button("音声合成", variant="primary")
200
  text_output = gr.Textbox(label="情報")
@@ -202,14 +227,24 @@ def create_inference_app(model_holder: TTSModelHolder) -> gr.Blocks:
202
 
203
  tts_button.click(
204
  tts_fn,
205
- inputs=[model_name, model_path, text_input, language, sdp_ratio, noise_scale, noise_scale_w, length_scale, line_split, split_interval, speaker],
 
 
 
 
206
  outputs=[text_output, audio_output],
207
  )
208
 
209
- model_name.change(model_holder.update_model_files_for_gradio, inputs=[model_name], outputs=[model_path])
 
 
 
 
210
  model_path.change(make_non_interactive, outputs=[tts_button])
211
- refresh_button.click(model_holder.update_model_names_for_gradio, outputs=[model_name, model_path, tts_button])
212
-
 
 
213
  style = gr.Dropdown(label="スタイル", choices=[], visible=False)
214
  load_button.click(
215
  model_holder.get_model_for_gradio,
@@ -219,17 +254,17 @@ def create_inference_app(model_holder: TTSModelHolder) -> gr.Blocks:
219
 
220
  return app
221
 
 
222
  if __name__ == "__main__":
223
  import torch
224
  from style_bert_vits2.constants import Languages
225
  from style_bert_vits2.nlp import bert_models
226
 
227
- # Load model BERT yang sudah di-download sebelumnya
228
  bert_models.load_model(Languages.JP)
229
  bert_models.load_tokenizer(Languages.JP)
230
 
231
  device = "cuda" if torch.cuda.is_available() else "cpu"
232
- # Pastikan folder model_assets sudah terisi file .safetensors/config
233
  model_holder = TTSModelHolder(Path("model_assets"), device)
234
  app = create_inference_app(model_holder)
235
- app.launch(inbrowser=True)
 
4
  from pathlib import Path
5
  import random
6
 
7
+ # --- BAGIAN 1: DOWNLOAD OTOMATIS ASSET BERT (WAJIB AGAR JALAN) ---
 
8
  from huggingface_hub import snapshot_download
9
 
10
+ # Mengaktifkan hf_transfer untuk kecepatan download maksimal
11
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
12
 
13
  def download_bert_assets():
 
16
  SOURCE_SUBFOLDER = "sbv2-chupa-demo/bert"
17
  DEST_FOLDER = "./bert"
18
 
 
19
  if not os.path.exists(DEST_FOLDER):
20
  try:
21
  print(f"Downloading BERT assets from {REPO_ID}... Mohon tunggu sebentar.")
 
22
  temp_dir = snapshot_download(
23
  repo_id=REPO_ID,
24
  allow_patterns=[f"{SOURCE_SUBFOLDER}/**/*"],
25
+ token=os.getenv("HF_TOKEN")
26
  )
27
 
 
28
  src_path = os.path.join(temp_dir, SOURCE_SUBFOLDER)
29
  if os.path.exists(src_path):
30
  shutil.copytree(src_path, DEST_FOLDER)
31
+ print("✅ BERT assets downloaded successfully.")
32
  else:
33
+ print("⚠️ Folder bert tidak ditemukan.")
34
  except Exception as e:
35
  print(f"❌ Failed to download BERT: {e}")
36
  else:
37
  print("✅ BERT assets already exist.")
38
 
39
+ # Jalankan download aset sebelum import library lainnya
40
  download_bert_assets()
41
 
42
 
43
+ # --- BAGIAN 2: LOGIKA ASLI (TANPA UBAH TAMPILAN) ---
44
  import gradio as gr
45
  from style_bert_vits2.constants import (
46
  DEFAULT_LENGTH,
 
54
  from style_bert_vits2.nlp.japanese import pyopenjtalk_worker as pyopenjtalk
55
  from style_bert_vits2.tts_model import TTSModelHolder
56
 
57
+ # Inisialisasi worker
58
  pyopenjtalk.initialize_worker()
59
 
60
  example_file = "chupa_examples.txt"
 
62
  "ちゅぱ、ちゅるる、ぢゅ、んく、れーれゅれろれろれろ、じゅぽぽぽぽぽ……ちゅううう!"
63
  )
64
 
65
+ # Load examples
66
  if os.path.exists(example_file):
67
  with open(example_file, "r", encoding="utf-8") as f:
68
  examples = f.read().splitlines()
 
73
  return random.choice(examples)
74
 
75
  initial_md = """
76
+ # チュパ音合成デモ
77
+
78
+ 2024-07-07: initial ver
79
  """
80
 
81
  def make_interactive():
 
131
 
132
  end_time = datetime.datetime.now()
133
  duration = (end_time - start_time).total_seconds()
134
+
135
  message = f"Success, time: {duration} seconds."
136
  return message, (sr, audio)
137
 
 
140
 
141
  model_names = model_holder.model_names
142
  if len(model_names) == 0:
143
+ logger.error(f"モデルが見つかりませんでした。")
144
  with gr.Blocks() as app:
145
+ gr.Markdown(f"Error: モデルが見つかりませんでした。")
146
  return app
147
 
148
  initial_pth_files = get_model_files(model_names[0])
149
  model = model_holder.get_model(model_names[0], initial_pth_files[0])
150
  speakers = list(model.spk2id.keys())
151
 
152
+ # TAMPILAN ASLI (TIDAK DIUBAH)
153
  with gr.Blocks(theme="ParityError/Anime") as app:
154
  gr.Markdown(initial_md)
155
  with gr.Row():
 
176
  random_button.click(get_random_text, outputs=[text_input])
177
  with gr.Row():
178
  length_scale = gr.Slider(
179
+ minimum=0.1,
180
+ maximum=2,
181
+ value=DEFAULT_LENGTH,
182
+ step=0.1,
183
+ label="生成音声の長さ(Length)",
184
  )
185
  sdp_ratio = gr.Slider(
186
+ minimum=0,
187
+ maximum=1,
188
+ value=1,
189
+ step=0.1,
190
+ label="SDP Ratio",
191
  )
192
  line_split = gr.Checkbox(
193
+ label="改行で分けて生成",
194
  value=DEFAULT_LINE_SPLIT,
195
  visible=False,
196
  )
197
  split_interval = gr.Slider(
198
+ minimum=0.0,
199
+ maximum=2,
200
+ value=DEFAULT_SPLIT_INTERVAL,
201
+ step=0.1,
202
+ label="改行ごとに挟む無音の長さ(秒)",
203
+ )
204
+ language = gr.Dropdown(
205
+ choices=["JP"], value="JP", label="Language", visible=False
206
  )
 
207
  speaker = gr.Dropdown(label="話者", choices=speakers, value=speakers[0])
208
  with gr.Accordion(label="詳細設定", open=True):
209
+ noise_scale = gr.Slider(
210
+ minimum=0.1,
211
+ maximum=2,
212
+ value=DEFAULT_NOISE,
213
+ step=0.1,
214
+ label="Noise",
215
+ )
216
+ noise_scale_w = gr.Slider(
217
+ minimum=0.1,
218
+ maximum=2,
219
+ value=DEFAULT_NOISEW,
220
+ step=0.1,
221
+ label="Noise_W",
222
+ )
223
  with gr.Column():
224
  tts_button = gr.Button("音声合成", variant="primary")
225
  text_output = gr.Textbox(label="情報")
 
227
 
228
  tts_button.click(
229
  tts_fn,
230
+ inputs=[
231
+ model_name, model_path, text_input, language, sdp_ratio,
232
+ noise_scale, noise_scale_w, length_scale, line_split,
233
+ split_interval, speaker
234
+ ],
235
  outputs=[text_output, audio_output],
236
  )
237
 
238
+ model_name.change(
239
+ model_holder.update_model_files_for_gradio,
240
+ inputs=[model_name],
241
+ outputs=[model_path],
242
+ )
243
  model_path.change(make_non_interactive, outputs=[tts_button])
244
+ refresh_button.click(
245
+ model_holder.update_model_names_for_gradio,
246
+ outputs=[model_name, model_path, tts_button],
247
+ )
248
  style = gr.Dropdown(label="スタイル", choices=[], visible=False)
249
  load_button.click(
250
  model_holder.get_model_for_gradio,
 
254
 
255
  return app
256
 
257
+
258
  if __name__ == "__main__":
259
  import torch
260
  from style_bert_vits2.constants import Languages
261
  from style_bert_vits2.nlp import bert_models
262
 
 
263
  bert_models.load_model(Languages.JP)
264
  bert_models.load_tokenizer(Languages.JP)
265
 
266
  device = "cuda" if torch.cuda.is_available() else "cpu"
267
+ # Menggunakan folder model_assets yang sudah ada
268
  model_holder = TTSModelHolder(Path("model_assets"), device)
269
  app = create_inference_app(model_holder)
270
+ app.launch()