"""Gradio front-end for a Japanese-only Style-Bert-VITS2 text-to-speech demo.

On import the module tries to fetch the BERT assets from the Hugging Face Hub
into ``./bert``.  ``create_inference_app`` builds the UI around a
``TTSModelHolder`` and the ``__main__`` guard launches it.

NOTE(review): this file was reformatted from a copy whose line structure and
indentation had been lost.  The nesting of the ``with gr.*`` layout blocks was
reconstructed from statement order and the inline section comments; confirm
it against the intended UI.  The HTML string literals are reproduced exactly
as found; they look like their markup was stripped (the CSS below defines
classes such as ``status-card`` and ``chat-bubble-bot`` that nothing in the
literals references) -- restore the original markup if it is available.
"""

import datetime
import os
import random
import shutil
from pathlib import Path

import gradio as gr
import torch
from huggingface_hub import snapshot_download

# --- SECTION 1: MODEL LOGIC ---
from style_bert_vits2.constants import (
    DEFAULT_LENGTH,
    DEFAULT_LINE_SPLIT,
    DEFAULT_NOISE,
    DEFAULT_NOISEW,
    DEFAULT_SPLIT_INTERVAL,
    Languages,
)
from style_bert_vits2.logging import logger
from style_bert_vits2.nlp import bert_models
from style_bert_vits2.tts_model import TTSModelHolder

# NOTE(review): this is assigned after ``huggingface_hub`` has been imported.
# If the library reads the variable at import time (to be confirmed against
# its documentation) the assignment has no effect.  It is left in place
# because actually enabling hf_transfer requires the optional ``hf_transfer``
# package to be installed.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"


def download_bert_assets():
    """Copy the BERT assets from the Hugging Face Hub into ``./bert``.

    Does nothing when ``./bert`` already exists.  The operation is
    best-effort: a failed download or copy is logged and the function
    returns normally, so importing this module never raises because of it.
    """
    repo_id = "Plana-Archive/Plana-TTS"
    source_subfolder = "sbv2-chupa-demo/bert"
    dest_folder = "./bert"

    if os.path.exists(dest_folder):
        return

    try:
        snapshot_dir = snapshot_download(
            repo_id=repo_id,
            allow_patterns=[f"{source_subfolder}/**/*"],
        )
        src_path = os.path.join(snapshot_dir, source_subfolder)
        if not os.path.exists(src_path):
            logger.warning(
                f"BERT assets not found in snapshot of {repo_id}: "
                f"{source_subfolder}"
            )
            return
        shutil.copytree(src_path, dest_folder)
    except Exception:
        # Deliberately best-effort, but leave a trace instead of failing
        # silently: a missing ./bert folder is otherwise hard to diagnose.
        logger.exception(f"Could not prepare BERT assets in {dest_folder}")


download_bert_assets()

# --- SECTION 2: CUSTOM CSS (AESTHETIC & CLEAN) ---
css = """ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Quicksand:wght@400;600;700&display=swap'); body, .gradio-container { background-color: #ffffff !important; font-family: 'Inter', sans-serif !important; } footer { display: none !important; } .header-img-container { text-align: center; padding: 10px 0; } .header-img { width: 100%; max-width: 500px; border-radius: 15px; margin: 0 auto; display: block; } .status-card { background: #ffffff; border: 2px solid #ffe4ec; border-radius: 14px; padding: 15px 10px; margin: 0 auto 20px auto; max-width: 400px; display: flex; flex-direction: column; align-items: center; box-shadow: 0 4px 15px rgba(255, 105, 180, 0.1); } .side-bar-container { border-right: 3px solid #ff69b4 !important; padding-right: 25px !important; } .pink-accordion { border: 1px solid #ffe4ec !important; border-radius: 10px !important; overflow: hidden; margin-top: 10px; } 
.pink-accordion .label-wrap { background: linear-gradient(135deg, #ff69b4 0%, #ff1493 100%) !important; padding: 10px !important; } .pink-accordion .label-wrap span { color: white !important; font-weight: 700 !important; } .generate-btn { background: linear-gradient(135deg, #ff69b4 0%, #ff1493 100%) !important; color: white !important; border-radius: 12px !important; font-weight: 700 !important; height: 50px !important; font-size: 1em !important; margin-top: 15px; border: none !important; width: 100% !important; } .output-audio-box { border: none !important; box-shadow: none !important; background: transparent !important; padding: 1px 1px 8px 1px !important; } .output-audio-box > div { border: 1px solid #ffe4ec !important; border-bottom: 1px solid #ffe4ec !important; border-radius: 10px !important; background: #ffffff !important; overflow: hidden !important; } /* --- CHATBOT SIMULATION CSS --- */ .chat-container { background: #ffffff; border: 2px solid #ffe4ec; border-radius: 15px; padding: 20px; margin-top: 20px; box-shadow: 0 5px 15px rgba(255, 105, 180, 0.05); } .chat-header { display: flex; justify-content: space-between; align-items: center; border-bottom: 2px solid #fff0f7; padding-bottom: 10px; margin-bottom: 15px; } .chat-title { color: #ff69b4; font-weight: 800; font-size: 15px; } .chat-status { background: #e6fffa; color: #38b2ac; padding: 2px 10px; border-radius: 20px; font-size: 10px; font-weight: 700; letter-spacing: 1px; } .chat-bubble-bot { background: #fff5f8; border: 1px solid #ffe4ec; padding: 12px; border-radius: 15px 15px 15px 0px; color: #7b4d5a; font-size: 13px; line-height: 1.6; } .chat-button-group { display: flex; flex-direction: column; gap: 6px; margin-top: 15px; } .chat-btn-opt { background: white !important; border: 1px solid #ffe4ec !important; color: #7b4d5a !important; text-align: left !important; font-size: 12px !important; padding: 8px 12px !important; min-height: unset !important; border-radius: 8px !important; } 
.chat-btn-opt:hover { border-color: #ff69b4 !important; color: #ff69b4 !important; background: #fffafc !important; } .note-box { background: #fffafa; padding: 20px; border-radius: 12px; margin-top: 25px; border: 1px solid #ffe4ec; } .note-title { color: #ff69b4; font-weight: 800; font-size: 14px; margin-bottom: 12px; display: flex; align-items: center; gap: 8px; text-transform: uppercase; letter-spacing: 1px;} .note-content { color: #7b4d5a; font-size: 13px; line-height: 1.6; } .footer-container { text-align: center; margin-top: 50px; padding: 30px; border-top: 2px solid #ffe4ec; background: #fffafc; } .dot-online { height: 8px; width: 8px; background-color: #ff69b4; border-radius: 50%; display: inline-block; animation: blink 1.5s infinite; } @keyframes blink { 0% { opacity: 1; } 50% { opacity: 0.4; } 100% { opacity: 1; } } """


def get_random_text():
    """Return one of the built-in Japanese sample texts, chosen at random."""
    return random.choice([
        "ちゅぱ、ちゅるる、ぢゅ、んく、れーれゅれろれろれろ、じゅぽぽぽぽぽ……ちゅううう!",
        "んっ……ぷはっ……はぁ……ぺろっ、ちゅ、ちゅうぅ……。",
        "あむっ、んぐっ、んちゅ……はぁ、じゅるっ……。",
    ])


# --- SECTION 3: MAIN APPLICATION ---
def create_inference_app(model_holder: TTSModelHolder) -> gr.Blocks:
    """Build the Gradio UI for the TTS demo.

    The first model listed by ``model_holder`` (if any) and the first of its
    files are loaded up front through ``model_holder.get_model``.

    Args:
        model_holder: Holder that lists the available models and exposes the
            currently loaded one as ``current_model``.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    model_names = model_holder.model_names
    current_model_name = model_names[0] if model_names else None
    initial_pth_files = (
        [str(f) for f in model_holder.model_files_dict[current_model_name]]
        if current_model_name
        else []
    )
    current_model_path = initial_pth_files[0] if initial_pth_files else None

    if current_model_name and current_model_path:
        model_holder.get_model(current_model_name, current_model_path)

    # Initial message shown in the assistant panel; also the fallback answer
    # of chat_respond for an unknown choice.
    welcome_msg = '''
(ꈍoꈍ🌸) Halo! Saya Plana Asisten.
Saya bisa membantu hal ini :
• Cara menggunakan TTS
• Informasi bahasa
• Efek input non-Jepang
Klik tombol di bawah untuk melihat detail!
'''

    def tts_fn(text, sdp_ratio, noise_scale, noise_scale_w, length_scale):
        """Synthesize ``text`` with the currently loaded model.

        Returns a ``(status_message, audio)`` pair for the status textbox and
        the audio player.  ``audio`` is the ``(sr, audio)`` pair produced by
        the model's ``infer`` call, or ``None`` when nothing was generated.
        """
        if not text or not text.strip():
            return "Error: input text is empty", None

        model = model_holder.current_model
        if model is None:
            # Without this guard the failure would surface as an opaque
            # AttributeError on None.
            return "Error: no model is loaded", None

        try:
            sr, audio = model.infer(
                text=text,
                language=Languages.JP,
                sdp_ratio=sdp_ratio,
                noise=noise_scale,
                noise_w=noise_scale_w,
                length=length_scale,
                line_split=False,
                split_interval=0.5,
                speaker_id=0,
            )
        except Exception as e:
            # Keep the UI responsive, but record the traceback server-side.
            logger.exception("TTS inference failed")
            return f"Error: {e}", None
        return "Generation Complete! ✅", (sr, audio)

    def chat_respond(choice):
        """Return the canned assistant answer for button ``choice`` (1-3).

        Any other value yields the welcome message.
        """
        responses = {
            1: '''
Cara Menggunakan:
1. Masukkan teks Jepang di input
2. Settings jika perlu
3. Atur kecepatan jika perlu
4. Klik 'Generate Voice'
5. dan hasil muncul!

🌥️ NOTES : tidak di sarankan mengubah settingan kecuali bagian atur kecepatan aja.
''',
            2: '''
Model ini hanya support bahasa Jepang aja sementara lainnya tidak bisa.
''',
            3: '''
⚠️ Input Bukan Jepang ⚠️
Karakter akan terdengar aneh karena model khusus bahasa Jepang.
''',
        }
        return responses.get(choice, welcome_msg)

    with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="pink")) as app:
        # NOTE(review): literal reproduced as found (a lone newline); it was
        # presumably header markup that got stripped -- confirm.
        gr.HTML("\n")

        # STATUS CARD
        gr.HTML('''
Voice Conversion System Online
👥 1 Students
Ready
📊 Total Models
Database: 1
''')

        with gr.Row():
            # SIDEBAR
            with gr.Column(scale=1, elem_classes="side-bar-container"):
                gr.Markdown("### ⚙️ Audio Settings")
                length_scale = gr.Slider(
                    0.1, 2, value=1.0, step=0.1, label="Kecepatan"
                )
                sdp_ratio = gr.Slider(
                    0, 1, value=0.2, step=0.1, label="SDP Ratio"
                )
                with gr.Accordion(
                    "🔮 Advanced Settings 🔮",
                    open=False,
                    elem_classes="pink-accordion",
                ):
                    noise_scale = gr.Slider(
                        0.1, 2, value=0.6, step=0.1, label="Noise"
                    )
                    noise_scale_w = gr.Slider(
                        0.1, 2, value=0.8, step=0.1, label="Noise_W"
                    )

            # MAIN CONTENT
            with gr.Column(scale=2):
                with gr.Group():
                    text_input = gr.TextArea(
                        label="Input Teks (JP)",
                        value="ちゅぱ、ちゅるる...",
                        lines=10,
                    )
                    random_button = gr.Button(
                        "🎲 KLIK ACAK ISI TEXT 🎲", variant="secondary"
                    )
                tts_button = gr.Button(
                    "Generate Voice",
                    variant="primary",
                    elem_classes="generate-btn",
                )

                # --- CHATBOT AREA ---
                with gr.Column(elem_classes="chat-container"):
                    gr.HTML('''
Plana AI - Asisten ● CONNECTED
''')
                    chat_display = gr.HTML(welcome_msg)
                    with gr.Column(elem_classes="chat-button-group"):
                        btn_opt1 = gr.Button(
                            "1. Cara pakai VITS ini",
                            elem_classes="chat-btn-opt",
                        )
                        btn_opt2 = gr.Button(
                            "2. Support bahasa apa aja VITS ini",
                            elem_classes="chat-btn-opt",
                        )
                        btn_opt3 = gr.Button(
                            "3. Apa yang terjadi jika inputs bukan Jepang",
                            elem_classes="chat-btn-opt",
                        )

                with gr.Row():
                    text_output = gr.Textbox(label="Status", interactive=False)
                    audio_output = gr.Audio(
                        label="Output",
                        interactive=False,
                        elem_classes="output-audio-box",
                    )

        gr.HTML('''
📑 TENTANG FITUR SETTINGS 📑
Kecepatan: Mengatur tempo bicara (Default 1.0).
SDP Ratio: Ritme bicara. Angka tinggi lebih ekspresif.
Noise: Stabilitas suara. Kurangi jika suara pecah.
Noise_W: Dinamika durasi antar kata.
''')

        # FOOTER
        gr.HTML(''' ''')

        # Event handlers
        random_button.click(get_random_text, outputs=[text_input])
        tts_button.click(
            tts_fn,
            inputs=[text_input, sdp_ratio, noise_scale, noise_scale_w, length_scale],
            outputs=[text_output, audio_output],
        )

        # Chatbot handlers
        btn_opt1.click(fn=lambda: chat_respond(1), outputs=chat_display)
        btn_opt2.click(fn=lambda: chat_respond(2), outputs=chat_display)
        btn_opt3.click(fn=lambda: chat_respond(3), outputs=chat_display)

    return app


if __name__ == "__main__":
    bert_models.load_model(Languages.JP)
    bert_models.load_tokenizer(Languages.JP)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_holder = TTSModelHolder(Path("model_assets"), device)
    create_inference_app(model_holder).launch()