# NSW-CHUPA / app.py
# Plana-Archive's picture
# Update app.py
# 07a9b6e verified
# raw
# history blame
# 13.1 kB
import os
import shutil
import datetime
from pathlib import Path
import random
import torch
import gradio as gr
from huggingface_hub import snapshot_download
# --- PART 1: MODEL LOGIC ---
from style_bert_vits2.constants import (
DEFAULT_LENGTH, DEFAULT_LINE_SPLIT, DEFAULT_NOISE,
DEFAULT_NOISEW, DEFAULT_SPLIT_INTERVAL, Languages
)
from style_bert_vits2.logging import logger
from style_bert_vits2.tts_model import TTSModelHolder
from style_bert_vits2.nlp import bert_models
# Set the HF_HUB_ENABLE_HF_TRANSFER environment variable for this process.
# NOTE(review): huggingface_hub is already imported above; it appears to read
# this flag when it is imported, so setting it here may have no effect on the
# snapshot_download call below — confirm, and move this line above the import
# if the flag is meant to apply.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
def download_bert_assets():
    """Fetch the BERT assets from the Hugging Face Hub into ``./bert``.

    Does nothing when ``./bert`` already exists. Otherwise the
    ``sbv2-chupa-demo/bert`` subfolder of the ``Plana-Archive/Plana-TTS``
    repository is downloaded with ``snapshot_download`` and copied to
    ``./bert``.

    The operation is best-effort: a failure is logged and swallowed so that
    start-up can continue, instead of being silently discarded.

    Returns:
        None.
    """
    REPO_ID = "Plana-Archive/Plana-TTS"
    SOURCE_SUBFOLDER = "sbv2-chupa-demo/bert"
    DEST_FOLDER = "./bert"

    # Assets already present: nothing to download.
    if os.path.exists(DEST_FOLDER):
        return

    try:
        temp_dir = snapshot_download(
            repo_id=REPO_ID,
            allow_patterns=[f"{SOURCE_SUBFOLDER}/**/*"],
        )
        src_path = os.path.join(temp_dir, SOURCE_SUBFOLDER)
        if not os.path.exists(src_path):
            logger.warning(
                f"BERT assets not found in snapshot of {REPO_ID}: "
                f"missing {SOURCE_SUBFOLDER!r}"
            )
            return
        # The destination may have been created while the download was
        # running; replace it so copytree does not fail on an existing folder.
        if os.path.exists(DEST_FOLDER):
            shutil.rmtree(DEST_FOLDER)
        shutil.copytree(src_path, DEST_FOLDER)
    except Exception as exc:
        # Deliberately best-effort, but leave a trace of what went wrong.
        logger.warning(f"Could not download BERT assets from {REPO_ID}: {exc!r}")
# Runs at module import, so ./bert is populated (best-effort) before the BERT
# model and tokenizer are loaded in the __main__ guard at the end of the file.
download_bert_assets()
# --- PART 2: CUSTOM CSS (AESTHETIC & CLEAN) ---
# Stylesheet passed to gr.Blocks(css=...) when the app is built. The class
# names defined here are the ones referenced through elem_classes and the
# inline HTML snippets in create_inference_app.
css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Quicksand:wght@400;600;700&display=swap');
body, .gradio-container { background-color: #ffffff !important; font-family: 'Inter', sans-serif !important; }
footer { display: none !important; }
.header-img-container { text-align: center; padding: 10px 0; }
.header-img { width: 100%; max-width: 500px; border-radius: 15px; margin: 0 auto; display: block; }
.status-card {
background: #ffffff; border: 2px solid #ffe4ec; border-radius: 14px;
padding: 15px 10px; margin: 0 auto 20px auto; max-width: 400px;
display: flex; flex-direction: column; align-items: center;
box-shadow: 0 4px 15px rgba(255, 105, 180, 0.1);
}
.side-bar-container { border-right: 3px solid #ff69b4 !important; padding-right: 25px !important; }
.pink-accordion { border: 1px solid #ffe4ec !important; border-radius: 10px !important; overflow: hidden; margin-top: 10px; }
.pink-accordion .label-wrap { background: linear-gradient(135deg, #ff69b4 0%, #ff1493 100%) !important; padding: 10px !important; }
.pink-accordion .label-wrap span { color: white !important; font-weight: 700 !important; }
.generate-btn {
background: linear-gradient(135deg, #ff69b4 0%, #ff1493 100%) !important;
color: white !important; border-radius: 12px !important; font-weight: 700 !important;
height: 50px !important; font-size: 1em !important; margin-top: 15px; border: none !important;
width: 100% !important;
}
.output-audio-box {
border: none !important; box-shadow: none !important; background: transparent !important;
padding: 1px 1px 8px 1px !important;
}
.output-audio-box > div {
border: 1px solid #ffe4ec !important; border-bottom: 1px solid #ffe4ec !important;
border-radius: 10px !important; background: #ffffff !important; overflow: hidden !important;
}
/* --- CHATBOT SIMULATION CSS --- */
.chat-container {
background: #ffffff; border: 2px solid #ffe4ec; border-radius: 15px;
padding: 20px; margin-top: 20px; box-shadow: 0 5px 15px rgba(255, 105, 180, 0.05);
}
.chat-header {
display: flex; justify-content: space-between; align-items: center;
border-bottom: 2px solid #fff0f7; padding-bottom: 10px; margin-bottom: 15px;
}
.chat-title { color: #ff69b4; font-weight: 800; font-size: 15px; }
.chat-status {
background: #e6fffa; color: #38b2ac; padding: 2px 10px;
border-radius: 20px; font-size: 10px; font-weight: 700; letter-spacing: 1px;
}
.chat-bubble-bot {
background: #fff5f8; border: 1px solid #ffe4ec; padding: 12px;
border-radius: 15px 15px 15px 0px; color: #7b4d5a; font-size: 13px; line-height: 1.6;
}
.chat-button-group { display: flex; flex-direction: column; gap: 6px; margin-top: 15px; }
.chat-btn-opt {
background: white !important; border: 1px solid #ffe4ec !important;
color: #7b4d5a !important; text-align: left !important; font-size: 12px !important;
padding: 8px 12px !important; min-height: unset !important; border-radius: 8px !important;
}
.chat-btn-opt:hover { border-color: #ff69b4 !important; color: #ff69b4 !important; background: #fffafc !important; }
.note-box {
background: #fffafa; padding: 20px; border-radius: 12px;
margin-top: 25px; border: 1px solid #ffe4ec;
}
.note-title { color: #ff69b4; font-weight: 800; font-size: 14px; margin-bottom: 12px; display: flex; align-items: center; gap: 8px; text-transform: uppercase; letter-spacing: 1px;}
.note-content { color: #7b4d5a; font-size: 13px; line-height: 1.6; }
.footer-container { text-align: center; margin-top: 50px; padding: 30px; border-top: 2px solid #ffe4ec; background: #fffafc; }
.dot-online { height: 8px; width: 8px; background-color: #ff69b4; border-radius: 50%; display: inline-block; animation: blink 1.5s infinite; }
@keyframes blink { 0% { opacity: 1; } 50% { opacity: 0.4; } 100% { opacity: 1; } }
"""
def get_random_text():
    """Return one of the built-in sample lines, picked uniformly at random."""
    # NOTE(review): these literals look mis-encoded (mojibake) in this copy of
    # the file — verify them against the original source before relying on them.
    sample_lines = [
        "ใกใ‚…ใฑใ€ใกใ‚…ใ‚‹ใ‚‹ใ€ใขใ‚…ใ€ใ‚“ใใ€ใ‚Œใƒผใ‚Œใ‚…ใ‚Œใ‚ใ‚Œใ‚ใ‚Œใ‚ใ€ใ˜ใ‚…ใฝใฝใฝใฝใฝโ€ฆโ€ฆใกใ‚…ใ†ใ†ใ†๏ผ",
        "ใ‚“ใฃโ€ฆโ€ฆใทใฏใฃโ€ฆโ€ฆใฏใโ€ฆโ€ฆใบใ‚ใฃใ€ใกใ‚…ใ€ใกใ‚…ใ†ใ…โ€ฆโ€ฆใ€‚",
        "ใ‚ใ‚€ใฃใ€ใ‚“ใใฃใ€ใ‚“ใกใ‚…โ€ฆโ€ฆใฏใใ€ใ˜ใ‚…ใ‚‹ใฃโ€ฆโ€ฆใ€‚"
    ]
    picked = random.choice(sample_lines)
    return picked
# --- PART 3: MAIN APPLICATION ---
def create_inference_app(model_holder: TTSModelHolder) -> gr.Blocks:
    """Build the Gradio UI for text-to-speech inference.

    The first model listed by ``model_holder`` (and its first model file) is
    loaded up front through ``model_holder.get_model``. The returned app is
    not launched; the caller is expected to call ``.launch()`` on it.

    Args:
        model_holder: Holder used here through ``model_names``,
            ``model_files_dict``, ``get_model`` and ``current_model``.

    Returns:
        The assembled ``gr.Blocks`` application.
    """
    model_names = model_holder.model_names
    current_model_name = model_names[0] if model_names else None
    initial_pth_files = (
        [str(f) for f in model_holder.model_files_dict[current_model_name]]
        if current_model_name
        else []
    )
    current_model_path = initial_pth_files[0] if initial_pth_files else None
    if current_model_name and current_model_path:
        model_holder.get_model(current_model_name, current_model_path)

    # Initial message shown in the chat panel.
    welcome_msg = '''
<div class="chat-bubble-bot">
(๊ˆo๊ˆ๐ŸŒธ) Halo! Saya Plana Asisten.<br>
Saya bisa membantu hal ini :<br>
โ€ข Cara menggunakan TTS<br>
โ€ข Informasi bahasa<br>
โ€ข Efek input non-Jepang<br>
Klik tombol di bawah untuk melihat detail!
</div>
'''

    # Canned answers for the three chat buttons; built once instead of on
    # every click.
    chat_responses = {
        1: '''<div class="chat-bubble-bot">
<b>Cara Menggunakan:</b><br>
1. Masukkan teks Jepang di input<br>
2. Settings jika perlu<br>
3. Atur kecepatan jika perlu<br>
4. Klik 'Generate Voice'<br>
5. dan hasil muncul!<br><br>
๐ŸŒฅ๏ธ <b>NOTES :</b> tidak di sarankan mengubah settingan kecuali bagian atur kecepatan aja.
</div>''',
        2: '''<div class="chat-bubble-bot">
Model ini hanya support <b>bahasa Jepang</b> aja sementara lainnya tidak bisa.
</div>''',
        3: '''<div class="chat-bubble-bot">
โš ๏ธ <b>Input Bukan Jepang</b> โš ๏ธ<br>
Karakter akan terdengar aneh karena model khusus bahasa Jepang.
</div>'''
    }

    def tts_fn(text, sdp_ratio, noise_scale, noise_scale_w, length_scale):
        """Synthesize ``text`` with the current model.

        Returns:
            A ``(status message, audio)`` pair where ``audio`` is
            ``(sample_rate, samples)`` on success and ``None`` on failure.
        """
        # Reject empty input up front instead of surfacing an inference error.
        if not text or not text.strip():
            return "Error: input text is empty.", None
        # No model is loaded when the holder listed no models at start-up.
        model = getattr(model_holder, "current_model", None)
        if model is None:
            return "Error: no model is loaded.", None
        try:
            sr, audio = model.infer(
                text=text, language="JP", sdp_ratio=sdp_ratio,
                noise=noise_scale, noise_w=noise_scale_w, length=length_scale,
                line_split=False, split_interval=0.5, speaker_id=0,
            )
        except Exception as e:
            # Keep the UI responsive, but leave a traceback in the logs.
            logger.exception("TTS inference failed")
            return f"Error: {e}", None
        return "Generation Complete! โœ…", (sr, audio)

    def chat_respond(choice):
        """Return the canned HTML answer for ``choice`` (welcome text if unknown)."""
        return chat_responses.get(choice, welcome_msg)

    # NOTE(review): the nesting of the layout below was reconstructed from a
    # copy of the file whose indentation was lost — confirm which components
    # belong to which Row/Column/Group against the original.
    with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="pink")) as app:
        gr.HTML('<div class="header-img-container"><img src="https://huggingface.co/spaces/Plana-Archive/Bocchi-the-Rock/resolve/main/Bocchi-the-Rock.PNG" class="header-img"></div>')
        # STATUS CARD
        gr.HTML('''
<div class="status-card">
<div style="display: flex; align-items: center; gap: 8px; margin-bottom: 12px;">
<span class="dot-online"></span><b style="color: #ff69b4;">Voice Conversion System Online</b>
</div>
<div style="display: flex; width: 100%; border-top: 1px solid #fff0f7; padding-top: 10px; justify-content: space-around;">
<div style="text-align:center;"><span style="font-size: 13px; font-weight: 600; color: #7b4d5a;">๐Ÿ‘ฅ 1 Students</span><br><span style="font-size: 11px; color: #b07d8b;">Ready</span></div>
<div style="text-align:center;"><span style="font-size: 13px; font-weight: 600; color: #7b4d5a;">๐Ÿ“Š Total Models</span><br><span style="font-size: 11px; color: #b07d8b;">Database: 1</span></div>
</div>
</div>
''')
        with gr.Row():
            # SIDEBAR
            with gr.Column(scale=1, elem_classes="side-bar-container"):
                gr.Markdown("### โš™๏ธ Audio Settings")
                length_scale = gr.Slider(0.1, 2, value=1.0, step=0.1, label="Kecepatan")
                sdp_ratio = gr.Slider(0, 1, value=0.2, step=0.1, label="SDP Ratio")
                with gr.Accordion("๐Ÿ”ฎ Advanced Settings ๐Ÿ”ฎ", open=False, elem_classes="pink-accordion"):
                    noise_scale = gr.Slider(0.1, 2, value=0.6, step=0.1, label="Noise")
                    noise_scale_w = gr.Slider(0.1, 2, value=0.8, step=0.1, label="Noise_W")
            # MAIN CONTENT
            with gr.Column(scale=2):
                with gr.Group():
                    text_input = gr.TextArea(label="Input Teks (JP)", value="ใกใ‚…ใฑใ€ใกใ‚…ใ‚‹ใ‚‹...", lines=10)
                    random_button = gr.Button("๐ŸŽฒ KLIK ACAK ISI TEXT ๐ŸŽฒ", variant="secondary")
                tts_button = gr.Button("Generate Voice", variant="primary", elem_classes="generate-btn")
                # --- CHATBOT AREA ---
                with gr.Column(elem_classes="chat-container"):
                    gr.HTML('''
<div class="chat-header">
<span class="chat-title">Plana AI - Asisten</span>
<span class="chat-status">โ— CONNECTED</span>
</div>
''')
                    chat_display = gr.HTML(welcome_msg)
                    with gr.Column(elem_classes="chat-button-group"):
                        btn_opt1 = gr.Button("1. Cara pakai VITS ini", elem_classes="chat-btn-opt")
                        btn_opt2 = gr.Button("2. Support bahasa apa aja VITS ini", elem_classes="chat-btn-opt")
                        btn_opt3 = gr.Button("3. Apa yang terjadi jika inputs bukan Jepang", elem_classes="chat-btn-opt")
                with gr.Row():
                    text_output = gr.Textbox(label="Status", interactive=False)
                    audio_output = gr.Audio(label="Output", interactive=False, elem_classes="output-audio-box")
        gr.HTML('''
<div class="note-box">
<div class="note-title">๐Ÿ“‘ TENTANG FITUR SETTINGS ๐Ÿ“‘</div>
<div class="note-content">
<b>Kecepatan:</b> Mengatur tempo bicara (Default 1.0).<br>
<b>SDP Ratio:</b> Ritme bicara. Angka tinggi lebih ekspresif.<br>
<b>Noise:</b> Stabilitas suara. Kurangi jika suara pecah.<br>
<b>Noise_W:</b> Dinamika durasi antar kata.
</div>
</div>
''')
        # FOOTER
        gr.HTML('''
<div class="footer-container">
<div style="font-family: 'Quicksand', sans-serif; font-size: 14px; color: #b07d8b; font-weight: 700;">
๐Ÿ‘… NSW Anime โ€ข TTS ๐Ÿ‘…
</div>
<div style="font-size: 11px; color: #d0a4b0; margin-top: 5px;">By Mutsumi โ€ข Style-Bert-VITS2</div>
</div>
''')
        # Event handlers (registered inside the Blocks context).
        random_button.click(get_random_text, outputs=[text_input])
        tts_button.click(
            tts_fn,
            inputs=[text_input, sdp_ratio, noise_scale, noise_scale_w, length_scale],
            outputs=[text_output, audio_output]
        )
        # Chatbot handlers: each button shows its canned answer.
        btn_opt1.click(fn=lambda: chat_respond(1), outputs=chat_display)
        btn_opt2.click(fn=lambda: chat_respond(2), outputs=chat_display)
        btn_opt3.click(fn=lambda: chat_respond(3), outputs=chat_display)
    return app
if __name__ == "__main__":
    # The Japanese BERT model and tokenizer are loaded before any TTS model.
    bert_models.load_model(Languages.JP)
    bert_models.load_tokenizer(Languages.JP)

    # Use the GPU when torch reports one, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    model_holder = TTSModelHolder(Path("model_assets"), device)
    app = create_inference_app(model_holder)
    app.launch()