Spaces:

Liangjiejie
/

cicishi

Configuration error

App Files Files Community

Liangjiejie committed on Nov 11, 2025

Commit

d8c96d3

verified ·

1 Parent(s): 724b57f

Update app.py

Browse files

Files changed (1) hide show

app.py +158 -103

app.py CHANGED Viewed

@@ -1,121 +1,176 @@
 import gradio as gr
-import torch
-import scipy.io.wavfile as wav
-import numpy as np
 import os
-import logging
-from pypinyin import Style, pinyin
-import jieba
-# ----------------------------------------------------
-# ❗ 注意：为了运行此代码，您需要将 BERT-VITS2 的
-# 推理逻辑（如 text_to_sequence 和 g2p 模块）
-# 复制到您的 Space 中。由于无法直接提供，这里假设
-# 您已手动下载并添加到名为 'custom_modules' 的文件夹。
-# ----------------------------------------------------
-# 配置日志
-logging.basicConfig(level=logging.INFO)
-# --- 1. 模型初始化 ---
-# 警告：此 ID 仅用于占位，您需要手动上传 BERT-VITS2 的
-# .pth 权重文件和 config.json 文件到您的 Space 中。
-# 这里无法使用 from_pretrained 方法！
-# **您需要手动上传的模型文件到 Space 根目录**
-MODEL_NAME = "model_best"
-MODEL_PATH = f"logs/44k/{MODEL_NAME}.pth"
-CONFIG_PATH = "configs/bert_vits2_ljs_44k.json"
-# 假设模型和配置已经存在（需要您手动上传）
 try:
-    # 模拟加载 BERT-VITS2 所需的定制化组件
-    # 这一步在实际部署中将失败，因为缺少 BERT-VITS2 库文件
-    # 真正的 BERT-VITS2 部署需要将整个推理代码克隆下来
-    # 暂时使用占位符，如果模型加载失败，则退回到纯文本状态
-    class DummyModel:
-        def __init__(self):
-            self.config = type('Config', (object,), {'sampling_rate': 44100})
-            logging.error("使用占位模型！请手动上传 BERT-VITS2 文件。")
-        def generate(self, *args, **kwargs):
-            # 模拟生成一个 2 秒的静音音频
-            sample_rate = 44100
-            duration = 2
-            t = np.linspace(0, duration, int(sample_rate * duration), False)
-            audio = 0.5 * np.sin(2 * np.pi * 440 * t)
-            # 模拟模型输出
-            class Output:
-                def __init__(self, waveform):
-                    self.waveform = torch.tensor(waveform).unsqueeze(0).float()
-            return Output(audio)
-    model = DummyModel()
-    tokenizer = None
-    device = "cpu"
-    logging.info("模型加载状态：当前为 BERT-VITS2 占位模式。")
 except Exception as e:
-    logging.error(f"BERT-VITS2 复杂加载失败，错误: {e}")
     model = None
-    tokenizer = None
-# --- 2. 核心 TTS 函数 (使用拼音和分词) ---
-def generate_speech(text):
-    if not text or model is None:
-        return None, "❌ 模型未加载或文本为空。请检查 Logs 并上传 BERT-VITS2 文件。"
     try:
-        # 1. 中文分词和预处理 (BERT-VITS2需要复杂的音素处理)
-        processed_text = " ".join(jieba.cut(text))
-        logging.info(f"使用 BERT-VITS2 预处理文本: {processed_text[:60]}...")
-        # 2. 调用模型生成 (此处是占位符，实际应调用 BERT-VITS2.synthesize)
-        output = model.generate(text=processed_text)
-        # 3. 提取音频波形
-        speech = output.waveform.squeeze(0).cpu().numpy()
-        sampling_rate = model.config.sampling_rate
-        # 4. 保存为临时的WAV文件
-        output_file_path = "output_speech.wav"
-        speech_int16 = (speech * 32767).astype(np.int16)
-        wav.write(output_file_path, sampling_rate, speech_int16)
-        logging.info("语音生成占位成功。")
-        return output_file_path, f"⚠️ 占位模型成功运行。请上传 BERT-VITS2 文件以获得真人效果。"
     except Exception as e:
-        logging.error(f"语音生成过程中发生错误: {e}")
-        return None, f"❌ 语音生成过程中发生错误：{e}"
-# --- 3. Gradio 界面 (保持不变) ---
-with gr.Blocks() as demo:
-    gr.Markdown("# 🏆 BERT-VITS2 中文 TTS 部署 (需手动上传文件)")
-    gr.Markdown("**注意：这是目前开源领域效果最好的模型架构，但需要您手动上传模型权重文件！**")
-    gr.Markdown(f"当前模型状态：{'已加载占位模型' if model else '模型加载失败'}")
-    # 定义所有组件变量
-    text_input = gr.Textbox(lines=5, label="输入中文文本", placeholder="你好，这是 BERT-VITS2 部署。效果应该非常自然！")
-    audio_output = gr.Audio(label="生成的语音", type="filepath")
-    status_text = gr.Textbox(label="状态信息", interactive=False)
-    generate_btn = gr.Button("🚀 开始生成语音")
-    # 定义布局
     with gr.Row():
-        gr.Column(text_input, scale=3)
-        with gr.Column(scale=2):
-            audio_output
-            status_text
-            generate_btn
-    # 绑定按钮和函数
-    generate_btn.click(fn=generate_speech,
-                       inputs=text_input,
-                       outputs=[audio_output, status_text])
-demo.launch()

 import gradio as gr
+import requests
 import os
+import json
+from tqdm import tqdm
+from transformers import AutoTokenizer, AutoModelForTextToSpeech, AutoProcessor, AutoConfig
+from transformers.utils import is_flash_attn_available
+# Repository information: the Hugging Face repo the files are fetched from.
+REPO_ID = "BricksDisplay/ellie-Bert-VITS2"
+# Repo-relative paths of every file fetched at start-up; each one is stored
+# locally under the same relative path.
+FILES_TO_DOWNLOAD = [
+    "model.safetensors",
+    "config.json",
+    "configuration_bert_vits2.py",
+    "modeling_bert_vits2.py",
+    "preprocessor_config.json",
+    "processing_bert_vits2.py",
+    "processor_config.json",
+    "special_tokens_map.json",
+    "tokenizer_bert_vits2.py",
+    "tokenizer.json",
+    "tokenizer_config.json",
+    # Files located inside sub-folders
+    "bert_zh/config.json",
+    "bert_zh/tokenizer_config.json",
+    "bert_zh/vocab.txt",
+    "data/pinyin.json",
+    "data/symbols.json",
+    "onnx/config.json",
+    "onnx/model_index.json",
+    "onnx/tokenizer_config.json",
+]
+def download_file(file_path):
+    """从 Hugging Face CDN 下载文件到本地。"""
+    if os.path.exists(file_path):
+        print(f"文件已存在: {file_path}")
+        return
+    # 构造下载链接
+    url = f"https://huggingface.co/{REPO_ID}/resolve/main/{file_path}"
+    # 确保文件夹存在
+    os.makedirs(os.path.dirname(file_path) or '.', exist_ok=True)
+    print(f"正在下载: {file_path}")
+    try:
+        response = requests.get(url, stream=True)
+        response.raise_for_status() # 检查是否有 HTTP 错误
+        total_size = int(response.headers.get('content-length', 0))
+        block_size = 1024 # 1 Kibibyte
+        with open(file_path, 'wb') as f, tqdm(
+            desc=file_path,
+            total=total_size,
+            unit='iB',
+            unit_scale=True,
+            unit_divisor=1024,
+        ) as bar:
+            for data in response.iter_content(block_size):
+                bar.update(len(data))
+                f.write(data)
+        print(f"下载完成: {file_path}")
+    except Exception as e:
+        print(f"下载 {file_path} 失败: {e}")
+        # 如果是文件夹路径，确保创建它
+        if file_path.endswith('/') or '.' not in file_path.split('/')[-1]:
+            os.makedirs(file_path, exist_ok=True)
+        else:
+             raise Exception(f"无法下载文件: {file_path}")
+def ensure_files_exist():
+    """检查并下载所有必需的文件。"""
+    print("开始检查模型文件...")
+    for file in FILES_TO_DOWNLOAD:
+        download_file(file)
+# Make sure all required files are present (runs at import time).
+ensure_files_exist()
+# --- Model loading and inference ---
 try:
+    # Use flash attention only when it is available; otherwise fall back to "eager".
+    # NOTE(review): confirm that the installed transformers release exports
+    # `is_flash_attn_available` and `AutoModelForTextToSpeech` (both imported
+    # at the top of the file); an ImportError there happens before this
+    # try/except and would not be caught by it.
+    attn_implementation = "flash_attention_2" if is_flash_attn_available() else "eager"
+    # The files were downloaded into the working directory, so load from the local path '.'.
+    # NOTE(review): the download list contains custom modeling/processing
+    # *.py files; loading them through the Auto classes presumably requires
+    # `trust_remote_code=True` - verify against the repository's config.json.
+    model = AutoModelForTextToSpeech.from_pretrained(
+        ".",
+        attn_implementation=attn_implementation,
+        device_map="auto" # Automatic device placement (the original author expects CPU on this Space)
+    )
+    processor = AutoProcessor.from_pretrained(".")
+    # Default speaker ID passed to the processor.
+    DEFAULT_SPEAKER_ID = 0
+    print("模型加载成功！")
 except Exception as e:
+    print(f"模型加载失败，请检查下载的文件是否完整: {e}")
+    # Fall back to None placeholders so the Gradio app can still start;
+    # tts_generate checks for None and reports the failure to the user.
     model = None
+    processor = None
+    DEFAULT_SPEAKER_ID = 0
+def tts_generate(text):
+    """
+    文本转语音生成函数
+    """
+    if model is None or processor is None:
+        return None, "模型未加载成功，请检查日志或上传文件。"
+    if not text:
+        return None, "请输入中文文本。"
+    # 将文本和说话人ID传入处理器
+    inputs = processor(
+        text=text,
+        speaker_id=DEFAULT_SPEAKER_ID,
+        return_tensors="pt"
+    )
+    # 移到模型所在设备
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
     try:
+        # 生成语音
+        with torch.no_grad():
+            output = model.generate(**inputs, do_sample=True, top_k=50, top_p=0.9, temperature=0.7)
+        # 获取采样率
+        sampling_rate = model.config.sampling_rate
+        return (sampling_rate, output.squeeze().cpu().numpy()), "成功！"
     except Exception as e:
+        return None, f"生成语音失败: {e}"
+# --- Gradio interface ---
+import torch
+# torch is a runtime dependency; its version is displayed in the description below.
+torch_version = torch.__version__
+title = "免费中文文本转语音演示 (BERT-VITS2 模型)"
+# NOTE(review): the status bullet in this description is static text, so it
+# is displayed even when model loading failed and `model` was set to None.
+description = f"""
+**当前模型:** BricksDisplay/ellie-Bert-VITS2 (目前中文语音效果最好的开源模型之一)
+- **注意:** 由于您使用的是免费的 CPU Basic 硬件，模型较大（1.59 GB），语音生成会有较长延迟，这是正常的。
+- **状态:** 模型和文件已在启动时自动下载并加载。
+- **PyTorch 版本:** {torch_version}
+"""
+# Layout: title and description, then one row each for the text input, the
+# generate button, and the outputs (audio player + status box).
+with gr.Blocks() as demo:
+    gr.Markdown(f"# {title}")
+    gr.Markdown(description)
+    with gr.Row():
+        text_input = gr.Textbox(label="输入中文文本", placeholder="请输入您想合成的中文语句...")
     with gr.Row():
+        generate_button = gr.Button("🚀 开始生成语音")
+    with gr.Row():
+        audio_output = gr.Audio(label="生成的语音", type="numpy")
+        status_text = gr.Textbox(label="状态信息", value="模型等待输入...")
+    # Clicking the button runs tts_generate; its (audio, status) return pair
+    # fills the two output components in order.
+    generate_button.click(
+        fn=tts_generate,
+        inputs=[text_input],
+        outputs=[audio_output, status_text]
+    )
+# Start Gradio with request queuing enabled.
+demo.queue().launch()