Spaces:
Paused
Paused
yutoAb committed on
Commit ·
4ee8d5c
1
Parent(s): b2eed5f
Add application file
Browse files- Dockerfile +39 -0
- README.md +59 -1
- app.py +201 -0
- requirements.txt +2 -0
Dockerfile
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim

WORKDIR /app

# Needed for Triton/Inductor JIT compilation (gcc, etc.)
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install dependencies first so these layers are cached independently
# of application-code changes.
COPY requirements.txt .

RUN pip install --no-cache-dir uv
RUN uv pip install --system --no-cache-dir -r requirements.txt

# Copy application code last: editing app.py no longer invalidates the
# dependency-install layers above (previously app.py was copied before
# the install, busting the cache on every code change).
COPY app.py .

# Stability: pin BLAS thread counts; allow the CUDA allocator to grow segments
ENV OMP_NUM_THREADS=1
ENV MKL_NUM_THREADS=1
ENV PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Unbuffered stdout so logs do not appear to stall
ENV PYTHONUNBUFFERED=1

# Lighter startup: disable torch.compile machinery
ENV TORCHDYNAMO_DISABLE=1
ENV TORCHINDUCTOR_DISABLE=1

ENV GRADIO_SERVER_NAME=0.0.0.0
ENV GRADIO_SERVER_PORT=7860

# Expose ports for:
# - 7860: Main Gradio interface
# - 8998: Moshi server 1 (finetuned-step-9282)
# - 8999: Moshi server 2 (j-moshi)
EXPOSE 7860 8998 8999

CMD ["python", "app.py"]
README.md
CHANGED
|
@@ -6,6 +6,64 @@ colorTo: indigo
|
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: apache-2.0
|
|
|
|
|
|
|
| 9 |
---
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: apache-2.0
|
| 9 |
+
short_description: Compare two Moshi models with tabbed interface
|
| 10 |
+
app_port: 7860
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# 🎙️ Dual Moshi Model Interface
|
| 14 |
+
|
| 15 |
+
Compare two Moshi voice AI models side-by-side with a tabbed interface.
|
| 16 |
+
Each model runs with full WebRTC support for real-time voice conversation.
|
| 17 |
+
|
| 18 |
+
## Available Models
|
| 19 |
+
|
| 20 |
+
### 1. Finetuned Step 9282
|
| 21 |
+
- **Repository**: `abePclWaseda/moshi-finetuned-step-9282`
|
| 22 |
+
- Fine-tuned Moshi model at training step 9282
|
| 23 |
+
- Optimized for specific use cases
|
| 24 |
+
- **Port**: 8998
|
| 25 |
+
|
| 26 |
+
### 2. J-Moshi (Japanese)
|
| 27 |
+
- **Repository**: `nu-dialogue/j-moshi`
|
| 28 |
+
- Japanese-optimized full-duplex spoken dialogue system
|
| 29 |
+
- Built on Moshi 7B with additional Japanese training data
|
| 30 |
+
- Supports natural turn-taking and backchannel responses (相槌)
|
| 31 |
+
- **Port**: 8999
|
| 32 |
+
|
| 33 |
+
## Features
|
| 34 |
+
|
| 35 |
+
- **Tabbed Interface**: Switch between models using tabs
|
| 36 |
+
- **Full WebRTC Support**: Complete `moshi.server` UI for each model
|
| 37 |
+
- **Dual Model Execution**: Both models run simultaneously
|
| 38 |
+
- **Real-time Voice**: Full-duplex conversation with microphone input
|
| 39 |
+
- **GPU Optimized**: Designed for 48GB+ GPU environments
|
| 40 |
+
|
| 41 |
+
## Architecture
|
| 42 |
+
|
| 43 |
+
The application runs:
|
| 44 |
+
1. **Main Gradio Interface** (Port 7860) - Tabbed UI for model selection
|
| 45 |
+
2. **Moshi Server 1** (Port 8998) - Finetuned Step 9282 model
|
| 46 |
+
3. **Moshi Server 2** (Port 8999) - J-Moshi model
|
| 47 |
+
|
| 48 |
+
Each tab embeds the complete `moshi.server` interface with WebRTC support.
|
| 49 |
+
|
| 50 |
+
## Requirements
|
| 51 |
+
|
| 52 |
+
- **GPU**: 48GB+ VRAM (e.g., NVIDIA A100 80GB or L40S 48GB) or equivalent recommended
|
| 53 |
+
- **Memory**: ~48GB GPU VRAM (24GB per model running simultaneously)
|
| 54 |
+
- **Docker**: Containerized deployment
|
| 55 |
+
|
| 56 |
+
## Usage
|
| 57 |
+
|
| 58 |
+
1. Open the application (port 7860)
|
| 59 |
+
2. Click on a tab to select which model to use
|
| 60 |
+
3. Click inside the embedded interface to interact with Moshi
|
| 61 |
+
4. Use the microphone button to start voice conversation
|
| 62 |
+
5. Switch tabs to compare different models
|
| 63 |
+
|
| 64 |
+
## Technical Details
|
| 65 |
+
|
| 66 |
+
- **Framework**: Gradio + moshi.server
|
| 67 |
+
- **Models**: Moshi (7B parameters each)
|
| 68 |
+
- **Codec**: MIMI audio codec
|
| 69 |
+
- **Ports**: 7860 (main), 8998, 8999 (model servers)
|
app.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""
Dual Moshi Model Interface
Runs two moshi.server instances and provides a tabbed interface for comparison
"""

import logging
import multiprocessing
import os   # NOTE(review): appears unused in this file — confirm before removing
import subprocess
import sys  # NOTE(review): appears unused in this file — confirm before removing
import time
from typing import List

import gradio as gr

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


# Model configurations: one entry per moshi.server instance to launch.
# "repo" is the Hugging Face model repository ID; "port" is the local TCP
# port the server listens on (must match the EXPOSE list in the Dockerfile
# and the iframe URLs built in create_gradio_interface()).
MODELS = [
    {
        "name": "Finetuned Step 9282",
        "repo": "abePclWaseda/moshi-finetuned-step-9282",
        "port": 8998,
        "description": "Fine-tuned Moshi model at training step 9282"
    },
    {
        "name": "J-Moshi (Japanese)",
        "repo": "nu-dialogue/j-moshi",
        "port": 8999,
        "description": "Japanese-optimized full-duplex dialogue system"
    }
]
def start_moshi_server(model_repo: str, port: int, model_name: str) -> None:
    """Start a moshi.server instance for a specific model.

    Blocks until the server process exits, relaying its combined
    stdout/stderr into this process's logger line by line. Intended to
    run inside a dedicated multiprocessing.Process (see main()).

    Args:
        model_repo: Hugging Face repository ID of the model to serve.
        port: TCP port the server should listen on.
        model_name: Human-readable label used as a log prefix.

    Raises:
        Exception: re-raises anything that prevents the server from being
            launched (e.g. the ``uv`` binary missing from PATH).
    """
    logger.info(f"Starting {model_name} on port {port}...")
    logger.info(f"  Repository: {model_repo}")

    try:
        # Run moshi.server with specified model and port
        cmd = [
            "uv", "run", "-m", "moshi.server",
            "--hf-repo", model_repo,
            "--host", "0.0.0.0",
            "--port", str(port)
        ]

        logger.info(f"  Command: {' '.join(cmd)}")

        # Merge stderr into stdout so a single reader loop captures
        # everything. text=True is the modern spelling of the deprecated
        # universal_newlines=True; bufsize=1 gives line buffering in
        # text mode.
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1
        )

        # Relay server output; this blocks until the server closes its
        # stdout (normally at process exit).
        for line in process.stdout:
            logger.info(f"  [{model_name}:{port}] {line.rstrip()}")

        return_code = process.wait()
        if return_code != 0:
            logger.error(f"{model_name} exited with code {return_code}")

    except Exception as e:
        logger.error(f"Failed to start {model_name}: {e}")
        raise
+
|
| 80 |
+
def create_gradio_interface() -> gr.Blocks:
    """Build the tabbed Gradio UI, one embedded moshi.server iframe per model."""

    # The CSS gives each embedded server a fixed-height bordered container
    # whose iframe fills it completely.
    with gr.Blocks(
        title="Dual Moshi Model Interface",
        theme=gr.themes.Soft(),
        css="""
        .iframe-container {
            width: 100%;
            height: 800px;
            border: 1px solid #e0e0e0;
            border-radius: 8px;
            overflow: hidden;
        }
        .iframe-container iframe {
            width: 100%;
            height: 100%;
            border: none;
        }
        """
    ) as demo:

        # Page header.
        gr.Markdown("""
        # 🎙️ Dual Moshi Model Interface

        Compare two Moshi voice AI models side-by-side. Each tab shows the full WebRTC interface
        for a different model. Switch tabs to compare responses.
        """)

        # One tab per configured model.
        with gr.Tabs() as tabs:
            for cfg in MODELS:
                with gr.TabItem(f"🎤 {cfg['name']}"):
                    # Model summary shown above the embedded interface.
                    gr.Markdown(f"""
                    ### {cfg['name']}
                    **Repository**: `{cfg['repo']}`

                    {cfg['description']}

                    ---
                    """)

                    # Embed the moshi.server UI; the "allow" attribute grants
                    # the iframe microphone/camera access for WebRTC.
                    gr.HTML(f"""
                    <div class="iframe-container">
                        <iframe src="http://localhost:{cfg['port']}"
                                allow="microphone; camera"
                                title="{cfg['name']} Interface">
                        </iframe>
                    </div>
                    """)

                    gr.Markdown(f"""
                    ---
                    💡 **Tip**: Click inside the frame to interact with the Moshi interface.
                    The server is running on port {cfg['port']}.
                    """)

        # Usage notes (Japanese): pick a tab, use the mic, compare models.
        gr.Markdown("""
        ---
        ### 使い方

        1. **タブを選択**: 上のタブで使用したいモデルを選択
        2. **音声入力**: 各タブ内のMoshi UIでマイクボタンをクリック
        3. **比較**: タブを切り替えて異なるモデルの応答を比較

        ### システム情報

        - 両方のモデルが独立したプロセスで同時に実行中
        - 各モデルは専用のポートで動作(8998, 8999)
        - フルWebRTC対応の音声ストリーミング
        """)

    return demo
| 154 |
+
|
| 155 |
+
def main():
    """Main entry point.

    Launches one moshi.server child process per entry in MODELS, waits
    briefly for them to come up, then serves the tabbed Gradio interface
    on port 7860 (blocking). Child processes are terminated on exit.
    """
    logger.info("="*60)
    logger.info("Dual Moshi Model Interface Starting...")
    logger.info("="*60)

    # Start moshi.server instances in separate processes. daemon=True
    # ensures they are killed if this process dies unexpectedly.
    processes: List[multiprocessing.Process] = []

    for model in MODELS:
        logger.info(f"Launching process for {model['name']}...")
        process = multiprocessing.Process(
            target=start_moshi_server,
            args=(model['repo'], model['port'], model['name']),
            daemon=True
        )
        process.start()
        processes.append(process)
        logger.info(f"  Process started: PID {process.pid}")

    # Give the servers a head start before the UI embeds them in iframes.
    # NOTE(review): a fixed sleep is a heuristic — first-run model
    # download/load can take much longer; confirm acceptable in practice.
    logger.info("Waiting for moshi.server instances to initialize...")
    time.sleep(10)

    # Create and launch the main Gradio interface
    logger.info("Creating Gradio interface...")
    demo = create_gradio_interface()

    # Launch on a different port (main interface)
    main_port = 7860
    logger.info(f"Launching main interface on port {main_port}...")

    try:
        demo.launch(
            server_name="0.0.0.0",
            server_port=main_port,
            share=False
        )
    except KeyboardInterrupt:
        logger.info("Shutting down...")
    finally:
        # Always reap the child servers. Previously terminate() ran only
        # on KeyboardInterrupt, so any other exception from launch()
        # (e.g. the port already bound) left the children running.
        for process in processes:
            process.terminate()
        logger.info("All processes terminated")
| 199 |
+
|
| 200 |
+
# Script entry point (invoked by the Dockerfile's CMD ["python", "app.py"]).
if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
moshi>=0.2.0,<=0.2.2
|
| 2 |
+
gradio>=4.0.0
|