import json
import os

from fastapi.responses import HTMLResponse
from gradio import Server
from openai import OpenAI

app = Server()


@app.get("/")
async def homepage():
    """Serve ``index.html`` from this module's directory.

    Returns:
        An ``HTMLResponse`` with the file contents and status 200, or a
        short "not found" body with status 404 when the file is missing.
    """
    html_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "index.html")
    # Open directly instead of checking existence first, so the file cannot
    # disappear between the check and the read.
    try:
        with open(html_path, "r", encoding="utf-8") as f:
            page = f.read()
    except FileNotFoundError:
        return HTMLResponse(content="\n\nindex.html not found\n\n", status_code=404)
    return HTMLResponse(content=page, status_code=200)


def _load_history(history_json: str) -> list:
    """Decode the client-supplied history into a list of prior messages.

    Returns an empty list when the payload is blank, cannot be decoded, or
    decodes to something other than a JSON array.
    """
    try:
        parsed = json.loads(history_json) if history_json.strip() else []
    except Exception:
        # Deliberate best-effort: an unusable history from the client must
        # not abort the request, so the chat simply starts without context.
        return []
    # Valid JSON is not necessarily an array ("{}", "null", "3", ...); the
    # caller appends to the result, so anything else is treated as no history.
    return parsed if isinstance(parsed, list) else []


# NOTE(review): this function is a generator, so "-> str" describes each
# yielded chunk rather than the return value. The annotation is left as-is
# because the decorator may inspect it -- confirm before changing.
@app.api(name="chat", stream_every=0.05)
def chat(prompt: str, history_json: str, image_b64: str = "") -> str:
    """Stream a chat completion for ``prompt`` as incremental text chunks.

    The endpoint is registered with ``stream_every=0.05`` (per the original
    note, tokens are flushed every 50 ms).

    Args:
        prompt: Text of the new user message.
        history_json: JSON-encoded array of earlier messages; blank, invalid
            or non-array input is treated as an empty history.
        image_b64: Optional image for the new message. It is attached only
            when it starts with ``"data:"``.

    Yields:
        Each non-empty content delta of the completion. Failures are not
        raised; they are yielded as a single ``[ERROR]: ...`` string.
    """
    api_key = os.environ.get("HF_TOKEN") or os.environ.get("HF_API_TOKEN") or ""
    if not api_key:
        yield "[ERROR]: HF_TOKEN not configured. Add it to your Space secrets."
        return

    messages = _load_history(history_json)

    # Build user content: the image part (if any) goes before the text part.
    user_content = []
    if image_b64 and image_b64.startswith("data:"):
        user_content.append({
            "type": "image_url",
            "image_url": {"url": image_b64},
        })
    user_content.append({"type": "text", "text": prompt})

    # Text-only turns are sent as a plain string; the list-of-parts form is
    # used only when an image was attached.
    messages.append({
        "role": "user",
        "content": user_content if len(user_content) > 1 else prompt,
    })

    try:
        oai = OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=api_key,
            default_headers={"X-HF-Bill-To": "huggingface"},
        )
        stream = oai.chat.completions.create(
            model="moonshotai/Kimi-K2.7-Code:novita",
            messages=messages,
            stream=True,
        )
        for chunk in stream:
            # Skip chunks that carry no choices or no text.
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content
    except Exception as e:
        # Top-level boundary for the stream: report the failure to the client
        # as text instead of breaking the response.
        yield f"\n[ERROR]: {str(e)}"


if __name__ == "__main__":
    app.launch()