# Kimi-K2.7-Code / app.py
# Author: akhaliq (HF Staff)
# Last commit: "Switch provider from :fastest to :novita" (db0634f)
import os
import json
from fastapi.responses import HTMLResponse
from gradio import Server
from openai import OpenAI
# Application object; the route and API handlers below register themselves on it via decorators.
app = Server()
@app.get("/")
async def homepage():
    """Serve ``index.html`` from the directory that contains this file.

    Returns:
        An ``HTMLResponse`` carrying the file's contents with status 200, or
        a short HTML error page with status 404 when the file is missing.
    """
    html_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "index.html")
    # Open directly rather than checking os.path.exists() first: the file
    # could disappear between the check and the open, which would surface as
    # an unhandled error instead of the intended 404.
    try:
        with open(html_path, "r", encoding="utf-8") as f:
            page = f.read()
    except (FileNotFoundError, NotADirectoryError):
        return HTMLResponse(content="<h1>index.html not found</h1>", status_code=404)
    return HTMLResponse(content=page, status_code=200)
def _parse_history(history_json):
    """Decode the JSON-encoded conversation history into a list of messages.

    Any input that is missing, blank, not valid JSON, or that decodes to
    something other than a list yields an empty history, so the caller can
    always append to the result.
    """
    if not isinstance(history_json, str) or not history_json.strip():
        return []
    try:
        parsed = json.loads(history_json)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError; RecursionError covers
        # pathologically nested input. Both mean "no usable history".
        return []
    # Valid JSON is not necessarily a list (e.g. "{}", "3", "null").
    return parsed if isinstance(parsed, list) else []


def _build_user_message(prompt, image_b64):
    """Build the user message dict for the current turn.

    When ``image_b64`` is a ``data:`` URL the content is a two-part list
    (image first, then the prompt text); otherwise it is the plain prompt.
    """
    if image_b64 and image_b64.startswith("data:"):
        content = [
            {"type": "image_url", "image_url": {"url": image_b64}},
            {"type": "text", "text": prompt},
        ]
    else:
        content = prompt
    return {"role": "user", "content": content}


@app.api(name="chat", stream_every=0.05)
def chat(prompt: str, history_json: str, image_b64: str = "") -> str:
    """
    Streaming chat — yields delta tokens via Gradio SSE queue.
    stream_every=0.05 means tokens are flushed every 50ms.

    Args:
        prompt: Text of the user's new message.
        history_json: JSON-encoded list of earlier messages. Blank, invalid,
            or non-list input is treated as an empty history.
        image_b64: Optional image as a ``data:`` URL; any other value is
            ignored.

    Yields:
        Text fragments of the model's reply as they arrive. Failures are
        reported as a yielded string beginning with ``[ERROR]:`` instead of
        being raised.
    """
    api_key = os.environ.get("HF_TOKEN") or os.environ.get("HF_API_TOKEN") or ""
    if not api_key:
        yield "[ERROR]: HF_TOKEN not configured. Add it to your Space secrets."
        return

    messages = _parse_history(history_json)
    messages.append(_build_user_message(prompt, image_b64))

    try:
        oai = OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=api_key,
            default_headers={"X-HF-Bill-To": "huggingface"},
        )
        stream = oai.chat.completions.create(
            model="moonshotai/Kimi-K2.7-Code:novita",
            messages=messages,
            stream=True,
        )
        for chunk in stream:
            # Some chunks carry no choices or an empty delta; skip those.
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content
    except Exception as e:
        # Endpoint boundary: report the failure to the client as a final
        # chunk rather than letting the exception escape the generator.
        yield f"\n[ERROR]: {str(e)}"
if __name__ == "__main__":
    # Start the app only when this file is executed as a script.
    app.launch()