miike-ai
/

DeepSeek-R1-Distill-Llama-70B-FP4

---
base_model:
- deepseek-ai/DeepSeek-R1-Distill-Llama-70B
---

```sh
# Replace any installed PyTorch packages with the nightly build from the
# cu128 wheel index.
pip uninstall -y torch torchvision torchaudio
pip install --pre torch torchvision torchaudio \
  --index-url https://download.pytorch.org/whl/nightly/cu128

# Install the vLLM wheel for the pinned version from the nightly wheel bucket.
export VLLM_VERSION=0.9.0
pip install "https://vllm-wheels.s3.us-west-2.amazonaws.com/nightly/vllm-${VLLM_VERSION}-cp38-abi3-manylinux1_x86_64.whl"

# hf_transfer is only used by huggingface_hub when this variable is set, so
# installing the package alone has no effect on download speed.
pip install hf_transfer
export HF_HUB_ENABLE_HF_TRANSFER=1

pip install flashinfer-python
# requests is needed by the Python chat client below, not by the server.
pip install requests

# Start the OpenAI-compatible API server on all interfaces, port 8000.
python3 -m vllm.entrypoints.openai.api_server --host 0.0.0.0 --port 8000 --model miike-ai/Deepseek-R1-Distill-Llama-70B-fp4
```

```python
import json
import sys
from typing import Dict, List, Optional

import requests
class ChatSession:
    """Chat history plus a streaming client for a chat-completions endpoint.

    The running conversation is kept in ``self.messages`` and the whole list
    is sent with every request.

    Args:
        model: Model name sent in the request body.
        url: Full URL of the chat-completions endpoint.
        temperature: Sampling temperature sent in the request body.
        timeout: Seconds handed to ``requests`` as its timeout. ``None``
            disables the timeout and waits indefinitely.
    """

    # Payload of the ``data:`` line that ends the event stream.
    _DONE_MARKER = "[DONE]"

    def __init__(
        self,
        model: str = "miike-ai/Deepseek-R1-Distill-Llama-70B-fp4",
        url: str = "http://localhost:8000/v1/chat/completions",
        temperature: float = 0.7,
        timeout: Optional[float] = 300.0,
    ):
        self.url = url
        self.model = model
        self.temperature = temperature
        self.timeout = timeout
        self.messages: List[Dict[str, str]] = []
        self.headers = {
            "Content-Type": "application/json",
            "Accept": "text/event-stream",  # For streaming support
        }

    def add_message(self, role: str, content: str):
        """Append one ``{"role": ..., "content": ...}`` entry to the history."""
        self.messages.append({"role": role, "content": content})

    @staticmethod
    def _sse_payload(line: str) -> Optional[str]:
        """Return the stripped payload of a ``data:`` line, else ``None``.

        Both ``data: x`` and ``data:x`` are accepted; any other line
        (comments, other fields) yields ``None``.
        """
        if not line.startswith("data:"):
            return None
        return line[len("data:"):].strip()

    @staticmethod
    def _delta_content(payload: str) -> Optional[str]:
        """Return the text delta carried by one JSON chunk, or ``None``.

        ``None`` is returned for anything that carries no text: invalid
        JSON, a missing or empty ``choices`` list, a missing or null
        ``delta``, or empty ``content``.
        """
        try:
            chunk = json.loads(payload)
        except json.JSONDecodeError:
            return None
        if not isinstance(chunk, dict):
            return None
        choices = chunk.get("choices")
        if not isinstance(choices, list) or not choices:
            return None
        first_choice = choices[0]
        if not isinstance(first_choice, dict):
            return None
        delta = first_choice.get("delta")
        if not isinstance(delta, dict):
            return None
        return delta.get("content") or None

    def stream_response(self):
        """Request a completion for the current history and print it as it streams.

        Text deltas are echoed to stdout as they arrive. If any text was
        received, the concatenated reply is appended to the history as an
        ``assistant`` message. Failures are reported on stdout, not raised.
        """
        payload = {
            "model": self.model,
            "messages": self.messages,
            "temperature": self.temperature,
            "stream": True,
        }
        try:
            with requests.post(
                self.url,
                headers=self.headers,
                json=payload,
                stream=True,
                timeout=self.timeout,
            ) as response:
                if response.status_code != 200:
                    print(f"\nError: API request failed with status code {response.status_code}")
                    print("Response:", response.text)
                    return
                print("\nAssistant: ", end="", flush=True)
                pieces = []
                for raw_line in response.iter_lines():
                    if not raw_line:
                        continue
                    try:
                        line = raw_line.decode("utf-8")
                    except UnicodeDecodeError as exc:
                        # Best effort: report the bad line and keep streaming.
                        print(f"\nError processing chunk: {exc}")
                        continue
                    data = self._sse_payload(line)
                    if data is None:
                        continue
                    if data == self._DONE_MARKER:
                        break
                    content = self._delta_content(data)
                    if content:
                        print(content, end="", flush=True)
                        pieces.append(content)
                print()  # New line after response
                reply = "".join(pieces)
                if reply:
                    self.add_message("assistant", reply)
        except requests.exceptions.Timeout:
            # Listed before ConnectionError because a connect timeout is
            # an instance of both exception types.
            print(f"\nError: Request to {self.url} timed out after {self.timeout} seconds")
        except requests.exceptions.ConnectionError:
            print(f"\nError: Could not connect to the API. Make sure the server is running at {self.url}")
        except Exception as e:
            print(f"\nUnexpected error: {str(e)}")
def run_chat_interface():
    """Drive an interactive terminal chat session.

    Prompts are read from stdin in a loop. ``exit`` or ``quit`` ends the
    loop, as do Ctrl-C and end-of-input. ``clear`` replaces the session with
    a new one. Any other non-empty input is added as a user message and a
    streamed reply is requested.
    """
    banner = (
        "\nChat Interface for Local API Testing",
        "=====================================",
        "Endpoint: http://localhost:8000/v1/chat/completions",
        "Type 'exit' or 'quit' to end the chat",
        "Type 'clear' to start a new chat session",
        "----------------------------------------\n",
    )
    for row in banner:
        print(row)

    session = ChatSession()
    while True:
        # The try covers the whole turn, so Ctrl-C while a reply is
        # streaming also ends the chat.
        try:
            prompt = input("User: ").strip()
            if prompt:
                command = prompt.lower()
                if command in ('exit', 'quit'):
                    print("\nGoodbye!")
                    break
                elif command == 'clear':
                    session = ChatSession()
                    print("\nStarted new chat session")
                else:
                    session.add_message("user", prompt)
                    session.stream_response()
        except (KeyboardInterrupt, EOFError) as stop:
            # Ctrl-C gets an extra leading newline; end-of-input does not.
            if isinstance(stop, KeyboardInterrupt):
                print("\n\nGoodbye!")
            else:
                print("\nGoodbye!")
            break


if __name__ == "__main__":
    run_chat_interface()
```