miike-ai commited on
Commit
df832be
·
verified ·
1 Parent(s): c63afa7

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +128 -0
README.md ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
---
base_model:
- deepseek-ai/DeepSeek-R1-Distill-Llama-70B
---

```sh
pip uninstall -y torch torchvision torchaudio
pip install --pre torch torchvision torchaudio \
  --index-url https://download.pytorch.org/whl/nightly/cu128

export VLLM_VERSION=0.9.0
pip install https://vllm-wheels.s3.us-west-2.amazonaws.com/nightly/vllm-${VLLM_VERSION}-cp38-abi3-manylinux1_x86_64.whl

pip install hf_transfer
pip install flashinfer-python
pip install requests

python3 -m vllm.entrypoints.openai.api_server --host 0.0.0.0 --port 8000 --model miike-ai/Deepseek-R1-Distill-Llama-70B-fp4
```

```python
import requests
import json
import sys
from typing import List, Dict
class ChatSession:
    """A running conversation with a local OpenAI-compatible vLLM server.

    Keeps the message history in memory and streams each assistant reply
    to stdout as server-sent events arrive.
    """

    def __init__(self, model: str = "miike-ai/Deepseek-R1-Distill-Llama-70B-fp4"):
        # OpenAI-compatible chat endpoint served by vLLM on localhost.
        self.url = "http://localhost:8000/v1/chat/completions"
        self.model = model
        self.messages: List[Dict[str, str]] = []
        self.headers = {
            "Content-Type": "application/json",
            "Accept": "text/event-stream"  # For streaming support
        }

    def add_message(self, role: str, content: str):
        """Append one turn ({role, content}) to the conversation history."""
        self.messages.append({"role": role, "content": content})

    def stream_response(self):
        """POST the current history and stream the assistant reply to stdout.

        On success the full reply text is appended to the history; on any
        HTTP or connection failure an error is printed and nothing is stored.
        """
        payload = {
            "model": self.model,
            "messages": self.messages,
            "temperature": 0.7,
            "stream": True
        }

        try:
            with requests.post(self.url, headers=self.headers, json=payload, stream=True) as response:
                if response.status_code != 200:
                    print(f"\nError: API request failed with status code {response.status_code}")
                    print("Response:", response.text)
                    return

                print("\nAssistant: ", end="", flush=True)
                pieces = []

                for raw in response.iter_lines():
                    if not raw:
                        continue
                    try:
                        decoded = raw.decode('utf-8')
                        if not decoded.startswith('data: '):
                            continue
                        body = decoded[6:]  # strip the SSE 'data: ' prefix
                        if body.strip() == '[DONE]':
                            break
                        try:
                            token = json.loads(body).get('choices', [{}])[0].get('delta', {}).get('content')
                        except json.JSONDecodeError:
                            continue
                        if token:
                            print(token, end="", flush=True)
                            pieces.append(token)
                    except Exception as e:
                        # Malformed/undecodable chunk: report it and keep streaming.
                        print(f"\nError processing chunk: {str(e)}")
                        continue

                print()  # New line after response
                reply = "".join(pieces)
                if reply:
                    self.add_message("assistant", reply)

        except requests.exceptions.ConnectionError:
            print("\nError: Could not connect to the API. Make sure the server is running on localhost:8000")
        except Exception as e:
            print(f"\nUnexpected error: {str(e)}")
def run_chat_interface():
    """
    Run an interactive chat interface in the terminal
    """
    banner = (
        "\nChat Interface for Local API Testing",
        "=====================================",
        "Endpoint: http://localhost:8000/v1/chat/completions",
        "Type 'exit' or 'quit' to end the chat",
        "Type 'clear' to start a new chat session",
        "----------------------------------------\n",
    )
    for line in banner:
        print(line)

    chat = ChatSession()

    while True:
        # The try spans the whole turn so Ctrl-C during streaming also
        # exits cleanly, not just during input().
        try:
            text = input("User: ").strip()

            if not text:
                continue

            command = text.lower()
            if command in ('exit', 'quit'):
                print("\nGoodbye!")
                break
            if command == 'clear':
                chat = ChatSession()  # drop the old history entirely
                print("\nStarted new chat session")
                continue

            chat.add_message("user", text)
            chat.stream_response()

        except KeyboardInterrupt:
            print("\n\nGoodbye!")
            break
        except EOFError:
            print("\nGoodbye!")
            break
# Entry point: start the interactive chat loop only when run as a script,
# not when this module is imported.
if __name__ == "__main__":
    run_chat_interface()
```