Echo-ai committed · verified
Commit f023e65 · 1 parent: 3ddd5b6

Update app.py

Files changed (1):
  1. app.py +71 -16
app.py CHANGED
@@ -2,20 +2,21 @@ import os
  import requests
  from fastapi import FastAPI, HTTPException
  from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.responses import HTMLResponse
  from llama_cpp import Llama
  from pydantic import BaseModel
  import uvicorn

-
+ # Configuration
  MODEL_URL = "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
  MODEL_NAME = "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
  MODEL_DIR = "model"
  MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)

-
+ # Create model directory if it doesn't exist
  os.makedirs(MODEL_DIR, exist_ok=True)

-
+ # Download the model if it doesn't exist
  if not os.path.exists(MODEL_PATH):
      print(f"Downloading model from {MODEL_URL}...")
      response = requests.get(MODEL_URL, stream=True)
@@ -29,8 +30,12 @@ if not os.path.exists(MODEL_PATH):
  else:
      print("Model already exists. Skipping download.")

-
- app = FastAPI(title="DeepSeek-R1 OpenAI-Compatible API")
+ # Initialize FastAPI
+ app = FastAPI(
+     title="DeepSeek-R1 OpenAI-Compatible API",
+     description="OpenAI-compatible API for DeepSeek-R1-Distill-Qwen-1.5B",
+     version="1.0.0"
+ )

  # CORS Configuration
  app.add_middleware(
@@ -40,21 +45,76 @@ app.add_middleware(
      allow_headers=["*"],
  )

-
+ # Load the model
  print("Loading model...")
  try:
      llm = Llama(
          model_path=MODEL_PATH,
-         n_ctx=2048,
-         n_threads=4,
-         n_gpu_layers=0,
+         n_ctx=2048,
+         n_threads=4,
+         n_gpu_layers=0,
          verbose=False
      )
      print("Model loaded successfully!")
  except Exception as e:
      raise RuntimeError(f"Failed to load model: {str(e)}")

-
+ # Root endpoint with documentation
+ @app.get("/", response_class=HTMLResponse)
+ async def root():
+     return f"""
+     <html>
+     <head>
+         <title>DeepSeek-R1 OpenAI API</title>
+         <style>
+             body {{ font-family: Arial, sans-serif; max-width: 800px; margin: 20px auto; padding: 0 20px; }}
+             .warning {{ color: #dc3545; background: #ffeef0; padding: 15px; border-radius: 5px; }}
+             a {{ color: #007bff; text-decoration: none; }}
+             code {{ background: #f8f9fa; padding: 2px 4px; border-radius: 4px; }}
+         </style>
+     </head>
+     <body>
+         <h1>DeepSeek-R1 OpenAI-Compatible API</h1>
+
+         <div class="warning">
+             <h3>⚠️ Important Notice</h3>
+             <p>For private use, please duplicate this space:<br>
+             1. Click your profile picture in the top-right<br>
+             2. Select "Duplicate Space"<br>
+             3. Set visibility to Private</p>
+         </div>
+
+         <h2>API Documentation</h2>
+         <ul>
+             <li><a href="/docs">Interactive Swagger Documentation</a></li>
+             <li><a href="/redoc">ReDoc Documentation</a></li>
+         </ul>
+
+         <h2>API Endpoints</h2>
+         <h3>Chat Completion</h3>
+         <p><code>POST /v1/chat/completions</code></p>
+         <p>Parameters:</p>
+         <ul>
+             <li><strong>messages</strong>: List of message objects</li>
+             <li><strong>max_tokens</strong>: Maximum response length (default: 128)</li>
+             <li><strong>temperature</strong>: Sampling temperature (default: 0.7)</li>
+             <li><strong>top_p</strong>: Nucleus sampling threshold (default: 0.9)</li>
+         </ul>
+
+         <h2>Example Request</h2>
+         <pre>
+ curl -X POST "{os.environ.get('SPACE_HOST', 'http://localhost:7860')}/v1/chat/completions" \\
+      -H "Content-Type: application/json" \\
+      -d '{{
+            "messages": [{{"role": "user", "content": "Explain quantum computing"}}],
+            "max_tokens": 150
+          }}'
+         </pre>
+     </body>
+     </html>
+     """
+
+ # OpenAI-Compatible Request Schema
  class ChatCompletionRequest(BaseModel):
      model: str = "DeepSeek-R1-Distill-Qwen-1.5B"
      messages: list[dict]
@@ -63,7 +123,7 @@ class ChatCompletionRequest(BaseModel):
      top_p: float = 0.9
      stream: bool = False

-
+ # OpenAI-Compatible Response Schema
  class ChatCompletionResponse(BaseModel):
      id: str = "chatcmpl-12345"
      object: str = "chat.completion"
@@ -72,14 +132,12 @@ class ChatCompletionResponse(BaseModel):
      choices: list[dict]
      usage: dict

-
  @app.post("/v1/chat/completions")
  async def chat_completion(request: ChatCompletionRequest):
      try:
          prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in request.messages])
          prompt += "\nassistant:"

-
          response = llm(
              prompt=prompt,
              max_tokens=request.max_tokens,
@@ -88,7 +146,6 @@ async def chat_completion(request: ChatCompletionRequest):
              stop=["</s>"]
          )

-
          return ChatCompletionResponse(
              choices=[{
                  "index": 0,
@@ -107,11 +164,9 @@ async def chat_completion(request: ChatCompletionRequest):
      except Exception as e:
          raise HTTPException(status_code=500, detail=str(e))

-
  @app.get("/health")
  def health_check():
      return {"status": "healthy"}

-
  if __name__ == "__main__":
      uvicorn.run(app, host="0.0.0.0", port=7860)
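
For reference, a minimal Python client for the chat endpoint this commit documents. This is a sketch, not part of the commit: the base URL assumes a local run of the Space, and the final line assumes each choices entry carries an OpenAI-style message object, which the truncated response hunk above does not fully show.

import requests

# Assumed base URL; substitute your own Space host.
BASE_URL = "http://localhost:7860"

# Optional: confirm the server is up via the /health endpoint.
assert requests.get(f"{BASE_URL}/health").json()["status"] == "healthy"

payload = {
    "messages": [{"role": "user", "content": "Explain quantum computing"}],
    "max_tokens": 150,
    "temperature": 0.7,
    "top_p": 0.9,
}

resp = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload, timeout=300)
resp.raise_for_status()

# Assumed response shape (OpenAI-style); the diff elides the lines that build it.
print(resp.json()["choices"][0].get("message", {}).get("content"))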