Spaces:

Echo-AI-official
/

Deepseek-R1-1.5b-API

Sleeping

App Files Files Community

Echo-ai committed on Feb 3

Commit

3ddd5b6

verified ·

1 Parent(s): 3ba6e2c

Create app.py

Browse files

Files changed (1) hide show

app.py +117 -0

app.py ADDED Viewed

	@@ -0,0 +1,117 @@

+import os
+import time
+import uuid
+import requests
+import uvicorn
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from llama_cpp import Llama
+from pydantic import BaseModel, Field
+MODEL_URL = "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
+MODEL_NAME = "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
+MODEL_DIR = "model"
+MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
+os.makedirs(MODEL_DIR, exist_ok=True)
+if not os.path.exists(MODEL_PATH):
+    print(f"Downloading model from {MODEL_URL}...")
+    response = requests.get(MODEL_URL, stream=True)
+    if response.status_code == 200:
+        with open(MODEL_PATH, "wb") as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        print("Model downloaded successfully!")
+    else:
+        raise RuntimeError(f"Failed to download model: HTTP {response.status_code}")
+else:
+    print("Model already exists. Skipping download.")
+app = FastAPI(title="DeepSeek-R1 OpenAI-Compatible API")
+# CORS Configuration
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+print("Loading model...")
+try:
+    llm = Llama(
+        model_path=MODEL_PATH,
+        n_ctx=2048,
+        n_threads=4,
+        n_gpu_layers=0,
+        verbose=False
+    )
+    print("Model loaded successfully!")
+except Exception as e:
+    raise RuntimeError(f"Failed to load model: {str(e)}")
+class ChatCompletionRequest(BaseModel):
+    # Request body accepted by POST /v1/chat/completions.
+    # Only `messages` is required; every other field has a default.
+    model: str = "DeepSeek-R1-Distill-Qwen-1.5B"
+    # Conversation turns; the handler in this file reads msg['role'] and
+    # msg['content'] from each entry.
+    messages: list[dict]
+    # Sampling settings forwarded to the model call.
+    max_tokens: int = 128
+    temperature: float = 0.7
+    top_p: float = 0.9
+    # NOTE(review): accepted for client compatibility, but nothing visible
+    # in this file reads it - confirm whether streaming is meant to be supported.
+    stream: bool = False
+class ChatCompletionResponse(BaseModel):
+    id: str = "chatcmpl-12345"
+    object: str = "chat.completion"
+    created: int = 1693161600
+    model: str = "DeepSeek-R1-Distill-Qwen-1.5B"
+    choices: list[dict]
+    usage: dict
+@app.post("/v1/chat/completions")
+async def chat_completion(request: ChatCompletionRequest):
+    try:
+        prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in request.messages])
+        prompt += "\nassistant:"
+        response = llm(
+            prompt=prompt,
+            max_tokens=request.max_tokens,
+            temperature=request.temperature,
+            top_p=request.top_p,
+            stop=["</s>"]
+        )
+        return ChatCompletionResponse(
+            choices=[{
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": response['choices'][0]['text'].strip()
+                },
+                "finish_reason": "stop"
+            }],
+            usage={
+                "prompt_tokens": len(prompt),
+                "completion_tokens": len(response['choices'][0]['text']),
+                "total_tokens": len(prompt) + len(response['choices'][0]['text'])
+            }
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/health")
+def health_check():
+    return {"status": "healthy"}
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)