arya-ai-model committed on
Commit
d6f9a33
·
1 Parent(s): ad07cce

First commit

Browse files
Files changed (6) hide show
  1. Dockerfile +13 -0
  2. app/api.py +14 -0
  3. app/config.py +5 -0
  4. app/model.py +18 -0
  5. app/requirements.txt +5 -0
  6. run.sh +2 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the text-generation API, based on the official CPython 3.10 image.
FROM python:3.10

WORKDIR /app

# requirements.txt lives under app/ in this commit, so copy it from there.
# Installing dependencies before copying the rest of the tree keeps this
# layer cached when only application code changes.
COPY app/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Make the script executable
RUN chmod +x run.sh

CMD ["bash", "run.sh"]
app/api.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from app.model import model
4
+
5
+ app = FastAPI()
6
+
7
class PromptRequest(BaseModel):
    # Request body for the POST /generate endpoint.

    # Text handed to the model as the generation prompt.
    prompt: str

    # Forwarded to the model's generate call as ``max_length``; 256 if omitted.
    max_length: int = 256
10
+
11
@app.post("/generate")
async def generate_text(request: PromptRequest):
    """Generate text for the posted prompt.

    Args:
        request: Parsed request body carrying ``prompt`` and ``max_length``.

    Returns:
        A dict of the form ``{"response": <generated text>}``.
    """
    # Function-scope import so the handler does not depend on the module's
    # top-level import block.
    import asyncio

    # model.generate is a synchronous call. Invoking it directly inside a
    # coroutine would stall the event loop (and every other request) for the
    # whole generation, so it is run in a worker thread instead.
    # NOTE(review): requests may now overlap in worker threads; confirm the
    # model tolerates concurrent generate calls or add a lock.
    result = await asyncio.to_thread(
        model.generate, request.prompt, request.max_length
    )
    return {"response": result}
app/config.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import os
2
+
3
# Model identifier handed to the ``from_pretrained`` loaders.
MODEL_NAME = "deepseek-ai/deepseek-llm-r1-7b"

# Access token read from the environment; None when HF_TOKEN is not set.
# Set this in the Hugging Face environment.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Device name for tensors. Change to "cpu" if no GPU.
DEVICE = "cuda"
app/model.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer
2
+ import torch
3
+ from app.config import MODEL_NAME, DEVICE
4
+
5
class DeepSeekModel:
    """Wrap a causal language model and its tokenizer behind one ``generate`` call."""

    def __init__(self):
        """Load the tokenizer and model named by ``MODEL_NAME``.

        The ``HF_TOKEN`` environment variable (``None`` when unset) is passed
        as the access token to both loads, so a gated or private checkpoint
        can be fetched for the weights as well as the tokenizer.
        """
        # Local import: ``os`` is not among this module's top-level imports,
        # and without it the token lookup below raises NameError.
        import os

        hf_token = os.getenv("HF_TOKEN")
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
            token=hf_token,
        )

    def generate(self, prompt, max_length=256):
        """Return the decoded text generated for ``prompt``.

        Args:
            prompt: Input text to tokenize and feed to the model.
            max_length: Passed through unchanged as the ``max_length``
                argument of the underlying model's ``generate``.

        Returns:
            The first generated sequence decoded to a string, with special
            tokens skipped.
        """
        # The model is placed by ``device_map="auto"``, so send the inputs to
        # wherever the model actually is instead of a fixed device name that
        # may not match (e.g. "cuda" on a CPU-only host).
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        # Inference only: gradient tracking is disabled.
        with torch.no_grad():
            outputs = self.model.generate(**inputs, max_length=max_length)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
17
+
18
# Module-level instance created at import time; the constructor loads the
# tokenizer and model, so importing this module triggers that load.
model = DeepSeekModel()
app/requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ torch
4
+ transformers
5
+ accelerate
run.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
#!/bin/bash
# Serve the FastAPI app (app/api.py, object `app`) on all interfaces, port 8000,
# with a single uvicorn worker.
# `exec` replaces the shell with uvicorn so the server receives container
# signals (e.g. SIGTERM on stop) directly instead of through a bash parent.
exec uvicorn app.api:app --host 0.0.0.0 --port 8000 --workers 1