arya-ai-model committed
Commit 102a3b0 · Parent: 88267dd

First commit
Files changed (9)
  1. Dockerfile +1 -5
  2. README.md +15 -9
  3. app.py +34 -0
  4. app/api.py +0 -14
  5. app/config.py +0 -5
  6. app/model.py +0 -19
  7. gitignore +4 -0
  8. requirements.txt +2 -3
  9. run.sh +0 -2
Dockerfile CHANGED
@@ -1,13 +1,9 @@
 FROM python:3.10
 
 WORKDIR /app
-
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
 COPY . .
 
-# Make the script executable
-RUN chmod +x run.sh
-
-CMD ["bash", "run.sh"]
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
README.md CHANGED
@@ -1,10 +1,16 @@
----
-title: Deepseek 7b
-emoji: 🐢
-colorFrom: red
-colorTo: green
-sdk: docker
-pinned: false
----
+# DeepSeek API Server
+This is a FastAPI-based text generation API using the DeepSeek-R1 7B model.
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+## Installation
+1. Clone this repository.
+2. Install dependencies: `pip install -r requirements.txt`
+3. Run the server: `uvicorn app:app --host 0.0.0.0 --port 7860`
+
+## Usage
+Send a POST request to `/generate` with a JSON payload:
+
+```json
+{
+  "prompt": "What is AI?",
+  "max_tokens": 100
+}
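A minimal Python client for the endpoint documented above (a sketch, assuming the server is reachable on localhost:7860, the port the Dockerfile's CMD uses):

```python
# Client sketch (not part of the commit): posts the README's example
# payload to a locally running server and prints the completion.
import requests

resp = requests.post(
    "http://localhost:7860/generate",
    json={"prompt": "What is AI?", "max_tokens": 100},
    timeout=600,  # a 7B model can take a while, especially on CPU
)
resp.raise_for_status()
print(resp.json()["generated_text"])
```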
app.py ADDED
@@ -0,0 +1,34 @@
+import torch
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
+
+# Model setup
+MODEL_NAME = "deepseek-ai/deepseek-llm-7b-base"
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Load model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME, torch_dtype=torch.bfloat16, device_map="auto"
+)
+model.generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
+model.generation_config.pad_token_id = model.generation_config.eos_token_id
+
+# FastAPI app
+app = FastAPI()
+
+# Request payload
+class TextGenerationRequest(BaseModel):
+    prompt: str
+    max_tokens: int = 100
+
+@app.post("/generate")
+async def generate_text(request: TextGenerationRequest):
+    try:
+        inputs = tokenizer(request.prompt, return_tensors="pt").to(DEVICE)
+        outputs = model.generate(**inputs, max_new_tokens=request.max_tokens)
+        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return {"generated_text": result}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
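One caveat in the handler above: the model is loaded with `device_map="auto"` while the inputs are moved to the hand-picked `DEVICE` string; when accelerate shards the model across devices, `model.device` is the safer target for inputs. A minimal sketch of that pattern, using the small public `gpt2` checkpoint as a stand-in so it runs without the 7B download:

```python
# Sketch (assumes "gpt2" as a small stand-in, not part of this commit):
# route inputs to model.device instead of a separately chosen device string.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("What is AI?", return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=20,
                             pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```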
app/api.py DELETED
@@ -1,14 +0,0 @@
-from fastapi import FastAPI
-from pydantic import BaseModel
-from app.model import model
-
-app = FastAPI()
-
-class PromptRequest(BaseModel):
-    prompt: str
-    max_length: int = 256
-
-@app.post("/generate")
-async def generate_text(request: PromptRequest):
-    result = model.generate(request.prompt, request.max_length)
-    return {"response": result}
app/config.py DELETED
@@ -1,5 +0,0 @@
-import os
-
-MODEL_NAME = "deepseek-ai/deepseek-llm-7b-base"
-HF_TOKEN = os.getenv("HF_TOKEN")  # Set this in the Hugging Face environment
-DEVICE = "cuda"  # Change to "cpu" if no GPU
app/model.py DELETED
@@ -1,19 +0,0 @@
-import os
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
-from app.config import MODEL_NAME, DEVICE
-
-class DeepSeekModel:
-    def __init__(self):
-        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=os.getenv("HF_TOKEN"))
-        self.model = AutoModelForCausalLM.from_pretrained(
-            MODEL_NAME, torch_dtype=torch.float16, device_map="auto"
-        )
-
-    def generate(self, prompt, max_length=256):
-        inputs = self.tokenizer(prompt, return_tensors="pt").to(DEVICE)
-        with torch.no_grad():
-            outputs = self.model.generate(**inputs, max_length=max_length)
-        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-model = DeepSeekModel()
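The deleted `generate` call used `max_length`, which caps prompt and completion combined, whereas the new app.py uses `max_new_tokens`, which budgets only the completion; under `max_length`, a long prompt can leave no room for generated text. A small illustration of the difference (a sketch using the public `gpt2` checkpoint, not part of this commit):

```python
# Sketch: max_length counts the prompt tokens, max_new_tokens does not.
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tokenizer("One two three four five six seven eight nine ten",
                   return_tensors="pt")  # ~10 tokens

total = model.generate(**inputs, max_length=15,
                       pad_token_id=tokenizer.eos_token_id)
fresh = model.generate(**inputs, max_new_tokens=15,
                       pad_token_id=tokenizer.eos_token_id)
print(total.shape[-1])  # <= 15: the 10-token prompt plus at most 5 new tokens
print(fresh.shape[-1])  # up to 25: the prompt plus 15 new tokens
```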
gitignore ADDED
@@ -0,0 +1,4 @@
+__pycache__/
+*.pyc
+*.pyo
+*.log
requirements.txt CHANGED
@@ -1,5 +1,4 @@
-fastapi
-uvicorn
 torch
 transformers
-accelerate
+fastapi
+uvicorn
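Worth noting: app.py loads the model with `device_map="auto"`, which in transformers requires the `accelerate` package that this change drops from requirements.txt. A hedged startup guard (an illustration, not part of the commit) makes that failure mode explicit:

```python
# Guard sketch: fail fast if accelerate is missing, since app.py's
# device_map="auto" load path depends on it.
try:
    import accelerate  # noqa: F401
except ImportError as exc:
    raise SystemExit(
        "device_map='auto' requires accelerate; add it to requirements.txt"
    ) from exc
```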
run.sh DELETED
@@ -1,2 +0,0 @@
-#!/bin/bash
-uvicorn app.api:app --host 0.0.0.0 --port 8000 --workers 1