|
import torch
from fastapi import FastAPI, HTTPException
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
|
|
# Model identifier handed to both `from_pretrained` calls below.
MODEL_NAME = "EQUES/TinyDeepSeek-1.5B"

# The tokenizer and the model are loaded once, at import time; the request
# handlers in this module reuse them through `generator`.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    low_cpu_mem_usage=True,
    device_map="auto",
    torch_dtype=torch.float16,  # weights are requested in half precision
)

# Text-generation pipeline built from the tokenizer/model pair loaded above.
generator = pipeline(task="text-generation", tokenizer=tokenizer, model=model)

# Application object on which the route handlers are registered.
app = FastAPI()
|
|
|
@app.get("/")
def root():
    """Report that the service is up.

    Returns:
        A dict with a single ``"message"`` key holding a fixed status string.
    """
    status = {"message": "TinyDeepSeek API is running!"}
    return status
|
|
|
@app.get("/generate")
def generate(prompt: str, max_length: int = 100):
    """Generate text for *prompt* using the module-level pipeline.

    Args:
        prompt: Text passed to the text-generation pipeline as its input.
        max_length: Forwarded unchanged as the pipeline's ``max_length``
            argument. Must be at least 1.
            NOTE(review): per the transformers generation docs this limit
            appears to include the prompt's own tokens, so a prompt longer
            than ``max_length`` may still be rejected by the pipeline -
            confirm, and consider exposing ``max_new_tokens`` instead.

    Returns:
        A dict whose ``"response"`` key holds the ``generated_text`` field of
        the first result returned by the pipeline.

    Raises:
        HTTPException: With status 422 when ``max_length`` is less than 1.
    """
    # The value comes straight from the query string; reject nonsensical
    # limits here so the caller gets a client error rather than an unhandled
    # failure from inside the generation call.
    if max_length < 1:
        raise HTTPException(
            status_code=422,
            detail="max_length must be a positive integer",
        )

    outputs = generator(prompt, max_length=max_length)
    return {"response": outputs[0]["generated_text"]}
|
|
|
|
|
|