from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

MODEL_NAME = "EQUES/TinyDeepSeek-1.5B"

# Apply memory optimizations when loading the model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,  # load weights in 16-bit to save memory
    device_map="auto",          # automatically place model shards across available devices (GPU/CPU)
    low_cpu_mem_usage=True      # reduce peak memory during initialization
)
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

app = FastAPI()

@app.get("/")
def root():
    return {"message": "TinyDeepSeek API is running!"}

@app.get("/generate")
def generate(prompt: str, max_length: int = 100):
    result = generator(prompt, max_length=max_length)[0]["generated_text"]
    return {"response": result}
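
# --- Usage sketch ---
# A minimal way to exercise the API above. Assumptions not stated in the
# original: this file is saved as main.py and uvicorn is installed.
#
# Start the server:
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
# Query the /generate endpoint (hypothetical example prompt):
#   curl "http://localhost:8000/generate?prompt=Hello&max_length=50"
#
# The response is JSON of the form {"response": "<generated text>"}.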