# Spaces: Sleeping, Sleeping  (page-header residue captured with this source; not program code)
import functools
from typing import Annotated

import torch
from fastapi import FastAPI, Form
from fastapi.responses import FileResponse
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Module-level FastAPI application instance.
app = FastAPI()
async def root():
    """Return the name of the home page file as a plain string.

    NOTE(review): ``FileResponse`` is imported in this file but not used
    here, so the caller receives the literal string ``"home.html"`` rather
    than the file's contents -- confirm whether ``FileResponse("home.html")``
    was intended.
    NOTE(review): no route decorator is visible on this function in this
    view; confirm it is registered on ``app``.
    """
    return "home.html"
_MODEL_ID = "google/gemma-2b-it"


@functools.lru_cache(maxsize=1)
def _load_model():
    """Load the tokenizer and model once and reuse them on later calls.

    Loading with ``from_pretrained`` is expensive, so the pair is cached
    instead of being rebuilt for every request. A failed load is not cached
    (``lru_cache`` does not store exceptions), so the next call retries.

    Returns:
        A ``(tokenizer, model)`` tuple for ``_MODEL_ID``.
    """
    tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        _MODEL_ID,
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )
    return tokenizer, model


def say_hello(msg: Annotated[str, Form()]):
    """Generate a model reply for the submitted form field ``msg``.

    Args:
        msg: Prompt text, read from a form field.

    Returns:
        A dict ``{"message": <decoded text>}`` holding the decoded first
        generated sequence.

    NOTE(review): no route decorator is visible on this function in this
    view; confirm it is registered on ``app``.
    """
    print("model")
    tokenizer, model = _load_model()

    print("token & msg")
    # Send the inputs to wherever the model was placed: with
    # device_map="auto" that is not guaranteed to be the CPU, and a
    # hard-coded "cpu" would then fail inside generate().
    inputs = tokenizer(msg, return_tensors="pt").to(model.device)

    print("output")
    # max_length caps the total sequence length passed to generate().
    outputs = model.generate(**inputs, max_length=500)

    print("complete")
    # NOTE(review): the decoded text presumably includes the prompt and any
    # special tokens; left unchanged so the response content stays the same.
    return {"message": tokenizer.decode(outputs[0])}