Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, Form | |
| from fastapi.responses import FileResponse | |
| from typing import Annotated | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
| app = FastAPI() | |
async def root():
    """Return the file name of the landing page, ``home.html``.

    NOTE(review): no route decorator is visible in this excerpt. Presumably
    the handler is registered with ``response_class=FileResponse`` so the
    returned path is served as a file -- confirm against the full source.
    """
    landing_page = "home.html"
    return landing_page
async def say_hello(msg: Annotated[str, Form()]):
    """Generate a reply to ``msg`` with the ``google/gemma-2b-it`` model.

    Args:
        msg: Prompt text, read from the ``msg`` form field.

    Returns:
        A dict with a single ``"message"`` key holding the decoded first
        output sequence (decoded with the tokenizer's default settings).

    NOTE(review): ``model.generate`` is a synchronous call inside an
    ``async def``, so it occupies the event loop while it runs.
    """
    print("model")
    # Reuse the already-loaded checkpoint instead of re-loading it per request.
    tokenizer, model = _load_tokenizer_and_model()
    print("token & msg")
    # The model is loaded with device_map="auto", so it may not live on the
    # CPU; send the inputs to whichever device the model actually uses.
    input_ids = tokenizer(msg, return_tensors="pt").to(model.device)
    print("output")
    outputs = model.generate(**input_ids, max_length=500)
    print("complete")
    return {"message": tokenizer.decode(outputs[0])}


# Holds the (tokenizer, model) pair once it has been loaded.
_MODEL_CACHE = {}


def _load_tokenizer_and_model():
    """Return the ``(tokenizer, model)`` pair, loading it on first use only."""
    if "pair" not in _MODEL_CACHE:
        model_id = "google/gemma-2b-it"
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            torch_dtype=torch.bfloat16,
        )
        _MODEL_CACHE["pair"] = (tokenizer, model)
    return _MODEL_CACHE["pair"]