import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Globals populated once by init() and reused across inference() calls.
model = None
tokenizer = None
device = None
def init():
    """Load the model and tokenizer once at startup."""
    global model, tokenizer, device
    model_name_or_path = "0xroyce/NazareAI-Senior-Marketing-Strategist"
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    # Half precision on GPU to cut memory use; fall back to fp32 on CPU.
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
def inference(model_inputs: dict) -> dict:
    """Generate a completion for the prompt in model_inputs."""
    global model, tokenizer, device
    prompt = model_inputs.get("prompt", "")
    if not prompt:
        return {"error": "No prompt provided."}
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # Disable gradient tracking during generation to save memory.
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=128, do_sample=True, top_p=0.9, temperature=0.7)
    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return {"generated_text": generated_text}