import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Globals populated once in init() and reused across inference() calls.
model = None
tokenizer = None
device = None


def init():
    """Load the tokenizer and model once at startup and move them to the best available device."""
    global model, tokenizer, device
    model_name_or_path = "0xroyce/NazareAI-Senior-Marketing-Strategist"

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        # Half precision on GPU cuts memory use; fall back to full precision on CPU.
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()


def inference(model_inputs: dict) -> dict:
    """Generate text for the prompt in model_inputs and return it as a dict."""
    global model, tokenizer, device
    prompt = model_inputs.get("prompt", "")
    if not prompt:
        return {"error": "No prompt provided."}

    # Tokenize the prompt and move the input tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # Sample up to 128 new tokens with nucleus sampling; no gradients are needed at inference time.
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=128,
            do_sample=True,
            top_p=0.9,
            temperature=0.7,
        )
    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return {"generated_text": generated_text}
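

# A minimal local smoke test (an assumption, not part of any serving framework):
# hosts that use this init()/inference() pair typically call init() once at startup
# and then pass a JSON-like dict to inference(). The block below mimics that flow
# so the handler can be exercised with `python app.py` before deploying; the example
# prompt is illustrative only.
if __name__ == "__main__":
    init()
    result = inference({"prompt": "Draft a tagline for an eco-friendly water bottle."})
    print(result.get("generated_text", result))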