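# Inference example for ai4bharat/Airavata: user prompts are wrapped in the
# Tulu chat template and answered with greedy decoding.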
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
device = "cuda" if torch.cuda.is_available() else "cpu"


def create_prompt_with_chat_format(messages, bos="<s>", eos="</s>", add_bos=True):
    # Build a Tulu-style chat prompt from a list of {"role", "content"} messages.
    formatted_text = ""
    for message in messages:
        if message["role"] == "system":
            formatted_text += "<|system|>\n" + message["content"] + "\n"
        elif message["role"] == "user":
            formatted_text += "<|user|>\n" + message["content"] + "\n"
        elif message["role"] == "assistant":
            formatted_text += "<|assistant|>\n" + message["content"].strip() + eos + "\n"
        else:
            raise ValueError(
                "Tulu chat template only supports 'system', 'user' and 'assistant' roles. Invalid role: {}.".format(
                    message["role"]
                )
            )
    # End with an assistant tag so the model continues as the assistant.
    formatted_text += "<|assistant|>\n"
    formatted_text = bos + formatted_text if add_bos else formatted_text
    return formatted_text
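
# For example, create_prompt_with_chat_format(
#     [{"role": "user", "content": "नमस्ते"}], add_bos=False
# ) returns "<|user|>\nनमस्ते\n<|assistant|>\n".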


def inference(input_prompts, model, tokenizer):
    # Wrap each raw prompt in the chat template before tokenization.
    input_prompts = [
        create_prompt_with_chat_format([{"role": "user", "content": input_prompt}], add_bos=False)
        for input_prompt in input_prompts
    ]
    encodings = tokenizer(input_prompts, padding=True, return_tensors="pt")
    encodings = encodings.to(device)

    with torch.inference_mode():
        outputs = model.generate(encodings.input_ids, do_sample=False, max_new_tokens=250)

    output_texts = tokenizer.batch_decode(outputs.detach(), skip_special_tokens=True)

    # Re-decode the prompts without special tokens so they can be stripped from
    # the front of the decoded generations, leaving only the model's replies.
    input_prompts = [
        tokenizer.decode(tokenizer.encode(input_prompt), skip_special_tokens=True) for input_prompt in input_prompts
    ]
    output_texts = [output_text[len(input_prompt) :] for input_prompt, output_text in zip(input_prompts, output_texts)]
    return output_texts


model_name = "ai4bharat/Airavata"

# Left padding and EOS as the pad token so batched generation works correctly.
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)

# Hindi prompts, roughly: "How can I improve my time management skills?
# Give me five points (and describe them)."
input_prompts = [
    "मैं अपने समय प्रबंधन कौशल को कैसे सुधार सकता हूँ? मुझे पांच बिंदु बताएं।",
    "मैं अपने समय प्रबंधन कौशल को कैसे सुधार सकता हूँ? मुझे पांच बिंदु बताएं और उनका वर्णन करें।",
]

outputs = inference(input_prompts, model, tokenizer)
print(outputs)
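
# ---------------------------------------------------------------------------
# Alternative Gradio front-ends (currently commented out); the script above
# runs on its own.
# ---------------------------------------------------------------------------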
# import gradio as gr
# from transformers import AutoTokenizer, AutoModelForCausalLM
#
# tokenizer = AutoTokenizer.from_pretrained("ai4bharat/Airavata")
# model = AutoModelForCausalLM.from_pretrained("ai4bharat/Airavata")
#
# def generate_response(prompt):
#     input_ids = tokenizer.encode(prompt, return_tensors="pt", max_length=50)
#     output_ids = model.generate(input_ids, max_length=100, num_beams=5, no_repeat_ngram_size=2)
#     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
#     return response
#
# iface = gr.Interface(
#     fn=generate_response,
#     inputs="text",
#     outputs="text",
#     live=True,
#     title="Airavata LLMs Chatbot",
#     description="Ask me anything, and I'll generate a response!",
#     theme="light",
# )
#
# iface.launch()
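
# A second variant: the same chat-template and inference helpers wired into a
# Gradio chat UI.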
# import gradio as gr
# import torch
# from transformers import AutoTokenizer, AutoModelForCausalLM
#
# device = "cuda" if torch.cuda.is_available() else "cpu"
#
# def create_prompt_with_chat_format(messages, bos="<s>", eos="</s>", add_bos=True):
#     formatted_text = ""
#     for message in messages:
#         if message["role"] == "system":
#             formatted_text += "<|system|>\n" + message["content"] + "\n"
#         elif message["role"] == "user":
#             formatted_text += "<|user|>\n" + message["content"] + "\n"
#         elif message["role"] == "assistant":
#             formatted_text += "<|assistant|>\n" + message["content"].strip() + eos + "\n"
#         else:
#             raise ValueError(
#                 "Tulu chat template only supports 'system', 'user', and 'assistant' roles. Invalid role: {}.".format(
#                     message["role"]
#                 )
#             )
#     formatted_text += "<|assistant|>\n"
#     formatted_text = bos + formatted_text if add_bos else formatted_text
#     return formatted_text
#
# def inference(input_prompts, model, tokenizer):
#     input_prompts = [
#         create_prompt_with_chat_format([{"role": "user", "content": input_prompt}], add_bos=False)
#         for input_prompt in input_prompts
#     ]
#     encodings = tokenizer(input_prompts, padding=True, return_tensors="pt")
#     encodings = encodings.to(device)
#     with torch.no_grad():
#         outputs = model.generate(encodings.input_ids, do_sample=False, max_length=250)
#     output_texts = tokenizer.batch_decode(outputs.detach(), skip_special_tokens=True)
#     input_prompts = [
#         tokenizer.decode(tokenizer.encode(input_prompt), skip_special_tokens=True) for input_prompt in input_prompts
#     ]
#     output_texts = [output_text[len(input_prompt) :] for input_prompt, output_text in zip(input_prompts, output_texts)]
#     return output_texts
#
# model_name = "ai4bharat/Airavata"
# tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
# tokenizer.pad_token = tokenizer.eos_token
# model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)
# examples = [
#     "मुझे अपने करियर के बारे में सुझाव दो",
#     "मैं कैसे अध्ययन कर सकता हूँ?",
#     "कृपया मुझे एक कहानी सुनाएं",
#     "ताजमहल के बारे में कुछ बताएं",
#     "मेरा नाम क्या है?",
#     "आपका पसंदीदा फिल्म कौन सी है?",
# ]
#
# iface = gr.ChatInterface(
#     # ChatInterface calls fn(message, history) and expects a string reply.
#     fn=lambda message, history: inference([message], model, tokenizer)[0],
#     examples=examples,
#     title="Airavata Chatbot",
#     theme="light",  # Optional: Set a light theme
# )
#
# iface.launch()