import os
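# Installing dependencies at runtime keeps the Space self-contained;
# a requirements.txt would be the more robust alternative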
os.system("pip install torch transformers accelerate")
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
from accelerate import Accelerator
SYSTEM_PROMPT = "As a generative chatbot (you are not a GPT but your structure is 50% the same), your primary function is to provide helpful and friendly responses to user queries. Feel free to add some personality, but make sure your responses are accurate and helpful. Your owner and developer is: @Costikoooo (Discord user) other developers are unknown. Your name is Chattybot."
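# Display name and example prompt for the Gradio preview UI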
TITLE = "Chattybot"
EXAMPLE_INPUT = "hello"
# Load the StableLM Zephyr 3B tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('stabilityai/stablelm-zephyr-3b')
model = AutoModelForCausalLM.from_pretrained(
    'stabilityai/stablelm-zephyr-3b',
    trust_remote_code=True,
    device_map="auto",
)
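# Hugging Face token and auth header for authenticated API calls
# (not used elsewhere in this script)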
HF_TOKEN = os.getenv("HF_TOKEN")
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
# Initialize Accelerator for device selection; device_map="auto" has already
# placed the model, and the tokenizer holds no tensors, so neither needs
# to be wrapped with accelerator.prepare()
accelerator = Accelerator()
def build_input_prompt(message, chatbot, system_prompt):
    # Zephyr-style chat format: <|system|>, <|user|> and <|assistant|> mark
    # the turns, and </s> closes each one
    input_prompt = "<|system|>\n" + system_prompt + "</s>\n<|user|>\n"
    for interaction in chatbot:
        input_prompt = input_prompt + str(interaction[0]) + "</s>\n<|assistant|>\n" + str(interaction[1]) + "\n</s>\n<|user|>\n"
    input_prompt = input_prompt + str(message) + "</s>\n<|assistant|>"
    return input_prompt
def predict_beta(message, chatbot=[], system_prompt=""):
    input_prompt = build_input_prompt(message, chatbot, system_prompt)
    # Tokenize the prompt and move it onto the execution device before generating
    inputs = tokenizer(input_prompt, return_tensors="pt").to(accelerator.device)
    tokens = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=1024,
        temperature=0.8,
        do_sample=True,
    )
    # Decode the full sequence (prompt + completion); the caller strips the prompt
    bot_message = tokenizer.decode(tokens[0], skip_special_tokens=True)
    return bot_message
def test_preview_chatbot(message, history):
    response = predict_beta(message, history, SYSTEM_PROMPT)
    # Keep only the text after the final assistant marker
    text_start = response.rfind("<|assistant|>") + len("<|assistant|>")
    response = response[text_start:]
    return response
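# Gradio preview UI: a chat window seeded with a welcome message and a prefilled textbox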
welcome_preview_message = f"""
Welcome to **{TITLE}**! Say something like:
"{EXAMPLE_INPUT}"
"""
chatbot_preview = gr.Chatbot(layout="panel", value=[(None, welcome_preview_message)])
textbox_preview = gr.Textbox(scale=7, container=False, value=EXAMPLE_INPUT)
demo = gr.ChatInterface(test_preview_chatbot, chatbot=chatbot_preview, textbox=textbox_preview)
demo.launch()