File size: 5,155 Bytes
739f132 40bd42d d7683a1 739f132 d7683a1 739f132 178e1da e86a410 739f132 1ea6bba 8b46088 739f132 1ea6bba 8b46088 739f132 8b46088 739f132 1ea6bba 739f132 8b46088 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import os
import re
import logging
import gradio as gr
import openai
# SECURITY: the environment is deliberately NOT printed at start-up. Dumping
# os.environ writes every variable -- including OPENAI_API_KEY -- to the
# process's stdout/logs.

# Endpoint and credentials for the `openai` client are read from the
# environment; either value is None when the variable is unset.
openai.api_base = os.environ.get("OPENAI_API_BASE")
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Fixed preamble that `chat` places in front of the user-supplied system
# message when building the prompt.
BASE_SYSTEM_MESSAGE = """The following is a conversation between a human and an AI assistant named OpenOrcaChat. OpenOrcaChat is an open-source AI assistant developed by the OpenOrca and OpenChat teams. The team members include Guan Wang "One", Bleys Goodson, "Entropi", Wing Lian "Caseus", Eugene Pentland "neverendingtoast", Austin Cook "AutoMeta", Chanvichet Vong "Nanobit" and "Teknium". """
def make_prediction(prompt, max_tokens=None, temperature=None, top_p=None, top_k=None, repetition_penalty=None):
    """Yield the text of each streamed completion chunk for ``prompt``.

    The sampling arguments are forwarded unchanged to
    ``openai.Completion.create``; the request is always made with
    ``stream=True`` so the text arrives piece by piece.
    """
    request_args = dict(
        model="Open-Orca/OpenOrcaxOpenChat-Preview2-13B",
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        stream=True,
    )
    stream = openai.Completion.create(**request_args)
    # Each chunk carries its text under choices[0]["text"].
    yield from (piece["choices"][0]["text"] for piece in stream)
def delay_typer(words, delay=0.8):
    """Yield ``words`` one whitespace-delimited token at a time, pausing between tokens.

    Args:
        words: Text to split. Each token keeps its surrounding whitespace, so
            concatenating the yielded pieces reproduces every non-blank part
            of the input.
        delay: Seconds to sleep after each yielded token.

    Yields:
        Successive matches of ``\\s*\\S+\\s*`` in ``words``; nothing is yielded
        for an empty or all-whitespace string.
    """
    # Imported locally: the module never imports `sleep`, so the original
    # bare `sleep(delay)` call raised NameError after the first token.
    import time

    for token in re.findall(r'\s*\S+\s*', words):
        yield token
        time.sleep(delay)
def clear_chat(chat_history_state, chat_message):
    """Return a fresh ``(history, message)`` pair: an empty list and an empty string.

    Both arguments are ignored; they exist only so the function matches the
    inputs it is wired to.
    """
    return [], ''
def user(message, history):
    """Record a new user turn and clear the input box.

    Appends ``[message, ""]`` to ``history`` (the empty string is the slot for
    the assistant's reply). A falsy ``history`` is replaced by a new list; a
    non-empty one is extended in place.

    Returns:
        ``("", history)`` -- the emptied message text and the updated history.
    """
    if not history:
        history = []
    new_turn = [message, ""]
    history.append(new_turn)
    return "", history
def chat(history, system_message, max_tokens, temperature, top_p, top_k, repetition_penalty):
    """Stream the assistant's reply for the latest turn in ``history``.

    Builds a single prompt from ``BASE_SYSTEM_MESSAGE``, the stripped
    ``system_message`` and every ``[user, assistant]`` pair in ``history``,
    sends it to ``make_prediction`` and appends the streamed text to the
    assistant slot of the last turn.

    Args:
        history: List of ``[user_text, assistant_text]`` pairs; the last
            pair's assistant text is filled in as tokens arrive.
        system_message: Extra system prompt appended to the base message.
        max_tokens, temperature, top_p, top_k, repetition_penalty: Sampling
            settings forwarded unchanged to ``make_prediction``.

    Yields:
        ``(history, history, "")`` after every streamed fragment.
    """
    history = history or []
    end_of_turn = "<|end_of_turn|>"

    turns = "\n".join(
        "\n".join(["User: " + item[0] + end_of_turn, "Assistant: " + item[1] + end_of_turn])
        for item in history
    )
    messages = BASE_SYSTEM_MESSAGE + system_message.strip() + "\n" + turns

    # Drop the final end-of-turn marker so the model continues the last
    # assistant turn. This must be an exact suffix removal:
    # str.rstrip("<|end_of_turn|>") strips any trailing run of the characters
    # in that string and would eat real text (e.g. "understand" -> "understa").
    if messages.endswith(end_of_turn):
        messages = messages[:-len(end_of_turn)]
    # remove last space from assistant, some models output a ZWSP if you leave a space
    messages = messages.rstrip()

    prediction = make_prediction(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
    )

    # Splits a chunk into (text, trailing-whitespace-or-end) pairs so the UI
    # can be refreshed fragment by fragment.
    fragment_pattern = re.compile(r'(.*?)(\s|$)')
    for tokens in prediction:
        for pieces in fragment_pattern.findall(tokens):
            history[-1][1] += "".join(pieces)
            # stream the response
            yield history, history, ""
# Initial contents of the "System Message" textbox.
start_message = ""

# UI definition: a header, a "Chatbot" tab with the conversation view, the
# message box, action buttons and sampling controls, followed by event wiring.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown(f"""
## This demo is an unquantized GPU chatbot of [OpenOrcaxOpenChat-Preview2-13B](https://huggingface.co/Open-Orca/OpenOrcaxOpenChat-Preview2-13B)
Brought to you by your friends at Alignment Lab AI, OpenChat, and Open Access AI Collective!
""")
    with gr.Tab("Chatbot"):
        # NOTE(review): the "π" characters in this heading look like a
        # mis-decoded emoji -- confirm against the original file.
        gr.Markdown("# π OpenOrca x OpenChat - Preview2 - 13B Playground Space! π")
        chatbot = gr.Chatbot().style(height=500)
        with gr.Row():
            message = gr.Textbox(
                label="What do you want to chat about?",
                placeholder="Ask me anything.",
                lines=3,
            )
        with gr.Row():
            submit = gr.Button(value="Send message", variant="secondary").style(full_width=True)
            clear = gr.Button(value="New topic", variant="secondary").style(full_width=False)
            stop = gr.Button(value="Stop", variant="secondary").style(full_width=False)
        with gr.Row():
            with gr.Column():
                # Sampling controls; their values are passed to `chat` below.
                max_tokens = gr.Slider(20, 1000, label="Max Tokens", step=20, value=500)
                temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=0.8)
                top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.95)
                top_k = gr.Slider(0, 100, label="Top K", step=1, value=40)
                repetition_penalty = gr.Slider(0.0, 2.0, label="Repetition Penalty", step=0.1, value=1.1)
                system_msg = gr.Textbox(
                    start_message, label="System Message", interactive=True, visible=True, placeholder="System prompt. Provide instructions which you want the model to remember.", lines=5)

    # Conversation history held as state, separate from the visible chatbot.
    chat_history_state = gr.State()

    # "New topic": reset the stored history and the message box, and set the
    # chatbot display to None.
    clear.click(clear_chat, inputs=[chat_history_state, message], outputs=[chat_history_state, message], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

    # "Send message": first record the user's turn (`user`), then stream the
    # model's reply (`chat`) into the chatbot, the stored history and the
    # message box.
    submit_click_event = submit.click(
        fn=user, inputs=[message, chat_history_state], outputs=[message, chat_history_state], queue=True
    ).then(
        fn=chat, inputs=[chat_history_state, system_msg, max_tokens, temperature, top_p, top_k, repetition_penalty], outputs=[chatbot, chat_history_state, message], queue=True
    )
    # "Stop": runs no function of its own; it only cancels the submit chain.
    stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_click_event], queue=False)

# Enable the request queue and start the server on 0.0.0.0:7860.
# NOTE(review): `.style(...)` and `concurrency_count` appear to be
# Gradio 3.x-era APIs -- confirm the pinned Gradio version before upgrading.
demo.queue(max_size=128, concurrency_count=48).launch(debug=True, server_name="0.0.0.0", server_port=7860)
|