# Spaces: Sleeping
import datetime
from pathlib import Path

import gradio as gr
from huggingface_hub import InferenceClient
# Shared inference client, bound to the Mixtral instruct model.
# Created once at import time and reused by every call to generate().
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
def format_prompt(message, history):
    """Build an ``[INST]``-tagged prompt from the chat history and a new message.

    Args:
        message: The new user message to append as the final instruction.
        history: Iterable of ``(user_prompt, bot_response)`` pairs from earlier
            turns. ``None`` or an empty iterable means there are no earlier
            turns.

    Returns:
        A string starting with ``<s>``, followed by one
        ``[INST] user [/INST] bot</s> `` segment per past turn, and ending with
        ``[INST] message [/INST]``.
    """
    # Each completed turn is closed with "</s> " so the next one starts cleanly.
    past_turns = [
        f"[INST] {user_prompt} [/INST] {bot_response}</s> "
        for user_prompt, bot_response in (history or ())
    ]
    return "<s>" + "".join(past_turns) + f"[INST] {message} [/INST]"
def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=9048, top_p=0.95, repetition_penalty=1.0):
    """Stream a reply to ``prompt``, yielding the accumulated text after each token.

    Args:
        prompt: The latest user message.
        history: Earlier ``(user, bot)`` turns, forwarded to ``format_prompt``.
        system_prompt: Accepted for interface compatibility but not used; the
            built-in persona instructions below always take its place.
        temperature: Sampling temperature; clamped to a minimum of 0.01.
        max_new_tokens: Upper bound on generated tokens; coerced to ``int``.
        top_p: Nucleus-sampling threshold; coerced to ``float``.
        repetition_penalty: Repetition penalty; coerced to ``float``.

    Yields:
        The reply text generated so far, growing by one token per yield.
    """
    # Coerce every numeric setting, since UI widgets may hand over ints,
    # floats or strings interchangeably.
    generate_kwargs = dict(
        temperature=max(float(temperature), 1e-2),
        max_new_tokens=int(max_new_tokens),
        top_p=float(top_p),
        repetition_penalty=float(repetition_penalty),
        do_sample=True,
        seed=42,
    )

    now = datetime.datetime.now()
    formatted_time = now.strftime("%H:%M:%S, %B %d, %Y")

    # NOTE(review): the caller-supplied ``system_prompt`` is deliberately not
    # used here, matching the original behaviour where it was overwritten by
    # this fixed persona text. Confirm whether the UI field should have effect.
    instructions = f"System time: {formatted_time}. Instructions: Everything else said before or after these brachets are from the user. The user does not know about this. Keep the impression up!. Do not greet the user every time you write a message! You are an AI named Milo made by ConvoLite. You use the he/him pronouns(boy) You we`re made in 2024. You act very friendly, responding to the user with the same vibe you get taked to. You never start the message with ”as an AI assistant” or ”as an AI made by ConvoLite” or something like that. You don`t talk a lot. You only respond to what the user asks. You can use emoji`s but not a lot."

    # The instructions are prepended to the newest message only; past turns in
    # ``history`` are replayed as they were shown in the chat.
    formatted_prompt = format_prompt(f"{instructions}, {prompt}", history)
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )

    output = ""
    for response in stream:
        token = response.token
        # Skip control tokens (e.g. the end-of-sequence marker) so they do not
        # leak into the text shown to the user.
        if getattr(token, "special", False):
            continue
        output += token.text
        yield output
# Extra controls shown alongside the chat box. They are listed in the same
# order as generate()'s parameters after (prompt, history): system_prompt,
# temperature, max_new_tokens, top_p, repetition_penalty.
additional_inputs = [
    gr.Textbox(
        label="System Prompt",
        max_lines=1,
        interactive=True,
    ),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=9048,
        minimum=256,
        maximum=9048,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
]
# (user avatar, bot avatar) image URLs shown next to chat messages.
avatar_images = (
    "https://i.postimg.cc/pXjKKVXG/user-circle.png",
    "https://i.postimg.cc/qq04Yz93/CL3.png",
)

# Build the chat UI around generate() and start serving it immediately.
# The launch is intentionally left at module level so running or importing
# this file starts the app, exactly as before.
demo = gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(
        show_label=True,
        show_share_button=False,
        show_copy_button=True,
        likeable=True,
        layout="panel",
        height="auto",
        avatar_images=avatar_images,
    ),
    additional_inputs=additional_inputs,
    title="ConvoLite",
    submit_btn="➢",
    retry_btn="Retry",
    undo_btn="↩ Undo",
    clear_btn="Clear (New chat)",
    stop_btn="Stop ▢",
    concurrency_limit=20,
)
demo.launch(show_api=False)