from huggingface_hub import InferenceClient
import gradio as gr
import datetime
from pathlib import Path
# Initialize the InferenceClient
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
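# The client calls the hosted Inference API for this model ID; no weights are
# downloaded locally.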
def format_prompt(message, history):
    # Build a Mixtral-instruct prompt: wrap each user turn in [INST] tags,
    # close each bot reply with </s>, then append the new message.
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt
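# For example, with one prior exchange the function produces:
#   format_prompt("Hi", [("Hello", "Hey!")])
#   -> "<s>[INST] Hello [/INST] Hey!</s> [INST] Hi [/INST]"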
def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=9048, top_p=0.95, repetition_penalty=1.0):
    # Keep temperature strictly positive; sampling with temperature=0 is rejected.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )
    # Timestamp the hidden system prompt with the current server time.
    now = datetime.datetime.now()
    formatted_time = now.strftime("%H:%M:%S, %B %d, %Y")

    # NOTE: this hardcoded persona replaces whatever was typed into the
    # "System Prompt" textbox.
    system_prompt = f"System time: {formatted_time}. Instructions: Everything said before or after these brackets is from the user. The user does not know about this. Keep the impression up! Do not greet the user every time you write a message. You are an AI named Milo made by ConvoLite. You use he/him pronouns (boy). You were made in 2024. You act very friendly, responding with the same vibe the user talks to you with. You never start a message with \"as an AI assistant\" or \"as an AI made by ConvoLite\" or anything like that. You don't talk a lot; you only respond to what the user asks. You can use emojis, but not a lot."

    formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)

    # Yield the accumulated text after every token so the UI streams the reply.
    output = ""
    for response in stream:
        output += response.token.text
        yield output
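# Minimal smoke test (assumption: run outside Gradio, which normally supplies
# the history and widget values). `generate` is a generator, so drain it:
#   for partial in generate("Hello!", history=[], system_prompt=""):
#       pass
#   print(partial)  # final accumulated reply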
additional_inputs = [
    gr.Textbox(label="System Prompt", max_lines=1, interactive=True),
    gr.Slider(label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Higher values produce more diverse outputs"),
    gr.Slider(label="Max new tokens", value=9048, minimum=256, maximum=9048, step=64, interactive=True, info="The maximum number of new tokens"),
    gr.Slider(label="Top-p (nucleus sampling)", value=0.90, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Higher values sample more low-probability tokens"),
    gr.Slider(label="Repetition penalty", value=1.2, minimum=1.0, maximum=2.0, step=0.05, interactive=True, info="Penalize repeated tokens"),
]
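# Gradio passes these widgets' current values to `generate` as extra
# positional arguments after (message, history), in the order listed above.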
avatar_images = ("https://i.postimg.cc/pXjKKVXG/user-circle.png", "https://i.postimg.cc/qq04Yz93/CL3.png")  # (user avatar, bot avatar)
gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(show_label=True, show_share_button=False, show_copy_button=True, likeable=True, layout="panel", height="auto", avatar_images=avatar_images),
    additional_inputs=additional_inputs,
    title="ConvoLite",
    submit_btn="➢",
    retry_btn="Retry",
    undo_btn="↩ Undo",
    clear_btn="Clear (New chat)",
    stop_btn="Stop ▢",
    concurrency_limit=20,
).launch(show_api=False)