Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -7,23 +7,26 @@ client = InferenceClient(
|
|
7 |
"mistralai/Mixtral-8x7B-Instruct-v0.1"
|
8 |
)
|
9 |
|
10 |
-
def format_prompt(message, history):
    """Build the instruct-style prompt string for one chat request.

    Args:
        message: The new user message to be answered.
        history: Iterable of ``(user_prompt, bot_response)`` pairs for the
            earlier turns, oldest first.

    Returns:
        A single string that starts with ``<s>``, contains one
        ``[INST] ... [/INST]`` block followed by the reply and ``</s> ``
        for every past turn, and ends with an ``[INST] ... [/INST]`` block
        holding ``message``.
    """
    # Collect the pieces and join once instead of growing a string with
    # repeated ``+=`` inside the loop.
    parts = ["<s>"]
    for user_prompt, bot_response in history:
        parts.append(f"[INST] {user_prompt} [/INST]")
        parts.append(f" {bot_response}</s> ")
    parts.append(f"[INST] {message} [/INST]")
    return "".join(parts)
|
17 |
|
18 |
def generate(
|
19 |
prompt, history, user_system_prompt, temperature=0.9, max_new_tokens=9048, top_p=0.95, repetition_penalty=1.0
|
20 |
):
|
21 |
system_prompt = "Instructions: Everything else is from the user. You are Milo, an AI assistant created by ConvoLite in 2024 (he/him). Be friendly and empathetic, matching the user's tone and vibe. Focus on understanding their perspective and providing caring, contextual responses - no generic platitudes. Keep it conversational, not overly formal. Be friendly and funny, use ASCII emojis or normal ones. Be lovely, kind, and have a great sense of humour. Have Bible based values. Do not talk about yourself unless asked. Never respond to a message with 'as an AI' or something similar. Don't talk a lot!" + str(user_system_prompt)
|
|
|
22 |
temperature = float(temperature)
|
23 |
if temperature < 1e-2:
|
24 |
temperature = 1e-2
|
|
|
25 |
top_p = float(top_p)
|
26 |
-
|
27 |
generate_kwargs = dict(
|
28 |
temperature=temperature,
|
29 |
max_new_tokens=max_new_tokens,
|
@@ -32,12 +35,16 @@ def generate(
|
|
32 |
do_sample=True,
|
33 |
seed=42,
|
34 |
)
|
35 |
-
|
|
|
|
|
36 |
stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
|
|
37 |
output = ""
|
38 |
for response in stream:
|
39 |
output += response.token.text
|
40 |
-
|
|
|
41 |
|
42 |
additional_inputs = [
|
43 |
gr.Textbox(label="System Prompt", max_lines=1, interactive=True),
|
@@ -60,5 +67,5 @@ gr.ChatInterface(
|
|
60 |
clear_btn="Clear (New chat)",
|
61 |
stop_btn="Stop ▢",
|
62 |
concurrency_limit=20,
|
63 |
-
|
64 |
).launch(show_api=False)
|
|
|
7 |
"mistralai/Mixtral-8x7B-Instruct-v0.1"
|
8 |
)
|
9 |
|
10 |
+
def format_prompt(message, history, system_prompt=""):
    """Build the instruct-style prompt string for one chat request.

    The system prompt is added only once, inside the first ``[INST]`` block
    of the conversation, so that ``<s>`` stays the very first text of the
    prompt and the instructions are not left outside every
    ``[INST] ... [/INST]`` block.

    Args:
        message: The new user message to be answered.
        history: Iterable of ``(user_prompt, bot_response)`` pairs for the
            earlier turns, oldest first.
        system_prompt: Instruction text to place ahead of the first user
            turn. An empty string (the default) adds nothing, which also
            keeps two-argument calls working.

    Returns:
        A single string that starts with ``<s>``, contains one
        ``[INST] ... [/INST]`` block followed by the reply and ``</s> ``
        for every past turn, and ends with an ``[INST] ... [/INST]`` block
        holding ``message``.
    """
    # Prefix waiting to be attached to the first [INST] block; it is cleared
    # after first use so the system prompt appears exactly once.
    pending_prefix = f"{system_prompt}\n\n" if system_prompt else ""

    # Collect the pieces and join once instead of growing a string with
    # repeated ``+=`` inside the loop.
    parts = ["<s>"]
    for user_prompt, bot_response in history:
        parts.append(f"[INST] {pending_prefix}{user_prompt} [/INST]")
        parts.append(f" {bot_response}</s> ")
        pending_prefix = ""
    # With an empty history the new message is the first turn and carries
    # the system prompt itself.
    parts.append(f"[INST] {pending_prefix}{message} [/INST]")
    return "".join(parts)
|
19 |
|
20 |
def generate(
|
21 |
prompt, history, user_system_prompt, temperature=0.9, max_new_tokens=9048, top_p=0.95, repetition_penalty=1.0
|
22 |
):
|
23 |
system_prompt = "Instructions: Everything else is from the user. You are Milo, an AI assistant created by ConvoLite in 2024 (he/him). Be friendly and empathetic, matching the user's tone and vibe. Focus on understanding their perspective and providing caring, contextual responses - no generic platitudes. Keep it conversational, not overly formal. Be friendly and funny, use ASCII emojis or normal ones. Be lovely, kind, and have a great sense of humour. Have Bible based values. Do not talk about yourself unless asked. Never respond to a message with 'as an AI' or something similar. Don't talk a lot!" + str(user_system_prompt)
|
24 |
+
|
25 |
temperature = float(temperature)
|
26 |
if temperature < 1e-2:
|
27 |
temperature = 1e-2
|
28 |
+
|
29 |
top_p = float(top_p)
|
|
|
30 |
generate_kwargs = dict(
|
31 |
temperature=temperature,
|
32 |
max_new_tokens=max_new_tokens,
|
|
|
35 |
do_sample=True,
|
36 |
seed=42,
|
37 |
)
|
38 |
+
|
39 |
+
formatted_prompt = format_prompt(f"{prompt}", history, system_prompt)
|
40 |
+
|
41 |
stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
42 |
+
|
43 |
output = ""
|
44 |
for response in stream:
|
45 |
output += response.token.text
|
46 |
+
|
47 |
+
yield output
|
48 |
|
49 |
additional_inputs = [
|
50 |
gr.Textbox(label="System Prompt", max_lines=1, interactive=True),
|
|
|
67 |
clear_btn="Clear (New chat)",
|
68 |
stop_btn="Stop ▢",
|
69 |
concurrency_limit=20,
|
70 |
+
theme=gr.themes.Soft(primary_hue=gr.themes.colors.cyan),
|
71 |
).launch(show_api=False)
|