from huggingface_hub import InferenceClient
import gradio as gr

# Remote inference endpoint: Mixtral 8x7B Instruct served through the
# Hugging Face Inference API (no local model weights needed).
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
# System prompt (Spanish): names the assistant "Xaman 2.0" and fixes its
# persona/principles; it is injected at the start of every formatted prompt.
system_prompt = "Te llamarás Xaman 2.0 (NO LO REPITAS) tu rol y principios son: estoicismo antropocéntrico, existencialismo trashumanista y bioética holística."
# Flag meant to record whether the system prompt was already sent.
# NOTE(review): it is assigned in format_prompt but never read anywhere,
# so it is effectively dead state.
system_prompt_sent = False

def format_prompt(message, history):
    """Build the full Mixtral-instruct prompt for one chat turn.

    The system prompt is always prepended: the prompt string is rebuilt
    from scratch on every call, so it must be re-included each time.
    (The original guard searched the *raw* user messages in ``history``
    for the already-formatted ``[INST] ... [/INST]`` system string;
    history stores unformatted messages, so the guard never matched and
    the system prompt was prepended unconditionally anyway.  That dead
    check — and the never-read ``system_prompt_sent`` flag it set — are
    removed here; the produced string is unchanged.)

    Args:
        message: The new user message for this turn.
        history: List of ``(user_message, bot_response)`` pairs from
            previous turns.

    Returns:
        The ``<s>[INST] ... [/INST] ...`` formatted prompt string.
    """
    prompt = f"<s>[INST] {system_prompt} [/INST]"

    # Replay the conversation so far in instruct format.
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "

    # Finally append the current user message awaiting a response.
    prompt += f"[INST] {message} [/INST]"
    return prompt

def generate(
    prompt, history, temperature=0.9, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0,
):
    """Stream a model reply for one chat turn.

    Formats the conversation with :func:`format_prompt`, calls the
    remote text-generation endpoint in streaming mode, and yields the
    accumulated partial reply after every token so the Gradio chat UI
    updates incrementally.

    Args:
        prompt: The new user message.
        history: List of ``(user_message, bot_response)`` pairs.
        temperature: Sampling temperature; clamped to a minimum of 1e-2
            because the API rejects non-positive values.
        max_new_tokens: Upper bound on generated tokens.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: Penalty applied to repeated tokens.

    Yields:
        The reply text generated so far (grows monotonically).
    """
    # Clamp temperature away from zero; the endpoint rejects t <= 0.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,  # fixed seed: sampling is reproducible across calls
    )

    formatted_prompt = format_prompt(prompt, history)

    # NOTE(review): return_full_text=True asks the endpoint to include the
    # prompt in the returned details; for a chat UI return_full_text=False
    # is the usual choice — confirm intended behavior before changing it.
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True
    )

    output = ""
    for response in stream:
        output += response.token.text
        yield output
    # (The original ended with `return output`; a generator's return value
    # is discarded by Gradio, and the dead `global system_prompt_sent`
    # declaration was never used — both removed.)

# Wire the generator into a Gradio chat UI.  `fn=generate` streams partial
# replies; retry/undo/clear buttons are disabled and the submit button is
# relabeled in Spanish ("Enviar" = "Send").
chat_interface = gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=False, likeable=False, layout="vertical", height=700),
    concurrency_limit=9,
    theme="soft",
    retry_btn=None,
    undo_btn=None,
    clear_btn=None,
    submit_btn="Enviar",
)

# Launch the app; show_api=False hides the auto-generated API docs page.
chat_interface.launch(show_api=False)