import gradio as gr
from huggingface_hub import InferenceClient

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

# Default client with the first model
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
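
# Illustrative sketch (not called anywhere in this app): a single, non-streaming
# chat_completion request against the default client, just to make the Inference API
# usage referenced in the docstring above concrete. The prompt text is an arbitrary
# example, not part of the original app.
def example_single_completion():
    result = client.chat_completion(
        [{"role": "user", "content": "Say hello in one sentence."}],
        max_tokens=64,
    )
    return result.choices[0].message.content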

# Function to switch between models based on selection
def switch_client(model_name: str):
    return InferenceClient(model_name)
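
# Possible refinement (an assumption, not part of the original design): cache one
# InferenceClient per model name so repeated turns on the same model reuse an existing
# client instead of constructing a new one on every call. Swapping get_client in for
# switch_client inside respond below would be a one-line change.
from functools import lru_cache

@lru_cache(maxsize=None)
def get_client(model_name: str) -> InferenceClient:
    return InferenceClient(model_name)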

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    model_name  # Model repo id resolved from the selected pseudonym
):
    # Switch client based on model selection
    global client
    client = switch_client(model_name)
    
    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # The last streamed chunk can carry an empty delta, so guard against None
        # before appending to the running response.
        token = chunk.choices[0].delta.content

        if token:
            response += token
        yield response

# Model names and their pseudonyms
model_choices = [
    ("mistralai/Mistral-7B-Instruct-v0.3", "Lake [Test]"), 
    ("google/mt5-base", "Lake 1 Base"),
    ("google/mt5-large", "Lake 1 Advanced")
]

# Pseudonyms shown in the model selection dropdown
pseudonyms = [model[1] for model in model_choices]

# Function to handle model selection and pseudonyms
def respond_with_pseudonym(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    selected_pseudonym
):
    # Find the actual model name from the pseudonym
    model_name = next(model[0] for model in model_choices if model[1] == selected_pseudonym)

    # Call the existing respond function and collect the streamed partial responses
    response = list(respond(message, history, system_message, max_tokens, temperature, top_p, model_name))

    # Append the pseudonym tag to the final (complete) response and return that single
    # string, since gr.ChatInterface expects one reply string rather than a list of partials
    return response[-1] + f"\n\n[Response generated by: {selected_pseudonym}]"
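
# Optional alternative (a sketch, not wired into the ChatInterface below): a streaming
# variant that yields each partial response so the UI renders tokens as they arrive,
# then yields one final chunk with the pseudonym tag appended.
def respond_with_pseudonym_streaming(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    selected_pseudonym,
):
    model_name = next(model[0] for model in model_choices if model[1] == selected_pseudonym)
    partial = ""
    for partial in respond(message, history, system_message, max_tokens, temperature, top_p, model_name):
        yield partial
    yield partial + f"\n\n[Response generated by: {selected_pseudonym}]"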

"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond_with_pseudonym,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
        gr.Dropdown(pseudonyms, label="Select Model", value=pseudonyms[0])  # Pseudonym selection dropdown
    ],
)

if __name__ == "__main__":
    demo.launch()