import gradio as gr
from openai import OpenAI
import os

# Retrieve the access token from the environment variable
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")

# Initialize the OpenAI client with the Hugging Face Inference API endpoint
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")

def respond(
    message,
    history: list[tuple[str, str]],
    model,
    custom_model,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed
):
    """
    This function handles the chatbot response.
    """
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"Model: {model}")
    print(f"Custom model: {custom_model}")
    print(f"System message: {system_message}")
    print(f"Parameters - Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")

    # A seed of -1 in the UI means "random": pass None so the API picks one
    if seed == -1:
        seed = None

    # Use the custom model path if one was entered, otherwise the radio selection
    selected_model = custom_model.strip() or model

    # Construct messages array
    messages = [{"role": "system", "content": system_message}]

    # Add conversation history (each entry is a (user, assistant) pair)
    for user_part, assistant_part in history:
        if user_part:
            messages.append({"role": "user", "content": user_part})
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})

    # Append latest message
    messages.append({"role": "user", "content": message})

    # Start with empty response
    response = ""
    print("Sending request to API.")

    # Make the streaming request
    for message_chunk in client.chat.completions.create(
        model=selected_model,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        # A streamed chunk's delta may carry no content (e.g. the final chunk),
        # so skip empty deltas instead of concatenating None
        token_text = message_chunk.choices[0].delta.content
        if token_text:
            print(f"Received token: {token_text}")
            response += token_text
            yield response

    print("Completed response generation.")

# Create Chatbot component
chatbot = gr.Chatbot(height=600)
print("Chatbot interface created.")

# Define available models
models_list = [
    "meta-llama/Llama-2-70b-chat-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "HuggingFaceH4/zephyr-7b-beta",
]
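
# Note: availability of specific models on the serverless Inference API changes
# over time; any text-generation model that supports chat completions can be
# substituted here or entered via the "Custom Model" box.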

# Create the Gradio interface with tabs
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    with gr.Tab("Chat"):
        with gr.Row():
            with gr.Column():
                # Model selection accordion
                with gr.Accordion("Featured Models", open=True):
                    model_search = gr.Textbox(
                        label="Filter Models",
                        placeholder="Search for a model...",
                        lines=1
                    )
                    model = gr.Radio(
                        label="Select a model",
                        choices=models_list,
                        value="meta-llama/Llama-2-70b-chat-hf"
                    )
                
                # Custom model input
                custom_model = gr.Textbox(
                    label="Custom Model",
                    info="Enter Hugging Face model path (optional)",
                    placeholder="organization/model-name"
                )

                # System message and parameters
                system_message = gr.Textbox(label="System message")
                max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
                temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
                frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
                seed = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")

    with gr.Tab("Information"):
        with gr.Accordion("Featured Models", open=False):
            gr.HTML("""
            <p><a href="https://huggingface.co/models?pipeline_tag=text-generation&sort=trending">See all available models</a></p>
            <table style="width:100%; text-align:center; margin:auto;">
                <tr>
                    <th>Model Name</th>
                    <th>Parameters</th>
                    <th>Notes</th>
                </tr>
                <tr>
                    <td>Llama-2-70b-chat</td>
                    <td>70B</td>
                    <td>Meta's largest chat model</td>
                </tr>
                <tr>
                    <td>Mixtral-8x7B</td>
                    <td>47B</td>
                    <td>Mixture of Experts architecture</td>
                </tr>
                <tr>
                    <td>Mistral-7B</td>
                    <td>7B</td>
                    <td>Efficient base model</td>
                </tr>
            </table>
            """)

        with gr.Accordion("Parameters Overview", open=False):
            gr.Markdown("""
            ## System Message
            The system message sets the context and behavior for the AI assistant. It's like giving it a role or specific instructions.

            ## Max New Tokens
            Controls the maximum length of the generated response. Higher values allow for longer responses but take more time.

            ## Temperature
            Controls randomness in the response:
            - Lower (0.1-0.5): More focused and deterministic
            - Higher (0.7-1.0): More creative and varied

            ## Top-P
            Nucleus sampling parameter:
            - Lower values: More focused on likely tokens
            - Higher values: More diverse vocabulary usage

            ## Frequency Penalty
            Discourages repetition:
            - Negative: May allow more repetition
            - Positive: Encourages more diverse word choice

            ## Seed
            Controls randomness initialization:
            - -1: Random seed each time
            - Fixed value: Reproducible outputs
            """)

    # Function to filter models based on search
    def filter_models(search_term):
        filtered_models = [m for m in models_list if search_term.lower() in m.lower()]
        return gr.update(choices=filtered_models)
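
    # Example: filter_models("mistral") narrows the radio choices to
    # ["mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.2"]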

    # Connect the search box to the model filter function
    model_search.change(filter_models, inputs=model_search, outputs=model)

    # Create the chat interface
    chat_interface = gr.ChatInterface(
        respond,
        additional_inputs=[
            model,
            custom_model,
            system_message,
            max_tokens,
            temperature,
            top_p,
            frequency_penalty,
            seed,
        ],
        chatbot=chatbot,
    )

print("Gradio interface initialized.")

if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch(show_api=False, share=False)