File size: 10,607 Bytes
038f313
fab24df
c5a20a4
038f313
880ced6
 
e13eb1b
038f313
a8fc89d
038f313
 
 
 
e13eb1b
038f313
 
27c8b8d
 
 
038f313
 
 
3a64d68
98674ca
c5a20a4
038f313
e13eb1b
7255410
 
 
 
 
 
 
 
 
be3f346
e13eb1b
7255410
27c8b8d
 
 
 
 
be3f346
f7c4208
c5a20a4
52ad57a
 
038f313
a8fc89d
c5a20a4
d6c98d8
27c8b8d
c5a20a4
27c8b8d
d6c98d8
a8fc89d
27c8b8d
a8fc89d
27c8b8d
 
a8fc89d
27c8b8d
 
c5a20a4
27c8b8d
d6c98d8
27c8b8d
d6c98d8
c5a20a4
77298b9
 
a8fc89d
 
 
27c8b8d
a8fc89d
27c8b8d
a8fc89d
 
 
 
 
 
 
 
27c8b8d
 
 
 
a8fc89d
c5a20a4
a8fc89d
542c2ac
e13eb1b
f7c4208
a8fc89d
 
 
be3f346
a8fc89d
ad258d3
a8fc89d
c5a20a4
a8fc89d
 
be3f346
a8fc89d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff59b6f
 
 
a8fc89d
ff59b6f
 
 
 
a8fc89d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be3f346
769901b
77298b9
27c8b8d
77298b9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
import gradio as gr
from openai import OpenAI
import os

# Read the Hugging Face API token from the HF_TOKEN environment variable
# (None when the variable is unset). The message below is printed
# unconditionally, so it does not confirm that a token was actually found.
ACCESS_TOKEN = os.environ.get("HF_TOKEN")
print("Access token loaded.")

# OpenAI-compatible client pointed at the Hugging Face Inference API endpoint
client = OpenAI(
    api_key=ACCESS_TOKEN,
    base_url="https://api-inference.huggingface.co/v1/",
)
print("OpenAI client initialized.")

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model
):
    """
    Stream a chat completion for the latest user message.

    Builds the messages array from the system prompt, the previous turns and
    the new message, sends a streaming request through the module-level
    `client`, and yields the accumulated reply after each received text chunk.

    Parameters:
    - message: the user's new message; either a plain string or, when the
      interface runs in multimodal mode, a dict whose "text" entry holds the
      typed text (attached files are not forwarded to the model)
    - history: the list of previous turns, each as a pair (user_msg, assistant_msg)
    - system_message: the system prompt
    - max_tokens: the maximum number of tokens to generate in the response
    - temperature: sampling temperature
    - top_p: top-p (nucleus) sampling
    - frequency_penalty: penalize repeated tokens in the output
    - seed: a fixed seed for reproducibility; -1 means 'random'
    - custom_model: the model name to use; empty or None falls back to the default model

    Yields:
    - the response text accumulated so far, once per received text chunk
    """

    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
    print(f"Selected model (custom_model): {custom_model}")

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None

    # The multimodal textbox delivers the message as a dict; only its text is
    # sent to the model, since the request below carries plain-text content.
    if isinstance(message, dict):
        message = message.get("text") or ""

    # Construct the messages array required by the API
    messages = [{"role": "system", "content": system_message}]
    print("Initial messages array constructed.")

    # Add conversation history to the context
    for val in history:
        user_part, assistant_part = val[0], val[1]
        # Only non-empty plain-text entries are forwarded; anything else
        # (presumably file uploads recorded in multimodal mode) is skipped
        # because it cannot be sent as text content.
        if isinstance(user_part, str) and user_part:
            messages.append({"role": "user", "content": user_part})
            print(f"Added user message to context: {user_part}")
        if isinstance(assistant_part, str) and assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})
            print(f"Added assistant message to context: {assistant_part}")

    # Append the latest user message
    messages.append({"role": "user", "content": message})
    print("Latest user message appended.")

    # If user provided a model, use that; otherwise, fall back to a default model.
    # `or ""` guards against a None value coming from the textbox.
    model_to_use = (custom_model or "").strip() or "meta-llama/Llama-3.3-70B-Instruct"
    print(f"Model selected for inference: {model_to_use}")

    # Start with an empty string to build the response as tokens stream in
    response = ""
    print("Sending request to OpenAI API.")

    # Make the streaming request to the HF Inference API via openai-like client
    stream = client.chat.completions.create(
        model=model_to_use,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    )
    for message_chunk in stream:
        # Some chunks carry no choices at all; there is nothing to extract from them
        if not message_chunk.choices:
            continue
        token_text = message_chunk.choices[0].delta.content
        # A chunk may have no text (content is None), e.g. the closing chunk;
        # concatenating None onto a str would raise TypeError
        if token_text is None:
            continue
        print(f"Received token: {token_text}")
        response += token_text
        # Yield the partial response to Gradio so it can display in real-time
        yield response

    print("Completed response generation.")

# -------------------------
# GRADIO UI CONFIGURATION
# -------------------------

# Chat display component: panel layout, height of 600, with copy and like
# buttons and a hint shown while the conversation is empty
chatbot = gr.Chatbot(
    layout="panel",
    height=600,
    likeable=True,
    show_copy_button=True,
    placeholder="Select a model and begin chatting",
)
print("Chatbot interface created.")

# Extra controls for the chat; they are listed as additional inputs of the
# ChatInterface, in the same order as the matching parameters of respond()
system_message_box = gr.Textbox(label="System message", value="")

# Generation parameters
max_tokens_slider = gr.Slider(label="Max new tokens", minimum=1, maximum=4096, step=1, value=512)
temperature_slider = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7)
top_p_slider = gr.Slider(label="Top-P", minimum=0.1, maximum=1.0, step=0.05, value=0.95)
frequency_penalty_slider = gr.Slider(
    label="Frequency Penalty", minimum=-2.0, maximum=2.0, step=0.1, value=0.0
)
# respond() treats a seed of -1 as "no fixed seed"
seed_slider = gr.Slider(label="Seed (-1 for random)", minimum=-1, maximum=65535, step=1, value=-1)

# Free-text model name; this is the value respond() receives as "custom_model"
custom_model_box = gr.Textbox(
    label="Custom Model",
    value="",
    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
)

# Define a function that updates the custom model box when a featured model is selected
def set_custom_model_from_radio(selected):
    """
    Event handler for the 'Featured Models' radio.

    Logs the chosen model and returns it unchanged; the return value is what
    gets written into the Custom Model text box by the event wiring.
    """
    print(f"Featured model selected: {selected}")
    return selected

# Assemble the chat UI around respond(). The additional inputs are listed in
# the same order as respond()'s parameters that follow message and history.
demo = gr.ChatInterface(
    fn=respond,
    chatbot=chatbot,
    textbox=gr.MultimodalTextbox(),
    multimodal=True,
    additional_inputs=[
        system_message_box,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
        frequency_penalty_slider,
        seed_slider,
        custom_model_box,
    ],
    # Clickable sample prompts, given as message dicts with a "text" entry
    examples=[
        {"text": prompt}
        for prompt in (
            "Howdy, partner!",
            "What's your model name and who trained you?",
            "How many R's are there in the word Strawberry?",
        )
    ],
    cache_examples=False,
    fill_height=True,
    concurrency_limit=20,
    theme="Nymbo/Nymbo_Theme",
)

print("ChatInterface object created.")

# -----------
# ADDING THE "FEATURED MODELS" ACCORDION
# -----------
with demo:
    # Collapsible "Featured Models" section, added inside the existing interface
    with gr.Accordion("Featured Models", open=False):
        # Single-line search field used to narrow the radio choices
        model_search_box = gr.Textbox(
            lines=1,
            label="Filter Models",
            placeholder="Search for a featured model...",
        )
        print("Model search box created.")

        # Model names offered in the radio group
        models_list = [
            "meta-llama/Llama-3.3-70B-Instruct",
            "meta-llama/Llama-3.2-3B-Instruct",
            "meta-llama/Llama-3.2-1B-Instruct",
            "meta-llama/Llama-3.1-8B-Instruct",
            "NousResearch/Hermes-3-Llama-3.1-8B",
            "google/gemma-2-27b-it",
            "google/gemma-2-9b-it",
            "google/gemma-2-2b-it",
            "mistralai/Mistral-Nemo-Instruct-2407",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "Qwen/Qwen2.5-72B-Instruct",
            "Qwen/QwQ-32B-Preview",
            "PowerInfer/SmallThinker-3B-Preview",
            "HuggingFaceTB/SmolLM2-1.7B-Instruct",
            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            "microsoft/Phi-3.5-mini-instruct",
        ]
        print("Models list initialized.")

        # The first list entry is preselected
        featured_model_radio = gr.Radio(
            choices=models_list,
            value=models_list[0],
            label="Select a model below",
            interactive=True,
        )
        print("Featured models radio button created.")

        def filter_models(search_term):
            """Return a radio update whose choices are the models containing search_term, ignoring case."""
            print(f"Filtering models with search term: {search_term}")
            needle = search_term.lower()
            filtered = [name for name in models_list if needle in name.lower()]
            print(f"Filtered models: {filtered}")
            return gr.update(choices=filtered)

        # Typing in the search box re-filters the radio choices
        model_search_box.change(
            fn=filter_models,
            inputs=model_search_box,
            outputs=featured_model_radio,
        )
        print("Model search box change event linked.")

        # Picking a featured model copies its name into the Custom Model textbox
        featured_model_radio.change(
            fn=set_custom_model_from_radio,
            inputs=featured_model_radio,
            outputs=custom_model_box,
        )
        print("Featured model radio button change event linked.")

print("Gradio interface initialized.")

# Start the Gradio app only when this file is run as a script, not on import
if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch()