File size: 10,078 Bytes
038f313
 
4c18bfc
038f313
880ced6
 
e13eb1b
038f313
e13eb1b
038f313
 
 
 
e13eb1b
038f313
 
 
e13eb1b
69b4a5f
038f313
 
 
3a64d68
98674ca
8696822
e7683ca
038f313
e13eb1b
52ad57a
 
 
 
10ffb1d
 
 
 
 
e7683ca
 
e13eb1b
10ffb1d
f7c4208
 
86297f5
52ad57a
 
98674ca
e7683ca
f7c4208
52ad57a
 
 
038f313
e7683ca
 
 
 
 
 
 
 
e13eb1b
10ffb1d
f7c4208
 
e13eb1b
 
 
 
 
 
86297f5
e13eb1b
 
 
 
038f313
 
10ffb1d
038f313
b56d11c
f7c4208
e7683ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542c2ac
e13eb1b
f7c4208
e7683ca
 
 
 
 
 
d3123eb
 
 
 
e7683ca
8696822
e7683ca
10ffb1d
e7683ca
10ffb1d
e7683ca
 
 
10ffb1d
e7683ca
 
 
 
 
 
 
 
 
 
 
 
10ffb1d
e7683ca
 
 
 
 
 
 
10ffb1d
e7683ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10ffb1d
e7683ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10ffb1d
e7683ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10ffb1d
e7683ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10ffb1d
 
 
e7683ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
import gradio as gr
from openai import OpenAI
import os

# Retrieve the access token from the environment variable
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if ACCESS_TOKEN:
    print("Access token loaded.")
else:
    # Report the missing token up front: the client below is then built with
    # api_key=None, and any later failure would otherwise be hard to trace
    # back to the environment.
    print("Warning: HF_TOKEN is not set; requests to the Inference API may fail.")

# Initialize the OpenAI client with the Hugging Face Inference API endpoint.
# The endpoint speaks the OpenAI chat-completions protocol, so the stock
# OpenAI client is pointed at it via base_url.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model,
    selected_featured_model
):
    """
    Stream a chat completion for the user's new message.

    This is a generator: it yields the reply accumulated so far every time
    new text arrives, so the caller can display the answer as it grows.

    Parameters:
    - message: the user's new message
    - history: the list of previous messages, each as a pair
      (user_msg, assistant_msg); None or empty means no prior turns
    - system_message: the system prompt
    - max_tokens: the maximum number of tokens to generate in the response
    - temperature: sampling temperature
    - top_p: top-p (nucleus) sampling
    - frequency_penalty: penalize repeated tokens in the output
    - seed: a fixed seed for reproducibility; -1 means 'random'
    - custom_model: the user-provided custom model name (if any); when it is
      non-blank it takes precedence over selected_featured_model
    - selected_featured_model: the model selected from featured models

    Yields:
    - str: the response text accumulated so far. If the API call fails, a
      single "An error occurred: ..." string is yielded instead of raising.
    """

    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
    print(f"Custom model: {custom_model}")
    print(f"Selected featured model: {selected_featured_model}")

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None

    # Determine which model to use: either custom_model or selected featured model.
    # `or ""` tolerates a None value so that .strip() cannot fail.
    custom_name = (custom_model or "").strip()
    if custom_name:
        model_to_use = custom_name
        print(f"Using Custom Model: {model_to_use}")
    else:
        model_to_use = selected_featured_model
        print(f"Using Featured Model: {model_to_use}")

    # Construct the messages array required by the API
    messages = [{"role": "system", "content": system_message}]

    # Add conversation history to the context; empty/None halves of a turn are skipped
    for turn in history or []:
        user_part, assistant_part = turn[0], turn[1]
        if user_part:
            messages.append({"role": "user", "content": user_part})
            print(f"Added user message to context: {user_part}")
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})
            print(f"Added assistant message to context: {assistant_part}")

    # Append the latest user message
    messages.append({"role": "user", "content": message})

    # Start with an empty string to build the response as tokens stream in
    response = ""
    print("Sending request to OpenAI API.")

    try:
        # Make the streaming request to the HF Inference API via openai-like client
        stream = client.chat.completions.create(
            model=model_to_use,              # Use either the user-provided custom model or selected featured model
            max_tokens=max_tokens,
            stream=True,                     # Stream the response
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            seed=seed,
            messages=messages,
        )
        for message_chunk in stream:
            # Some chunks carry no text: the choices list can be empty and
            # delta.content can be None (e.g. role-only or final chunks).
            # Appending None to a str would raise TypeError and turn a
            # successful reply into an error message, so skip those chunks.
            if not message_chunk.choices:
                continue
            token_text = message_chunk.choices[0].delta.content
            if not token_text:
                continue
            print(f"Received token: {token_text}")
            response += token_text
            # Yield the partial response to Gradio so it can display in real-time
            yield response
    except Exception as e:
        # Top-level boundary for the UI: report the failure as the reply text
        # instead of letting the exception propagate into the event handler.
        print(f"Error during API call: {e}")
        yield f"An error occurred: {e}"

    print("Completed response generation.")

# Create a Chatbot component with a specified height
# NOTE(review): this module-level component is not referenced again in the
# visible file; the Blocks layout below builds its own `chatbot_component`.
chatbot = gr.Chatbot(height=600)
print("Chatbot interface created.")

# Placeholder featured models list
# The entries are model identifiers offered in the "Featured Models" radio
# group; the first entry is used as the default selection.
FEATURED_MODELS_LIST = [
    "meta-llama/Llama-3.1-8B-Instruct",
    "microsoft/Phi-3.5-mini-instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "Qwen/Qwen2.5-72B-Instruct",
]

# Define the Gradio Blocks interface
# Layout, top to bottom: title, chat window, system prompt, message box with
# Send button, generation settings, and the featured-model picker.
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    gr.Markdown("# Serverless-TextGen-Hub 📝🤖")
    gr.Markdown(
        """
        Welcome to the **Serverless-TextGen-Hub**! Chat with your favorite models seamlessly.
        """
    )
    
    with gr.Row():
        # Chatbot component
        # Displays the conversation; its value is also fed back to the
        # response handler as the chat history.
        chatbot_component = gr.Chatbot(height=600)

    with gr.Row():
        # System message input
        system_message = gr.Textbox(
            value="You are a helpful assistant.",
            label="System Message",
            placeholder="Enter system message here...",
            lines=2,
        )

    with gr.Row():
        # User message input
        user_message = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here...",
            lines=2,
        )
        # Run button
        run_button = gr.Button("Send", variant="primary")

    with gr.Row():
        # Additional settings
        # Each control below is passed to respond() as the request parameter
        # of the same name.
        with gr.Column(scale=1):
            max_tokens = gr.Slider(
                minimum=1,
                maximum=4096,
                value=512,
                step=1,
                label="Max New Tokens",
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=4.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-P",
            )
            frequency_penalty = gr.Slider(
                minimum=-2.0,
                maximum=2.0,
                value=0.0,
                step=0.1,
                label="Frequency Penalty",
            )
            # -1 is the "random" sentinel; respond() converts it to None
            seed = gr.Slider(
                minimum=-1,
                maximum=65535,  # Arbitrary upper limit for demonstration
                value=-1,
                step=1,
                label="Seed (-1 for random)",
            )
            # A non-blank value here wins over the featured-model radio in respond()
            custom_model = gr.Textbox(
                value="",
                label="Custom Model",
                info="(Optional) Provide a custom Hugging Face model path. This will override the selected featured model if not empty.",
                placeholder="e.g., meta-llama/Llama-3.3-70B-Instruct",
            )

    with gr.Accordion("Featured Models", open=True):
        with gr.Column():
            # Text typed here narrows the radio choices below
            model_search = gr.Textbox(
                label="Filter Models",
                placeholder="Search for a featured model...",
                lines=1,
            )
            # Defaults to the first featured model
            featured_model = gr.Radio(
                label="Select a model below",
                value=FEATURED_MODELS_LIST[0],
                choices=FEATURED_MODELS_LIST,
                interactive=True,
            )
    # Function to filter featured models based on search input
    def filter_featured_models(search_term):
        """
        Build the radio-group update for the given search text.

        Matching is a case-insensitive substring test against each entry of
        FEATURED_MODELS_LIST. Surrounding whitespace in the search text is
        ignored, so a blank or whitespace-only search restores the full list.

        Returns a gr.update() that sets both the visible choices and the
        selected value: the first match, or no selection when nothing matches.
        """
        # Normalise once instead of once per model; `or ""` tolerates None.
        needle = (search_term or "").strip().lower()
        if not needle:
            return gr.update(choices=FEATURED_MODELS_LIST, value=FEATURED_MODELS_LIST[0])
        filtered = [model for model in FEATURED_MODELS_LIST if needle in model.lower()]
        if not filtered:
            return gr.update(choices=[], value=None)
        return gr.update(choices=filtered, value=filtered[0])

    # Update featured_model choices based on search.
    # Runs on every change of the search box and writes the resulting
    # update back into the featured_model radio group.
    model_search.change(
        fn=filter_featured_models,
        inputs=model_search,
        outputs=featured_model,
    )

    # Function to handle the chatbot response
    def handle_response(message, history, system_msg, max_tok, temp, tp, freq_pen, sd, custom_mod, selected_feat_mod):
        """
        Stream the assistant's reply into the chat window.

        respond() is a generator that yields the reply accumulated so far, so
        this handler is a generator too: each yield is the full chat history
        (prior turns plus the current turn with the partial reply), which
        Gradio renders as the new value of the chatbot component.

        Parameters mirror the UI controls: the new user message, the current
        chatbot value (list of (user, assistant) pairs, possibly None), the
        system prompt, the sampling settings, and the two model selectors.

        Yields:
            list: the chat history to display.
        """
        # Copy so the component's current value is never mutated in place.
        prior_turns = list(history or [])

        # Nothing to send: leave the conversation untouched.
        if not message or not message.strip():
            yield prior_turns
            return

        # Show the user's message straight away, before any text arrives.
        yield prior_turns + [(message, None)]

        # Pass only the *prior* turns as history; respond() appends the new
        # message itself, so including it here would send it twice.
        for partial_reply in respond(
            message=message,
            history=prior_turns,
            system_message=system_msg,
            max_tokens=max_tok,
            temperature=temp,
            top_p=tp,
            frequency_penalty=freq_pen,
            seed=sd,
            custom_model=custom_mod,
            selected_featured_model=selected_feat_mod,
        ):
            yield prior_turns + [(message, partial_reply)]

    # The Send button and the textbox submit event share the same wiring.
    # NOTE: generator handlers rely on Gradio's request queue, which is
    # enabled by default from Gradio 4; on Gradio 3 call demo.queue() before
    # launching.
    chat_inputs = [
        user_message,
        chatbot_component,        # history
        system_message,
        max_tokens,
        temperature,
        top_p,
        frequency_penalty,
        seed,
        custom_model,
        featured_model,
    ]

    # Handle button click
    run_button.click(
        fn=handle_response,
        inputs=chat_inputs,
        outputs=chatbot_component,
    )

    # Allow pressing Enter to send the message
    user_message.submit(
        fn=handle_response,
        inputs=chat_inputs,
        outputs=chatbot_component,
    )

    # Custom CSS to enhance the UI, injected through a page-load script
    hide_footer_js = """
    () => {
        const style = document.createElement('style');
        style.innerHTML = `
            footer {visibility: hidden !important;}
            .gradio-container {background-color: #f9f9f9;}
        `;
        document.head.appendChild(style);
    }
    """
    # Gradio 4 renamed the `_js` event argument to `js`; pick whichever name
    # the installed version accepts so the load hook does not raise TypeError.
    js_kwarg = "js" if int(gr.__version__.split(".")[0]) >= 4 else "_js"
    demo.load(lambda: None, None, None, **{js_kwarg: hide_footer_js})

print("Launching Gradio interface...")  # Debug log

# Launch the Gradio interface without showing the API or sharing externally.
# This runs at import time: the module is meant to be executed as a script.
demo.launch(show_api=False, share=False)