import gradio as gr
from huggingface_hub import InferenceClient
import os
import json
import base64
from PIL import Image
import io
import requests # Keep for potential future use, though not directly used in core logic now
from smolagents.mcp_client import MCPClient # Ensure this is correctly installed and importable

ACCESS_TOKEN = os.getenv("HF_TOKEN")
if ACCESS_TOKEN:
    print("Access token loaded from HF_TOKEN environment variable.")
else:
    print("Warning: HF_TOKEN environment variable not set. Some operations might fail.")

# Function to encode image to base64
def encode_image(image_path_or_pil):
    if not image_path_or_pil:
        print("No image path or PIL Image provided")
        return None
    
    try:
        if isinstance(image_path_or_pil, Image.Image):
            image = image_path_or_pil
            print(f"Encoding PIL Image object.")
        elif isinstance(image_path_or_pil, str):
            print(f"Encoding image from path: {image_path_or_pil}")
            if not os.path.exists(image_path_or_pil):
                print(f"Error: Image file not found at {image_path_or_pil}")
                return None
            image = Image.open(image_path_or_pil)
        else:
            print(f"Error: Unsupported image input type: {type(image_path_or_pil)}")
            return None
        
        if image.mode == 'RGBA':
            image = image.convert('RGB')
        
        buffered = io.BytesIO()
        image.save(buffered, format="JPEG") # Or PNG if preferred, ensure consistency
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
        print("Image encoded successfully to base64.")
        return img_str
    except Exception as e:
        print(f"Error encoding image: {e}")
        import traceback
        traceback.print_exc()
        return None
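
# Example (hypothetical path): encode_image("cat.png") returns a base64 JPEG string,
# suitable for embedding as f"data:image/jpeg;base64,{b64}" in a multimodal chat payload.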

# Dictionary to store active MCP connections
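# Shape: {server_name: {"client": MCPClient, "tools": [tool, ...], "url": str}}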
mcp_connections = {}

def connect_to_mcp_server(server_url, server_name=None):
    """Connect to an MCP server and return available tools"""
    if not server_url:
        return None, "No server URL provided. Please enter a valid URL."
    
    try:
        print(f"Attempting to connect to MCP server at URL: {server_url}")
        client = MCPClient({"url": server_url}) # This might block or raise if connection fails
        tools = client.get_tools() # This should also be a blocking call until tools are fetched
        
        name = server_name.strip() if server_name and server_name.strip() else f"Server_{len(mcp_connections) + 1}"
        mcp_connections[name] = {"client": client, "tools": tools, "url": server_url}
        
        print(f"Successfully connected to MCP server: {name} with {len(tools)} tools.")
        return name, f"Successfully connected to '{name}' ({server_url}). Found {len(tools)} tool(s)."
    except Exception as e:
        print(f"Error connecting to MCP server at {server_url}: {e}")
        import traceback
        traceback.print_exc()
        return None, f"Error connecting to MCP server '{server_url}': {str(e)}"

def list_mcp_tools(server_name):
    """List available tools for a connected MCP server"""
    if server_name not in mcp_connections:
        return "Server not connected or name not found."
    
    tools = mcp_connections[server_name]["tools"]
    tool_info = []
    for tool in tools:
        tool_info.append(f"- **{tool.name}**: {tool.description}")
    
    if not tool_info:
        return "No tools available for this server."
    
    return "\n".join(tool_info)

def call_mcp_tool(server_name, tool_name, **kwargs):
    """Call a specific tool from an MCP server and process its result."""
    if server_name not in mcp_connections:
        return {"type": "error", "message": f"Server '{server_name}' not connected."}
    
    mcp_client_instance = mcp_connections[server_name]["client"]
    
    try:
        print(f"Calling MCP tool: {server_name}.{tool_name} with args: {kwargs}")
        # Assuming mcp_client_instance.call_tool returns an mcp.client.tool.ToolResult object
        tool_result = mcp_client_instance.call_tool(tool_name, kwargs) 
        
        if tool_result and tool_result.content:
            # Process multiple blocks if present, concatenating text or prioritizing audio
            audio_block_found = None
            text_parts = []
            json_parts = []
            other_parts = []

            for block in tool_result.content:
                if hasattr(block, 'uri') and isinstance(block.uri, str) and block.uri.startswith('data:audio/'):
                    audio_block_found = {
                        "type": "audio", 
                        "data_uri": block.uri, 
                        "name": getattr(block, 'name', 'audio_output.wav') 
                    }
                    break # Prioritize first audio block
                elif hasattr(block, 'text') and block.text is not None:
                    text_parts.append(str(block.text))
                elif hasattr(block, 'json_data') and block.json_data is not None:
                    try:
                        json_parts.append(json.dumps(block.json_data, indent=2))
                    except TypeError:
                        json_parts.append(str(block.json_data)) # Fallback
                else:
                    other_parts.append(str(block))
            
            if audio_block_found:
                print(f"MCP tool returned audio: {audio_block_found['name']}")
                return audio_block_found
            elif text_parts:
                full_text = "\n".join(text_parts)
                print(f"MCP tool returned text: {full_text[:100]}...")
                return {"type": "text", "value": full_text}
            elif json_parts:
                full_json_str = "\n".join(json_parts)
                print(f"MCP tool returned JSON string.")
                return {"type": "json_string", "value": full_json_str} # Treat as string for display
            elif other_parts:
                print(f"MCP tool returned other content types.")
                return {"type": "text", "value": "\n".join(other_parts)}
            else:
                print("MCP tool executed but returned no interpretable primary content blocks.")
                return {"type": "text", "value": "Tool executed, but returned no standard content (audio/text/json)."}

        print("MCP tool executed, but ToolResult or its content was empty.")
        return {"type": "text", "value": "Tool executed, but returned no content."}
    except Exception as e:
        print(f"Error calling MCP tool '{tool_name}' or processing its result: {e}")
        import traceback
        traceback.print_exc()
        return {"type": "error", "message": f"Error during MCP tool '{tool_name}' execution: {str(e)}"}

def analyze_message_for_tool_call(message, active_mcp_servers, llm_client, llm_model_to_use, base_system_message):
    """Analyze a message to determine if an MCP tool should be called"""
    if not message or not message.strip() or not active_mcp_servers:
        return None, None
    
    tool_info_for_llm = []
    for server_name_iter in active_mcp_servers:
        if server_name_iter in mcp_connections:
            server_tools = mcp_connections[server_name_iter]["tools"]
            for tool in server_tools:
                # Provide a concise description for the LLM
                tool_info_for_llm.append(
                    f"- Server: '{server_name_iter}', Tool: '{tool.name}', Description: '{tool.description}'"
                )
    
    if not tool_info_for_llm:
        print("No active MCP tools found for analysis.")
        return None, None
    
    tools_string_for_llm = "\n".join(tool_info_for_llm)
    
    # More robust system prompt for tool detection
    analysis_system_prompt = f"""You are an expert assistant that determines if a user's request requires an external tool.
You have access to the following tools:
{tools_string_for_llm}

Based on the user's message, decide if any of these tools are appropriate.
If a tool is needed, respond ONLY with a JSON object containing:
"server_name": The name of the server providing the tool.
"tool_name": The name of the tool to be called.
"parameters": A dictionary of parameters for the tool, inferred from the user's message. Ensure parameter names match what the tool expects (often 'text', 'query', 'speed', etc.).

If NO tool is needed, respond ONLY with the exact string: NO_TOOL_NEEDED

Example 1 (TTS tool):
User: "Can you say 'hello world' for me at a slightly faster speed?"
Response: {{"server_name": "kokoroTTS", "tool_name": "text_to_audio", "parameters": {{"text": "hello world", "speed": 1.2}}}}

Example 2 (File tool):
User: "Read the content of my_document.txt"
Response: {{"server_name": "FileSystemServer", "tool_name": "readFile", "parameters": {{"path": "my_document.txt"}}}}

Example 3 (No tool):
User: "What's the weather like today?" (Assuming no weather tool is listed)
Response: NO_TOOL_NEEDED

User's current message is: "{message}"
Now, provide your decision:"""
    
    try:
        print(f"Sending tool analysis request to LLM model: {llm_model_to_use}")
        response = llm_client.chat_completion(
            model=llm_model_to_use,
            messages=[
                # {"role": "system", "content": base_system_message}, # Optional: provide original system message for context
                {"role": "user", "content": analysis_system_prompt} # The prompt itself is the user message here
            ],
            temperature=0.1, # Low temperature for deterministic tool selection
            max_tokens=300,
            stop=["\n\n"] # Stop early if LLM adds extra verbiage
        )
        
        analysis_text = response.choices[0].message.content.strip()
        print(f"LLM tool analysis response: '{analysis_text}'")
        
        if "NO_TOOL_NEEDED" in analysis_text or analysis_text == "NO_TOOL_NEEDED":
            print("LLM determined no tool needed.")
            return None, None
        
        # Try to extract JSON from the response (strip any markdown code fences first)
        if analysis_text.startswith("```json"):
            analysis_text = analysis_text.replace("```json", "").replace("```", "").strip()
        elif analysis_text.startswith("```"):
            analysis_text = analysis_text.replace("```", "").strip()

        json_start = analysis_text.find("{")
        json_end = analysis_text.rfind("}") + 1
        
        if json_start == -1 or json_end <= json_start:
            print(f"Could not find valid JSON object in LLM response: '{analysis_text}'")
            return None, None
            
        json_str = analysis_text[json_start:json_end]
        try:
            tool_call_data = json.loads(json_str)
            if "server_name" in tool_call_data and "tool_name" in tool_call_data:
                print(f"LLM suggested tool call: {tool_call_data}")
                return tool_call_data.get("server_name"), {
                    "tool_name": tool_call_data.get("tool_name"),
                    "parameters": tool_call_data.get("parameters", {})
                }
            else:
                print(f"LLM response parsed as JSON but missing server_name or tool_name: {json_str}")
                return None, None
        except json.JSONDecodeError as e:
            print(f"Failed to parse tool call JSON from LLM response: '{json_str}'. Error: {e}")
            return None, None
            
    except Exception as e:
        print(f"Error during LLM analysis for tool calls: {str(e)}")
        import traceback
        traceback.print_exc()
        return None, None
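
# Example: for "say hello out loud", this might return
#   ("kokoroTTS", {"tool_name": "text_to_audio", "parameters": {"text": "hello"}})
# and (None, None) when no tool applies or parsing fails.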

def respond(
    message_text_input, # From user function, this is just the text part
    message_files_input, # From user function, this is the list of file paths
    history_tuples: list[tuple[tuple[str, list], str]], # History: list of ((user_text, [user_files]), assistant_response)
    system_message_prompt,
    max_tokens_val,
    temperature_val,
    top_p_val,
    frequency_penalty_val,
    seed_val,
    provider_choice,
    custom_api_key_val,
    custom_model_id,    
    # model_search_term_val, # Not directly used in respond, but kept for signature consistency if UI passes it
    selected_hf_model_id,
    mcp_is_enabled,
    active_mcp_server_names, # List of selected server names
    mcp_interaction_mode_choice
):
    print(f"\n--- RESPOND FUNCTION CALLED ---")
    print(f"Message Text: '{message_text_input}'")
    print(f"Message Files: {message_files_input}")
    # print(f"History (first item type if exists): {type(history_tuples) if history_tuples else 'No history'}")
    print(f"System Prompt: '{system_message_prompt}'")
    print(f"Provider: {provider_choice}, MCP Enabled: {mcp_is_enabled}, MCP Mode: {mcp_interaction_mode_choice}")
    print(f"Active MCP Servers: {active_mcp_server_names}")

    token_to_use_for_llm = custom_api_key_val if custom_api_key_val.strip() else ACCESS_TOKEN
    if not token_to_use_for_llm and provider_choice != "hf-inference": # Basic check
        yield "Error: API Key required for non-hf-inference providers."
        return
    
    llm_client_instance = InferenceClient(token=token_to_use_for_llm, provider=provider_choice)
    
    current_seed = None if seed_val == -1 else seed_val
    model_id_for_llm = custom_model_id.strip() if custom_model_id.strip() else selected_hf_model_id
    print(f"Using LLM model: {model_id_for_llm} via {provider_choice}")

    # --- MCP Tool Call Logic ---
    if mcp_is_enabled and (message_text_input or message_files_input) and active_mcp_server_names:
        tool_call_output_dict = None
        invoked_tool_display_name = "a tool"
        invoked_server_display_name = "an MCP server"

        if message_text_input and message_text_input.startswith("/mcp"):
            print("Processing explicit MCP command...")
            command_parts = message_text_input.split(" ", 3)
            if len(command_parts) < 3:
                yield "Invalid MCP command. Format: /mcp <server_name> <tool_name> [arguments_json]"
                return
            
            _, server_name_cmd, tool_name_cmd = command_parts[:3]
            invoked_server_display_name = server_name_cmd
            invoked_tool_display_name = tool_name_cmd
            args_json_str = "{}" if len(command_parts) < 4 else command_parts
            
            try:
                args_dict_cmd = json.loads(args_json_str)
                tool_call_output_dict = call_mcp_tool(invoked_server_display_name, invoked_tool_display_name, **args_dict_cmd)
            except json.JSONDecodeError:
                yield f"Invalid JSON arguments for MCP command: {args_json_str}"
                return
            except Exception as e_cmd:
                yield f"Error preparing MCP command: {str(e_cmd)}"
                return

        elif mcp_interaction_mode_choice == "Natural Language":
            print("Analyzing message for natural language tool call...")
            # For natural language, primarily use message_text_input. Files could be context later.
            detected_server_nl, tool_info_nl = analyze_message_for_tool_call(
                message_text_input, 
                active_mcp_server_names, 
                llm_client_instance, 
                model_id_for_llm, 
                system_message_prompt
            )
            
            if detected_server_nl and tool_info_nl and tool_info_nl.get("tool_name"):
                invoked_server_display_name = detected_server_nl
                invoked_tool_display_name = tool_info_nl['tool_name']
                tool_params_nl = tool_info_nl.get("parameters", {})
                tool_call_output_dict = call_mcp_tool(invoked_server_display_name, invoked_tool_display_name, **tool_params_nl)

        # --- Handle MCP Tool Result (if a tool was called) ---
        if tool_call_output_dict:
            response_message_parts = [f"I attempted to use the **{invoked_tool_display_name}** tool from **{invoked_server_display_name}**."]
            
            if tool_call_output_dict.get("type") == "audio":
                audio_data_uri = tool_call_output_dict["data_uri"]
                audio_html_tag = f"<audio controls src='{audio_data_uri}' title='{tool_call_output_dict.get('name', 'Audio Output')}'></audio>"
                response_message_parts.append(f"Here's the audio output:\n{audio_html_tag}")
            elif tool_call_output_dict.get("type") == "text":
                response_message_parts.append(f"\nResult:\n```\n{tool_call_output_dict['value']}\n```")
            elif tool_call_output_dict.get("type") == "json_string": # Changed from "json" to avoid confusion with dict
                response_message_parts.append(f"\nResult (JSON):\n```json\n{tool_call_output_dict['value']}\n```")
            elif tool_call_output_dict.get("type") == "error":
                response_message_parts.append(f"\nUnfortunately, there was an error: {tool_call_output_dict['message']}")
            else: # Fallback for unexpected result structure
                response_message_parts.append(f"\nThe tool returned: {str(tool_call_output_dict)}")
            
            yield "\n".join(response_message_parts)
            return # End here if a tool was called and processed

    # --- Regular LLM Response Logic (if no MCP tool was successfully called and returned primary content) ---
    print("Proceeding with standard LLM response generation.")
    
    # Prepare current user message for LLM (multimodal if files exist)
    current_user_llm_content = []
    if message_text_input and message_text_input.strip():
        current_user_llm_content.append({"type": "text", "text": message_text_input})
    
    if message_files_input:
        for file_path in message_files_input:
            if file_path: # file_path is already the actual temp path from gr.File or gr.Image
                encoded_img_str = encode_image(file_path)
                if encoded_img_str:
                    current_user_llm_content.append({
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{encoded_img_str}"}
                    })
                else:
                    print(f"Warning: Failed to encode image {file_path} for LLM.")
    
    if not current_user_llm_content:
        print("No content (text or valid files) in current user message for LLM.")
        yield "" # Or some indicator of no action
        return

    # Augment system message with MCP tool info if enabled
    augmented_sys_msg = system_message_prompt
    if mcp_is_enabled and active_mcp_server_names:
        mcp_tool_descriptions_for_llm = []
        for server_name_iter in active_mcp_server_names:
            if server_name_iter in mcp_connections:
                # Use the more detailed list_mcp_tools output for the system prompt if desired
                tools_list_str = list_mcp_tools(server_name_iter) # This returns markdown
                mcp_tool_descriptions_for_llm.append(f"From server '{server_name_iter}':\n{tools_list_str}")
        
        if mcp_tool_descriptions_for_llm:
            full_tools_info_str = "\n\n".join(mcp_tool_descriptions_for_llm)
            interaction_advice = ""
            if mcp_interaction_mode_choice == "Command Mode":
                interaction_advice = "The user can invoke these tools using '/mcp <server_name> <tool_name> <json_args>'."
            # For Natural Language mode, the LLM doesn't need explicit instruction in system prompt
            # as `analyze_message_for_tool_call` handles that part.
            
            augmented_sys_msg += f"\n\nYou also have access to the following external tools via Model Context Protocol (MCP):\n{full_tools_info_str}\n{interaction_advice}"

    # Prepare messages list for LLM
    messages_for_llm_api = [{"role": "system", "content": augmented_sys_msg}]
    
    for hist_user_turn, hist_assist_response in history_tuples:
        hist_user_text, hist_user_files = hist_user_turn # Unpack ((text, [files]))
        
        history_user_llm_content = []
        if hist_user_text and hist_user_text.strip():
            history_user_llm_content.append({"type": "text", "text": hist_user_text})
        if hist_user_files:
            for hist_file_path in hist_user_files:
                encoded_hist_img = encode_image(hist_file_path)
                if encoded_hist_img:
                    history_user_llm_content.append({
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{encoded_hist_img}"}
                    })
        if history_user_llm_content: # Only add if there's actual content
            messages_for_llm_api.append({"role": "user", "content": history_user_llm_content})
        
        if hist_assist_response and hist_assist_response.strip():
            messages_for_llm_api.append({"role": "assistant", "content": hist_assist_response})

    messages_for_llm_api.append({"role": "user", "content": current_user_llm_content})
    # print(f"Final messages for LLM API: {json.dumps(messages_for_llm_api, indent=2)}")


    llm_parameters = {
        "max_tokens": max_tokens_val, "temperature": temperature_val, "top_p": top_p_val,
        "frequency_penalty": frequency_penalty_val,
    }
    if current_seed is not None:
        llm_parameters["seed"] = current_seed

    print(f"Sending request to LLM: Model={model_id_for_llm}, Params={llm_parameters}")
    streamed_response_text = ""
    try:
        llm_stream = llm_client_instance.chat_completion(
            model=model_id_for_llm,
            messages=messages_for_llm_api,
            stream=True,
            **llm_parameters
        )
        
        # print("Streaming LLM response: ", end="", flush=True)
        for chunk in llm_stream:
            if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
                delta = chunk.choices[0].delta
                if hasattr(delta, 'content') and delta.content:
                    token = delta.content
                    # print(token, end="", flush=True)
                    streamed_response_text += token
                    yield streamed_response_text
        # print("\nLLM Stream finished.")
    except Exception as e_llm:
        error_msg = f"Error during LLM inference: {str(e_llm)}"
        print(error_msg)
        import traceback
        traceback.print_exc()
        streamed_response_text += f"\n{error_msg}" # Append error to existing stream if any
        yield streamed_response_text

    print(f"--- RESPOND FUNCTION COMPLETED ---")


# GRADIO UI
with gr.Blocks(theme="Nymbo/Nymbo_Theme", title="Serverless TextGen Hub + MCP") as demo:
    gr.Markdown("# Serverless TextGen Hub with MCP Client")
    chatbot = gr.Chatbot(
        label="Chat",
        height=600, 
        show_copy_button=True, 
        placeholder="Select a model, connect MCP servers (optional), and start chatting!",
        bubble_full_width=False,
        avatar_images=(None, "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo-square.png") 
    )
    
    with gr.Row():
        msg_textbox = gr.MultimodalTextbox( # Changed from gr.Textbox to gr.MultimodalTextbox
            placeholder="Type a message or upload images... (Use /mcp for commands)",
            show_label=False,
            container=False,
            scale=12,
            file_types=["image"], # Can add more types like "audio", "video" if supported by models
            file_count="multiple" # Allow multiple image uploads
        )
        # submit_button = gr.Button("Send", variant="primary", scale=1, min_width=100) # Optional explicit send button

    with gr.Accordion("LLM Settings", open=False):
        system_message_prompt_box = gr.Textbox(
            value="You are a helpful and versatile AI assistant. You can understand text and images. If you have access to MCP tools, you can use them when appropriate or when the user asks.", 
            label="System Prompt", lines=3
        )
        
        with gr.Row():
            with gr.Column(scale=1):
                max_tokens_slider_ui = gr.Slider(minimum=128, maximum=8192, value=1024, step=128, label="Max New Tokens")
                temperature_slider_ui = gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.05, label="Temperature")
                top_p_slider_ui = gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top-P (Nucleus Sampling)")
            with gr.Column(scale=1):
                frequency_penalty_slider_ui = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
                seed_slider_ui = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
        
        providers_list_ui = [
            "hf-inference", "cerebras", "together", "sambanova", "novita", 
            "cohere", "fireworks-ai", "hyperbolic", "nebius",
        ]
        provider_radio_ui = gr.Radio(choices=providers_list_ui, value="hf-inference", label="Inference Provider")
        
        byok_textbox_ui = gr.Textbox(label="Your Hugging Face API Key (Optional)", placeholder="Enter HF Token if using non-hf-inference providers or private models", type="password")
        
        custom_model_id_box = gr.Textbox(label="Custom Model ID (Overrides selection below)", placeholder="e.g., meta-llama/Llama-3-8B-Instruct")
        
        model_search_box_ui = gr.Textbox(label="Filter Featured Models", placeholder="Search...", lines=1)
        
        # More diverse model list, including some known multimodal ones
        featured_models_list_data = [
            "meta-llama/Meta-Llama-3.1-8B-Instruct", # Good default
            "meta-llama/Meta-Llama-3.1-70B-Instruct",
            "mistralai/Mistral-Nemo-Instruct-2407",
            "mistralai/Mixtral-8x22B-Instruct-v0.1",
            "Qwen/Qwen2-7B-Instruct",
            "microsoft/Phi-3-medium-128k-instruct",
            # Multimodal
            "Salesforce/blip-image-captioning-large", # Example, might not be chat
            "llava-hf/llava-1.5-7b-hf", # LLaVA example
            "microsoft/kosmos-2-patch14-224", # Kosmos-2
            "google/paligemma-3b-mix-448", # PaliGemma
        ]
        featured_model_radio_ui = gr.Radio(label="Select a Featured Model", choices=featured_models_list_data, value="meta-llama/Meta-Llama-3.1-8B-Instruct", interactive=True)
        
        gr.Markdown("Tip: For multimodal chat, ensure selected model supports image inputs (e.g., LLaVA, PaliGemma, Kosmos-2).")

    with gr.Accordion("MCP Client Settings", open=False):
        mcp_enabled_checkbox_ui = gr.Checkbox(label="Enable MCP Support", value=False, info="Connect to external tools and services via MCP.")
        
        with gr.Row():
            mcp_server_url_textbox = gr.Textbox(label="MCP Server URL", placeholder="e.g., https://your-mcp-server.hf.space/gradio_api/mcp/sse")
            mcp_server_name_textbox = gr.Textbox(label="Friendly Server Name (Optional)", placeholder="MyTTS_Server")
            mcp_connect_button_ui = gr.Button("Connect", variant="secondary")
        
        mcp_connection_status_textbox = gr.Textbox(label="MCP Connection Status", placeholder="No MCP servers connected.", interactive=False, lines=2)
        
        active_mcp_servers_dropdown = gr.Dropdown(
            label="Use Tools From (Select Active MCP Servers)", choices=[], multiselect=True, 
            info="Choose which connected servers the LLM can use tools from."
        )
        
        mcp_interaction_mode_radio = gr.Radio(
            label="MCP Interaction Mode", choices=["Natural Language", "Command Mode"], value="Natural Language",
            info="Natural Language: AI tries to detect tool use. Command Mode: Use '/mcp ...' syntax."
        )
        gr.Markdown("Example MCP Command: `/mcp MyTTS text_to_audio {\"text\": \"Hello world!\"}`")

    # --- Event Handlers ---
    
    # Store history as list of tuples: [ ((user_text, [user_files]), assistant_response), ... ]
    chat_history_state = gr.State([]) 
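
    # Helper to render the internal history format for gr.Chatbot, which expects
    # [user_text, bot_text] pairs. A minimal sketch assuming uploaded files arrive as
    # plain path strings (the MultimodalTextbox default here); adjust if yours differ.
    def history_to_chatbot_pairs(history):
        pairs = []
        for (turn_text, turn_files), bot_msg in history:
            display_user = turn_text or ""
            for f in (turn_files or []):
                display_user += f"\n[file: {os.path.basename(f)}]"  # Show file names in the user bubble
            pairs.append((display_user, bot_msg))
        return pairs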

    def user_interaction(user_multimodal_input, current_chat_history):
        user_text = user_multimodal_input["text"] if user_multimodal_input and "text" in user_multimodal_input else ""
        user_files = user_multimodal_input["files"] if user_multimodal_input and "files" in user_multimodal_input else []
        
        # Only add to history if there's text or files
        if user_text or user_files:
            current_chat_history.append( ((user_text, user_files), None) ) # Append user turn, assistant response is None initially
        return current_chat_history, gr.update(value={"text": "", "files": []}) # Clear input textbox

    def bot_response_generator(
        current_chat_history, system_prompt, max_tokens, temp, top_p_val, freq_penalty, seed_val, 
        provider_val, api_key_val, custom_model_val, selected_model_val, # Removed search_term as it's not directly used by respond
        mcp_enabled_val, active_servers_val, mcp_mode_val
    ):
        if not current_chat_history or current_chat_history[-1][1] is not None: # No pending user turn, or it already has a bot response
            yield history_to_chatbot_pairs(current_chat_history)
            return

        user_turn_content, _ = current_chat_history[-1] # Get the latest user turn: (text, [files])
        message_text, message_files = user_turn_content

        # The history passed to `respond` should be all turns *before* the current one
        history_for_respond = current_chat_history[:-1]

        response_stream = respond(
            message_text, message_files, history_for_respond,
            system_prompt, max_tokens, temp, top_p_val, freq_penalty, seed_val,
            provider_val, api_key_val, custom_model_val, selected_model_val,
            mcp_enabled_val, active_servers_val, mcp_mode_val
        )
        
        full_bot_message = ""
        for chunk in response_stream:
            full_bot_message = chunk
            current_chat_history[-1] = (user_turn_content, full_bot_message) # Update last item's assistant part
            yield current_chat_history
    
    # Link UI components to functions
    msg_textbox.submit(
        user_interaction,
        inputs=[msg_textbox, chat_history_state],
        outputs=[chat_history_state, msg_textbox] # Update history and clear input
    ).then(
        bot_response_generator,
        inputs=[
            chat_history_state, system_message_prompt_box, max_tokens_slider_ui, temperature_slider_ui, 
            top_p_slider_ui, frequency_penalty_slider_ui, seed_slider_ui, provider_radio_ui, 
            byok_textbox_ui, custom_model_id_box, featured_model_radio_ui, 
            mcp_enabled_checkbox_ui, active_mcp_servers_dropdown, mcp_interaction_mode_radio
        ],
        outputs=[chatbot] # Stream to chatbot
    )

    # MCP Connection
    def handle_mcp_connect(url, name_suggestion):
        if not url or not url.strip():
            return "MCP Server URL cannot be empty.", gr.update(choices=list(mcp_connections.keys()))
        
        _, status_msg = connect_to_mcp_server(url, name_suggestion)
        # Update dropdown choices with current server names
        new_choices = list(mcp_connections.keys())
        # Preserve selected values if they are still valid connections
        # current_selected = active_mcp_servers_dropdown.value # This might not work directly
        # new_selected = [s for s in current_selected if s in new_choices]
        return status_msg, gr.update(choices=new_choices) #, value=new_selected)

    mcp_connect_button_ui.click(
        handle_mcp_connect,
        inputs=[mcp_server_url_textbox, mcp_server_name_textbox],
        outputs=[mcp_connection_status_textbox, active_mcp_servers_dropdown]
    )

    # Model Filtering
    def filter_featured_models(search_query):
        if not search_query:
            return gr.update(choices=featured_models_list_data)
        filtered = [m for m in featured_models_list_data if search_query.lower() in m.lower()]
        return gr.update(choices=filtered if filtered else ["No models match your search"])

    model_search_box_ui.change(filter_featured_models, inputs=model_search_box_ui, outputs=featured_model_radio_ui)

    # Auto-select hf-inference if BYOK is empty and other provider is chosen
    def validate_api_key_for_provider(api_key_text, current_provider):
        if not api_key_text.strip() and current_provider != "hf-inference":
            gr.Warning("API Key needed for non-hf-inference providers. Defaulting to hf-inference.")
            return gr.update(value="hf-inference")
        return current_provider # No change if key provided or hf-inference selected

    byok_textbox_ui.change(validate_api_key_for_provider, inputs=[byok_textbox_ui, provider_radio_ui], outputs=provider_radio_ui)
    # Use .input (user-initiated only) so the programmatic update above cannot retrigger this handler in a loop.
    provider_radio_ui.input(validate_api_key_for_provider, inputs=[byok_textbox_ui, provider_radio_ui], outputs=provider_radio_ui)


if __name__ == "__main__":
    print("Launching Gradio demo...")
    demo.queue().launch(debug=True, show_api=False) # No mcp_server flag: this app is an MCP *client*, not a server