import gradio as gr
from huggingface_hub import InferenceClient
import os
import json
import base64
from PIL import Image
import io

ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")

# Function to encode image to base64
def encode_image(image_path):
    if not image_path:
        print("No image path provided")
        return None

    try:
        print(f"Encoding image from path: {image_path}")

        # If it's already a PIL Image
        if isinstance(image_path, Image.Image):
            image = image_path
        else:
            # Try to open the image file
            image = Image.open(image_path)

        # Convert to RGB if image has an alpha channel (RGBA)
        if image.mode == 'RGBA':
            image = image.convert('RGB')

        # Encode to base64
        buffered = io.BytesIO()
        image.save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
        print("Image encoded successfully")
        return img_str
    except Exception as e:
        print(f"Error encoding image: {e}")
        return None

def respond(
    message,
    image_files,  # Changed parameter name and structure
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    provider,
    custom_api_key,
    custom_model,
    model_search_term,
    selected_model
):
    print(f"Received message: {message}")
    print(f"Received {len(image_files) if image_files else 0} images")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
    print(f"Selected provider: {provider}")
    print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
    print(f"Selected model (custom_model): {custom_model}")
    print(f"Model search term: {model_search_term}")
    print(f"Selected model from radio: {selected_model}")

    # Determine which token to use
    token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN

    if custom_api_key.strip() != "":
        print("USING CUSTOM API KEY: BYOK token provided by user is being used for authentication")
    else:
        print("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")

    # Initialize the Inference Client with the provider and appropriate token
    client = InferenceClient(token=token_to_use, provider=provider)
    print(f"Hugging Face Inference Client initialized with {provider} provider.")

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None

    # Create multimodal content if images are present
    if image_files and len(image_files) > 0:
        # Process the user message to include images
        user_content = []

        # Add text part if there is any
        if message and message.strip():
            user_content.append({
                "type": "text",
                "text": message
            })

        # Add image parts
        for img in image_files:
            if img is not None:
                # Get raw image data from path
                try:
                    encoded_image = encode_image(img)
                    if encoded_image:
                        user_content.append({
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{encoded_image}"
                            }
                        })
                except Exception as e:
                    print(f"Error encoding image: {e}")
    else:
        # Text-only message
        user_content = message

    # Prepare messages in the format expected by the API
    messages = [{"role": "system", "content": system_message}]
    print("Initial messages array constructed.")

    # Add conversation history to the context
    for val in history:
        user_part = val[0]
        assistant_part = val[1]
        if user_part:
            # Handle both text-only and multimodal messages in history
            if isinstance(user_part, tuple) and len(user_part) == 2:
                # This is a multimodal message with text and images
                history_content = []
                if user_part[0]:  # Text
                    history_content.append({
                        "type": "text",
"text": user_part[0] }) for img in user_part[1]: # Images if img: try: encoded_img = encode_image(img) if encoded_img: history_content.append({ "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{encoded_img}" } }) except Exception as e: print(f"Error encoding history image: {e}") messages.append({"role": "user", "content": history_content}) else: # Regular text message messages.append({"role": "user", "content": user_part}) print(f"Added user message to context (type: {type(user_part)})") if assistant_part: messages.append({"role": "assistant", "content": assistant_part}) print(f"Added assistant message to context: {assistant_part}") # Append the latest user message messages.append({"role": "user", "content": user_content}) print(f"Latest user message appended (content type: {type(user_content)})") # Determine which model to use, prioritizing custom_model if provided model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model print(f"Model selected for inference: {model_to_use}") # Start with an empty string to build the response as tokens stream in response = "" print(f"Sending request to {provider} provider.") # Prepare parameters for the chat completion request parameters = { "max_tokens": max_tokens, "temperature": temperature, "top_p": top_p, "frequency_penalty": frequency_penalty, } if seed is not None: parameters["seed"] = seed # Use the InferenceClient for making the request try: # Create a generator for the streaming response stream = client.chat_completion( model=model_to_use, messages=messages, stream=True, **parameters ) print("Received tokens: ", end="", flush=True) # Process the streaming response for chunk in stream: if hasattr(chunk, 'choices') and len(chunk.choices) > 0: # Extract the content from the response if hasattr(chunk.choices[0], 'delta') and hasattr(chunk.choices[0].delta, 'content'): token_text = chunk.choices[0].delta.content if token_text: print(token_text, end="", flush=True) response += token_text yield response print() except Exception as e: print(f"Error during inference: {e}") response += f"\nError: {str(e)}" yield response print("Completed response generation.") # Function to validate provider selection based on BYOK def validate_provider(api_key, provider): if not api_key.strip() and provider != "hf-inference": return gr.update(value="hf-inference") return gr.update(value=provider) # GRADIO UI with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo: # Create the chatbot component chatbot = gr.Chatbot( height=600, show_copy_button=True, placeholder="Select a model and begin chatting. 
        placeholder="Select a model and begin chatting. Now supports multiple inference providers and multimodal inputs",
        layout="panel"
    )
    print("Chatbot interface created.")

    # Multimodal textbox for messages (combines text and file uploads)
    msg = gr.MultimodalTextbox(
        placeholder="Type a message or upload images...",
        show_label=False,
        container=False,
        scale=12,
        file_types=["image"],
        file_count="multiple",
        sources=["upload"]
    )

    # Create accordion for settings
    with gr.Accordion("Settings", open=False):
        # System message
        system_message_box = gr.Textbox(
            value="You are a helpful AI assistant that can understand images and text.",
            placeholder="You are a helpful assistant.",
            label="System Prompt"
        )

        # Generation parameters
        with gr.Row():
            with gr.Column():
                max_tokens_slider = gr.Slider(
                    minimum=1,
                    maximum=4096,
                    value=512,
                    step=1,
                    label="Max tokens"
                )
                temperature_slider = gr.Slider(
                    minimum=0.1,
                    maximum=4.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature"
                )
                top_p_slider = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-P"
                )
            with gr.Column():
                frequency_penalty_slider = gr.Slider(
                    minimum=-2.0,
                    maximum=2.0,
                    value=0.0,
                    step=0.1,
                    label="Frequency Penalty"
                )
                seed_slider = gr.Slider(
                    minimum=-1,
                    maximum=65535,
                    value=-1,
                    step=1,
                    label="Seed (-1 for random)"
                )

        # Provider selection
        providers_list = [
            "hf-inference",  # Default Hugging Face Inference
            "cerebras",      # Cerebras provider
            "together",      # Together AI
            "sambanova",     # SambaNova
            "novita",        # Novita AI
            "cohere",        # Cohere
            "fireworks-ai",  # Fireworks AI
            "hyperbolic",    # Hyperbolic
            "nebius",        # Nebius
        ]

        provider_radio = gr.Radio(
            choices=providers_list,
            value="hf-inference",
            label="Inference Provider",
        )

        # New BYOK textbox
        byok_textbox = gr.Textbox(
            value="",
            label="BYOK (Bring Your Own Key)",
            info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
            placeholder="Enter your Hugging Face API token",
            type="password"  # Hide the API key for security
        )

        # Custom model box
        custom_model_box = gr.Textbox(
            value="",
            label="Custom Model",
            info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
            placeholder="meta-llama/Llama-3.3-70B-Instruct"
        )

        # Model search
        model_search_box = gr.Textbox(
            label="Filter Models",
            placeholder="Search for a featured model...",
            lines=1
        )

        # Featured models list
        models_list = [
            "meta-llama/Llama-3.2-11B-Vision-Instruct",
            "meta-llama/Llama-3.3-70B-Instruct",
            "meta-llama/Llama-3.1-70B-Instruct",
            "meta-llama/Llama-3.0-70B-Instruct",
            "meta-llama/Llama-3.2-3B-Instruct",
            "meta-llama/Llama-3.2-1B-Instruct",
            "meta-llama/Llama-3.1-8B-Instruct",
            "NousResearch/Hermes-3-Llama-3.1-8B",
            "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
            "mistralai/Mistral-Nemo-Instruct-2407",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "mistralai/Mistral-7B-Instruct-v0.2",
            "Qwen/Qwen3-235B-A22B",
            "Qwen/Qwen3-32B",
            "Qwen/Qwen2.5-72B-Instruct",
            "Qwen/Qwen2.5-3B-Instruct",
            "Qwen/Qwen2.5-0.5B-Instruct",
            "Qwen/QwQ-32B",
            "Qwen/Qwen2.5-Coder-32B-Instruct",
            "microsoft/Phi-3.5-mini-instruct",
            "microsoft/Phi-3-mini-128k-instruct",
            "microsoft/Phi-3-mini-4k-instruct",
        ]

        featured_model_radio = gr.Radio(
            label="Select a model below",
            choices=models_list,
            value="meta-llama/Llama-3.2-11B-Vision-Instruct",  # Default to a multimodal model
            interactive=True
        )

        gr.Markdown("[View all Text-to-Text models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [View all multimodal models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")

    # MCP Support Information Accordion
    with gr.Accordion("MCP Support (for LLMs)", open=False):
        gr.Markdown("""
        ### Model Context Protocol (MCP) Support

        This application can function as an MCP Server, allowing compatible AI models and agents (like Claude Desktop or custom MCP clients) to use its text and image generation capabilities as a tool.

        When MCP is enabled, Gradio automatically exposes the relevant functions (likely based on the `bot` function in this app) as MCP tools.

        **To connect an MCP client to this server:**

        1. Ensure this Gradio application is running.
        2. Use the following URL for the MCP server in your client configuration:
           - If running locally: `http://127.0.0.1:7860/gradio_api/mcp/sse`
           - If deployed on Hugging Face Spaces: `https://YOUR_USERNAME-YOUR_SPACENAME.hf.space/gradio_api/mcp/sse` (replace with your actual Space URL)

        **Example MCP Client Configuration (`mcp.json` or similar):**
        ```json
        {
          "mcpServers": {
            "serverlessTextgenHub": {
              "url": "http://127.0.0.1:7860/gradio_api/mcp/sse"
            }
          }
        }
        ```

        **Tool Parameters:** The exposed MCP tool will likely have parameters corresponding to the inputs of the `bot` function (e.g., `history`, `system_msg`, `max_tokens`, `temperature`, `model`, etc.).

        * **Important for `history` parameter:** For image inputs, the MCP client might need to format the `history` to include image references in a way the `bot` function can parse (e.g., markdown links `![Image](URL_or_base64_data_uri)` within the history's message part).
        * It's highly recommended to inspect the MCP schema for this server to understand the exact tool names, descriptions, and input/output schemas. You can usually find this at: `http://127.0.0.1:7860/gradio_api/mcp/schema` (or the equivalent URL for your deployed Space).

        This allows for powerful integrations where an AI agent can programmatically request text or multimodal generations from this Serverless-TextGen-Hub.
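
        **Inspecting the schema programmatically:** the snippet below is a minimal sketch (not part of this app's own logic) that downloads and pretty-prints the schema using only the Python standard library. It assumes the default local URL above; substitute your Space URL if deployed.

        ```python
        # Minimal sketch: fetch and pretty-print the MCP schema exposed by this app.
        # Assumes the app is running locally on the default port (7860).
        import json
        import urllib.request

        SCHEMA_URL = "http://127.0.0.1:7860/gradio_api/mcp/schema"

        with urllib.request.urlopen(SCHEMA_URL) as resp:
            schema = json.load(resp)

        # Dump the schema so tool names, descriptions, and parameter shapes can be inspected.
        print(json.dumps(schema, indent=2))
        ```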
""") # Chat history state chat_history = gr.State([]) # Function to filter models def filter_models(search_term): print(f"Filtering models with search term: {search_term}") filtered = [m for m in models_list if search_term.lower() in m.lower()] print(f"Filtered models: {filtered}") return gr.update(choices=filtered) # Function to set custom model from radio def set_custom_model_from_radio(selected): print(f"Featured model selected: {selected}") return selected # Function for the chat interface def user(user_message, history): print(f"User message received: {user_message}") if not user_message or (not user_message.get("text") and not user_message.get("files")): print("Empty message, skipping") return history # Return immediately if message is empty text_content = user_message.get("text", "").strip() files = user_message.get("files", []) print(f"Text content: {text_content}") print(f"Files: {files}") if not text_content and not files: # Check again after stripping text print("No content to display") return history # Append text message first if it exists and is not empty if text_content: print(f"Adding text message: {text_content}") history.append([text_content, None]) # Then append each image file as a separate message if files: for file_path in files: if file_path and isinstance(file_path, str): # Ensure file_path is valid print(f"Adding image: {file_path}") history.append([f"![Image]({file_path})", None]) # Image as a new message return history # Define bot response function def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model): if not history or not history[-1][0]: # Check if history or last message is empty print("No history or empty last message to process for bot") # Yield an empty update or the history itself to avoid errors # depending on how Gradio handles empty yields. # For safety, just return the history if it's in a bad state. yield history return user_message_content = history[-1][0] # This is the user's latest message (text or image markdown) print(f"Bot processing user message content: {user_message_content}") # Determine if the current turn is primarily about an image or text # This logic assumes images are added as separate history entries like "![Image](path)" # and text prompts might precede them or be separate. current_message_text_for_api = "" current_image_files_for_api = [] # Check if the last entry is an image if isinstance(user_message_content, str) and user_message_content.startswith("![Image]("): image_path = user_message_content.replace("![Image](", "").replace(")", "") current_image_files_for_api.append(image_path) print(f"Bot identified image in last history entry: {image_path}") # If it's an image, check the second to last entry for a text prompt if len(history) > 1: prev_content = history[-2][0] if isinstance(prev_content, str) and not prev_content.startswith("![Image]("): current_message_text_for_api = prev_content print(f"Bot identified preceding text for image: {current_message_text_for_api}") else: # Last entry is text current_message_text_for_api = user_message_content print(f"Bot identified text in last history entry: {current_message_text_for_api}") # The history sent to `respond` should not include the current turn's input, # as `respond` will add `message` (current_message_text_for_api) to its internal `messages` list. # If an image is present, it's passed via `image_files`. 
        history_for_respond_func = history[:-1]  # Pass history *before* the current turn

        history[-1][1] = ""  # Initialize assistant's response for the current turn

        for response_chunk in respond(
            message=current_message_text_for_api,
            image_files=current_image_files_for_api,
            history=history_for_respond_func,  # Pass prior history
            system_message=system_msg,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=freq_penalty,
            seed=seed,
            provider=provider,
            custom_api_key=api_key,
            custom_model=custom_model,
            model_search_term=search_term,  # Though these two might not be directly used by respond if model is fixed
            selected_model=selected_model
        ):
            history[-1][1] = response_chunk
            yield history

    # Event handlers
    msg.submit(
        user,
        [msg, chatbot],
        [chatbot],
        queue=False
    ).then(
        bot,
        [chatbot, system_message_box, max_tokens_slider, temperature_slider,
         top_p_slider, frequency_penalty_slider, seed_slider, provider_radio,
         byok_textbox, custom_model_box, model_search_box, featured_model_radio],
        [chatbot]
    ).then(
        lambda: {"text": "", "files": []},  # Clear inputs after submission
        None,
        [msg]
    )

    model_search_box.change(
        fn=filter_models,
        inputs=model_search_box,
        outputs=featured_model_radio
    )
    print("Model search box change event linked.")

    featured_model_radio.change(
        fn=set_custom_model_from_radio,
        inputs=featured_model_radio,
        outputs=custom_model_box
    )
    print("Featured model radio button change event linked.")

    byok_textbox.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
        outputs=provider_radio
    )
    print("BYOK textbox change event linked.")

    provider_radio.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
        outputs=provider_radio
    )
    print("Provider radio button change event linked.")

print("Gradio interface initialized.")

if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch(show_api=True, mcp_server=True)  # MCP SERVER ENABLED HERE