import logging

import gradio as gr
from huggingface_hub import InferenceClient, InferenceTimeoutError

# Configure logging and use a module-level named logger rather than the root
# logger, so log records can be filtered per-module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize client with a 30 s request timeout so hung requests fail fast.
client = InferenceClient("hackergeek/gemma-finetuned", timeout=30)


def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream a chat completion for *message*, yielding the growing reply.

    Args:
        message: The user's newest message.
        history: Prior (user, assistant) message pairs from the chat UI.
        system_message: System prompt; a default is substituted when blank.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Yields:
        The accumulated response text after each streamed token, or a single
        user-facing error string when the request fails.
    """
    try:
        # Fall back to a sensible default when the system prompt is blank.
        if not system_message.strip():
            system_message = "You are a helpful AI assistant."

        # Rebuild the full conversation in the chat-completion message format.
        messages = [{"role": "system", "content": system_message}]
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})

        response = ""

        # Stream tokens and yield the running text so the UI updates live.
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Skip keep-alive / empty-delta chunks.
            if chunk.choices and chunk.choices[0].delta.content:
                response += chunk.choices[0].delta.content
                yield response

    except InferenceTimeoutError:
        logger.error("API request timed out")
        yield "Error: Request timed out. Please try again."
    except Exception as e:
        # Boundary handler: log the full traceback (logger.exception) with
        # lazy %-formatting, then surface a readable message to the UI.
        logger.exception("API error: %s", e)
        yield f"Error: {str(e)}. Please check your input and try again."
# Custom CSS tweaks: taller chat area, smaller dark-mode font, hidden footer.
custom_css = """
#chatbot {
    min-height: 400px;
}
.dark #chatbot {
    font-size: 14px !important;
}
footer {
    visibility: hidden;
}
"""

# Assemble the Gradio UI.
with gr.Blocks(css=custom_css, title="Gemma Chatbot") as demo:
    gr.Markdown("# 🚀 Gemma Fine-Tuned Chatbot")
    gr.Markdown("Chat with the fine-tuned Gemma AI assistant!")

    # Collapsible panel holding the generation controls.
    with gr.Accordion("⚙️ Advanced Settings", open=False):
        system_prompt_box = gr.Textbox(
            value="You are a helpful AI assistant.",
            label="System Role",
            info="Initial instructions for the AI",
        )
        max_tokens_slider = gr.Slider(
            minimum=32,
            maximum=2048,
            value=512,
            step=32,
            label="Max Response Length",
        )
        temperature_slider = gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.7,
            step=0.1,
            label="Creativity (Temperature)",
        )
        top_p_slider = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Focus (Top-p)",
        )

    # Wire the streaming handler and its extra inputs into the chat widget.
    chat_interface = gr.ChatInterface(
        respond,
        additional_inputs=[
            system_prompt_box,
            max_tokens_slider,
            temperature_slider,
            top_p_slider,
        ],
        examples=[
            ["Explain quantum computing in simple terms"],
            ["What's the weather like in Paris?"],
            ["Write a poem about artificial intelligence"],
        ],
        retry_btn=None,
        undo_btn=None,
        clear_btn="✨ New Chat",
    )

if __name__ == "__main__":
    # Bind on all interfaces on the conventional Gradio port.
    demo.launch(server_name="0.0.0.0", server_port=7860)