# Hugging Face Space: hackergeek/gemma-finetuned
# (scraped page header noted the Space was showing "Runtime error")
import gradio as gr
from huggingface_hub import InferenceClient, InferenceTimeoutError
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)

# Initialize the inference client with a 30 s timeout so hung requests
# surface as InferenceTimeoutError instead of blocking the UI forever.
client = InferenceClient("hackergeek/gemma-finetuned", timeout=30)
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """
    Stream a chat completion for *message*, yielding the growing reply.

    Args:
        message: Latest user message from the chat box.
        history: Prior (user, assistant) message pairs from the chat UI.
        system_message: System prompt; a default is substituted when blank.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated response text after each streamed token, or a
        user-facing error string when the API call fails or times out.
    """
    try:
        # Fall back to a sensible default when the system prompt is blank.
        if not system_message.strip():
            system_message = "You are a helpful AI assistant."

        # Rebuild the full conversation in OpenAI-style message format.
        messages = [{"role": "system", "content": system_message}]
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})

        response = ""
        # Stream tokens, yielding the partial text so the UI updates live.
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if chunk.choices and chunk.choices[0].delta.content:
                response += chunk.choices[0].delta.content
                yield response
    except InferenceTimeoutError:
        logging.error("API request timed out")
        yield "Error: Request timed out. Please try again."
    except Exception as e:
        # Broad catch is deliberate: any API failure becomes a chat message
        # instead of crashing the Gradio event handler.
        logging.error("API error: %s", e)
        yield f"Error: {str(e)}. Please check your input and try again."
# Custom CSS for better appearance: give the chat area a minimum height,
# shrink the dark-mode font slightly, and hide the default Gradio footer.
custom_css = """
#chatbot { min-height: 400px; }
.dark #chatbot { font-size: 14px !important; }
footer { visibility: hidden; }
"""
# Configure interface
with gr.Blocks(css=custom_css, title="Gemma Chatbot") as demo:
    gr.Markdown("# 🚀 Gemma Fine-Tuned Chatbot")
    gr.Markdown("Chat with the fine-tuned Gemma AI assistant!")

    # Generation controls, collapsed by default and fed into respond()
    # through ChatInterface's additional_inputs.
    with gr.Accordion("⚙️ Advanced Settings", open=False):
        system_input = gr.Textbox(
            value="You are a helpful AI assistant.",
            label="System Role",
            info="Initial instructions for the AI",
        )
        max_tokens = gr.Slider(
            minimum=32, maximum=2048, value=512,
            step=32, label="Max Response Length",
        )
        temperature = gr.Slider(
            minimum=0.1, maximum=2.0, value=0.7,
            step=0.1, label="Creativity (Temperature)",
        )
        top_p = gr.Slider(
            minimum=0.1, maximum=1.0, value=0.95,
            step=0.05, label="Focus (Top-p)",
        )

    chat_interface = gr.ChatInterface(
        respond,
        additional_inputs=[
            system_input,
            max_tokens,
            temperature,
            top_p,
        ],
        examples=[
            ["Explain quantum computing in simple terms"],
            ["What's the weather like in Paris?"],
            ["Write a poem about artificial intelligence"],
        ],
        # NOTE(review): retry_btn/undo_btn/clear_btn were removed from
        # gr.ChatInterface in Gradio 5.x — on Gradio >= 5 these kwargs raise
        # a TypeError at startup and would explain the Space's "Runtime
        # error". Confirm the pinned Gradio version; drop them if >= 5.
        retry_btn=None,
        undo_btn=None,
        clear_btn="✨ New Chat",
    )

if __name__ == "__main__":
    # Bind to all interfaces on the Hugging Face Spaces default port.
    demo.launch(server_name="0.0.0.0", server_port=7860)