# Hugging Face Spaces: Running on Zero (ZeroGPU)
import gradio as gr
from huggingface_hub import InferenceClient
# Configuration
MODEL_NAME = "RekaAI/reka-flash-3"
DEFAULT_MAX_LENGTH = 1024      # default max_new_tokens for generation
DEFAULT_TEMPERATURE = 0.7      # default sampling temperature

# System prompt sent ahead of every user turn
SYSTEM_PROMPT = """You are Reka Flash-3, a helpful AI assistant created by Reka AI.
Provide detailed, helpful answers while maintaining safety.
Format responses clearly using markdown when appropriate."""
def generate_response(
    message,
    chat_history,
    system_prompt,
    max_length,
    temperature,
    top_p,
    top_k,
    repetition_penalty,
    presence_penalty,
    frequency_penalty,
    show_reasoning,
):
    """Send one chat turn to the hosted model and append it to the history.

    Parameters mirror the Gradio controls; ``chat_history`` is a list of
    ``(user, assistant)`` tuples and is mutated in place.

    Returns:
        tuple: ``("", chat_history, reasoning)`` — the leading empty string
        clears the message textbox; ``reasoning`` is ``""`` unless
        ``show_reasoning`` is set and the backend supplied one.
    """
    # Flat prompt format; Reka Flash-3 may expect a specific chat template —
    # NOTE(review): confirm against the model card.
    formatted_prompt = f"System: {system_prompt}\n\nUser: {message}\n\nAssistant:"

    # Bind the model on the client. `text_generation`'s first positional
    # argument is the *prompt*, so the original call — which passed
    # MODEL_NAME positionally plus `prompt=` — raised
    # "got multiple values for argument 'prompt'".
    client = InferenceClient(model=MODEL_NAME)

    response = client.text_generation(
        formatted_prompt,
        max_new_tokens=max_length,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        # NOTE(review): presence/frequency penalties are not accepted by
        # every huggingface_hub version / inference backend — verify before
        # deploying.
        presence_penalty=presence_penalty,
        frequency_penalty=frequency_penalty,
        details=show_reasoning,
    )

    # With details=True the client returns an output object whose
    # `.generated_text` holds the answer; otherwise `response` is the plain
    # generated string.
    reasoning = ""
    final_answer = response
    details = getattr(response, "details", None)
    if show_reasoning and details is not None:
        # The original did `response.details.get(...)`, which crashes when
        # `.details` is an object rather than a dict; handle both shapes.
        # NOTE(review): TGI detail payloads carry token-level info — a
        # "reasoning" field is model/backend specific, confirm it exists.
        if isinstance(details, dict):
            reasoning = details.get("reasoning", "")
        else:
            reasoning = getattr(details, "reasoning", "") or ""
        final_answer = response.generated_text

    chat_history.append((message, final_answer))
    return "", chat_history, reasoning if show_reasoning else ""
# UI Components
with gr.Blocks(title="Reka Flash-3 Chat Demo", theme=gr.themes.Soft()) as demo:
    # Header Section
    gr.Markdown(
        """
        # Reka Flash-3 Chat Interface
        *Powered by [Reka Core AI](https://www.reka.ai/)*
        """
    )

    # Deployment Notice
    with gr.Accordion("Important Deployment Notice", open=True):
        gr.Markdown(
            f"""
            **To deploy this model on Hugging Face Spaces:**
            1. Request access to Reka Flash-3 from [Hugging Face Hub](https://huggingface.co/{MODEL_NAME})
            2. Ensure you have Hugging Face PRO subscription
            3. Add your HF token in Space settings
            4. Set `GPU_SMALL` or higher in Space hardware settings
            """
        )

    # Chat Interface: transcript alongside the raw reasoning pane
    with gr.Row():
        chatbot = gr.Chatbot(height=500)
        reasoning_display = gr.Textbox(
            label="Model Reasoning",
            interactive=False,
            visible=True,
            lines=20,
            max_lines=20,
        )

    # Input Section
    with gr.Row():
        message = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here...",
            lines=3,
            max_lines=6,
        )
        submit_btn = gr.Button("Send", variant="primary")

    # Basic sampling parameters
    with gr.Accordion("Normal Options", open=False):
        with gr.Row():
            max_length = gr.Slider(128, 4096, value=DEFAULT_MAX_LENGTH, label="Max Length")
            temperature = gr.Slider(0.1, 2.0, value=DEFAULT_TEMPERATURE, label="Temperature")

    # Advanced sampling parameters
    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            top_p = gr.Slider(0.0, 1.0, value=0.95, label="Top-p")
            top_k = gr.Slider(1, 100, value=50, label="Top-k")
            repetition_penalty = gr.Slider(0.1, 2.0, value=1.1, label="Repetition Penalty")
        with gr.Row():
            presence_penalty = gr.Slider(-2.0, 2.0, value=0.0, label="Presence Penalty")
            frequency_penalty = gr.Slider(-2.0, 2.0, value=0.0, label="Frequency Penalty")

    # System Prompt
    system_prompt = gr.Textbox(
        label="System Prompt",
        value=SYSTEM_PROMPT,
        lines=3,
    )

    # Debug Options
    show_reasoning = gr.Checkbox(
        label="Show Model Reasoning",
        value=True,
    )

    # Event Handling — the Send button and textbox Enter share one wiring
    # (the original duplicated these two lists verbatim).
    generation_inputs = [
        message,
        chatbot,
        system_prompt,
        max_length,
        temperature,
        top_p,
        top_k,
        repetition_penalty,
        presence_penalty,
        frequency_penalty,
        show_reasoning,
    ]
    generation_outputs = [message, chatbot, reasoning_display]

    submit_btn.click(generate_response, inputs=generation_inputs, outputs=generation_outputs)
    message.submit(generate_response, inputs=generation_inputs, outputs=generation_outputs)

# Launch the app (debug=True surfaces server-side errors in the UI/console)
demo.launch(debug=True)