# RekaFlash / app.py — Hugging Face Space by ZoroaStrella
# commit ce9b3a4 ("Update to use reka"), 4.97 kB
import gradio as gr
from huggingface_hub import InferenceClient
# Configuration
# Hub repo id of the model served through the HF Inference API.
MODEL_NAME = "RekaAI/reka-flash-3"
# Default generation budget (max new tokens) and sampling temperature;
# both are user-adjustable via the sliders in the UI below.
DEFAULT_MAX_LENGTH = 1024
DEFAULT_TEMPERATURE = 0.7
# System prompt
# Default system prompt; shown in (and editable from) the UI textbox.
SYSTEM_PROMPT = """You are Reka Flash-3, a helpful AI assistant created by Reka AI.
Provide detailed, helpful answers while maintaining safety.
Format responses clearly using markdown when appropriate."""
def generate_response(
    message,
    chat_history,
    system_prompt,
    max_length,
    temperature,
    top_p,
    top_k,
    repetition_penalty,
    presence_penalty,
    frequency_penalty,
    show_reasoning,
):
    """Generate one assistant reply via the HF Inference API.

    Parameters mirror the Gradio inputs wired to this handler.  Returns a
    3-tuple matching ``outputs=[message, chatbot, reasoning_display]``:
    ("" to clear the textbox, the updated chat history, the reasoning text).
    """
    # Include prior turns so the model sees the whole conversation, not just
    # the latest user message (the original prompt dropped all history).
    history_text = "".join(
        f"User: {user_turn}\n\nAssistant: {bot_turn}\n\n"
        for user_turn, bot_turn in chat_history
    )
    formatted_prompt = (
        f"System: {system_prompt}\n\n{history_text}User: {message}\n\nAssistant:"
    )

    # Bind the model on the client.  Bug fix: text_generation() takes the
    # *prompt* as its first positional argument, so passing MODEL_NAME there
    # collided with the prompt= keyword and never selected the model.
    client = InferenceClient(model=MODEL_NAME)
    response = client.text_generation(
        formatted_prompt,
        max_new_tokens=int(max_length),  # sliders yield floats; API wants int
        temperature=temperature,
        top_p=top_p,
        top_k=int(top_k),
        repetition_penalty=repetition_penalty,
        # NOTE(review): presence/frequency penalties need a recent
        # huggingface_hub — confirm the Space pins a version accepting them.
        presence_penalty=presence_penalty,
        frequency_penalty=frequency_penalty,
        details=show_reasoning,
    )

    # With details=True the client returns an output object carrying
    # .generated_text and .details; otherwise it returns a plain string.
    # The details object is a dataclass, not a dict — the original called
    # .get() on it, which raised AttributeError.  Use getattr defensively.
    reasoning = ""
    details = getattr(response, "details", None)
    if show_reasoning and details is not None:
        reasoning = getattr(details, "reasoning", "") or ""
        final_answer = response.generated_text
    else:
        final_answer = (
            response
            if isinstance(response, str)
            else getattr(response, "generated_text", str(response))
        )

    # Append the new turn; Gradio re-renders the chatbot from this list.
    # (Removed the dead `full_history` copy — it was built and discarded.)
    chat_history.append((message, final_answer))
    return "", chat_history, reasoning if show_reasoning else ""
# UI Components
# Gradio Blocks layout: chat panel + reasoning panel, generation-parameter
# accordions, and event wiring to generate_response above.
with gr.Blocks(title="Reka Flash-3 Chat Demo", theme=gr.themes.Soft()) as demo:
    # Header Section
    gr.Markdown(f"""
# Reka Flash-3 Chat Interface
*Powered by [Reka Core AI](https://www.reka.ai/)*
""")
    # Deployment Notice — open by default so Space operators see it first.
    with gr.Accordion("Important Deployment Notice", open=True):
        gr.Markdown(f"""
**To deploy this model on Hugging Face Spaces:**
1. Request access to Reka Flash-3 from [Hugging Face Hub](https://huggingface.co/{MODEL_NAME})
2. Ensure you have Hugging Face PRO subscription
3. Add your HF token in Space settings
4. Set `GPU_SMALL` or higher in Space hardware settings
""")
    # Chat Interface: conversation on the left, model reasoning alongside.
    with gr.Row():
        chatbot = gr.Chatbot(height=500)
        reasoning_display = gr.Textbox(
            label="Model Reasoning",
            interactive=False,
            visible=True,
            lines=20,
            max_lines=20
        )
    # Input Section
    with gr.Row():
        message = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here...",
            lines=3,
            max_lines=6
        )
        submit_btn = gr.Button("Send", variant="primary")
    # Parameters — values feed generate_response positionally (see inputs=[]).
    with gr.Accordion("Normal Options", open=False):
        with gr.Row():
            max_length = gr.Slider(128, 4096, value=DEFAULT_MAX_LENGTH, label="Max Length")
            temperature = gr.Slider(0.1, 2.0, value=DEFAULT_TEMPERATURE, label="Temperature")
    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            top_p = gr.Slider(0.0, 1.0, value=0.95, label="Top-p")
            top_k = gr.Slider(1, 100, value=50, label="Top-k")
            repetition_penalty = gr.Slider(0.1, 2.0, value=1.1, label="Repetition Penalty")
        with gr.Row():
            presence_penalty = gr.Slider(-2.0, 2.0, value=0.0, label="Presence Penalty")
            frequency_penalty = gr.Slider(-2.0, 2.0, value=0.0, label="Frequency Penalty")
    # System Prompt — editable; seeded from the module-level default.
    system_prompt = gr.Textbox(
        label="System Prompt",
        value=SYSTEM_PROMPT,
        lines=3
    )
    # Debug Options — toggles details=... in the generation request.
    show_reasoning = gr.Checkbox(
        label="Show Model Reasoning",
        value=True
    )
    # Event Handling: both the Send button and Enter in the textbox invoke
    # generate_response; its 3-tuple clears the textbox, refreshes the chat,
    # and fills the reasoning panel.
    submit_btn.click(
        generate_response,
        inputs=[
            message,
            chatbot,
            system_prompt,
            max_length,
            temperature,
            top_p,
            top_k,
            repetition_penalty,
            presence_penalty,
            frequency_penalty,
            show_reasoning
        ],
        outputs=[message, chatbot, reasoning_display]
    )
    message.submit(
        generate_response,
        inputs=[
            message,
            chatbot,
            system_prompt,
            max_length,
            temperature,
            top_p,
            top_k,
            repetition_penalty,
            presence_penalty,
            frequency_penalty,
            show_reasoning
        ],
        outputs=[message, chatbot, reasoning_display]
    )
# Deployment instructions
# debug=True surfaces tracebacks in the UI/console while developing.
demo.launch(debug=True)