# Hugging Face Space page residue (captured with the source):
# "Spaces: Running on Zero" — file size 4,973 bytes; commit refs b2c474d / ce9b3a4.
import re

import gradio as gr
from huggingface_hub import InferenceClient
# Configuration
# Hub repo id of the served model (gated — the deployment notice below tells
# users to request access and add an HF token).
MODEL_NAME = "RekaAI/reka-flash-3"
# Defaults surfaced by the generation-parameter sliders in the UI.
DEFAULT_MAX_LENGTH = 1024
DEFAULT_TEMPERATURE = 0.7
# System prompt
# Default system prompt; the UI exposes it in an editable textbox.
SYSTEM_PROMPT = """You are Reka Flash-3, a helpful AI assistant created by Reka AI.
Provide detailed, helpful answers while maintaining safety.
Format responses clearly using markdown when appropriate."""
def generate_response(
    message,
    chat_history,
    system_prompt,
    max_length,
    temperature,
    top_p,
    top_k,
    repetition_penalty,
    presence_penalty,
    frequency_penalty,
    show_reasoning,
):
    """Generate one assistant reply via the HF Inference API.

    Args:
        message: The new user message (str).
        chat_history: List of (user, assistant) tuples; mutated in place.
        system_prompt: System instructions prepended to the prompt.
        max_length: Max new tokens to generate.
        temperature, top_p, top_k, repetition_penalty, frequency_penalty:
            Sampling parameters forwarded to ``text_generation``.
        presence_penalty: Accepted for interface compatibility but NOT
            forwarded — ``InferenceClient.text_generation`` has no such
            parameter (it belongs to ``chat_completion``); passing it
            raised TypeError in the original code.
        show_reasoning: When True, request token details and try to split
            a reasoning trace out of the generated text.

    Returns:
        ("", updated chat_history, reasoning text or "") — matching the
        (message textbox, chatbot, reasoning display) Gradio outputs.
    """
    # Build the prompt including prior turns; the original dropped
    # chat_history entirely, so the model had no conversational context.
    turns = [f"System: {system_prompt}"]
    for user_msg, bot_msg in chat_history:
        turns.append(f"User: {user_msg}")
        turns.append(f"Assistant: {bot_msg}")
    turns.append(f"User: {message}")
    formatted_prompt = "\n\n".join(turns) + "\n\nAssistant:"

    # Bind the model at construction time. text_generation()'s first
    # positional argument is the prompt, so the original call
    # text_generation(MODEL_NAME, prompt=...) raised a TypeError.
    client = InferenceClient(model=MODEL_NAME)
    response = client.text_generation(
        formatted_prompt,
        max_new_tokens=int(max_length),
        temperature=temperature,
        top_p=top_p,
        top_k=int(top_k),  # top_k must be an int; Gradio sliders yield floats
        repetition_penalty=repetition_penalty,
        frequency_penalty=frequency_penalty,
        details=show_reasoning,
    )

    # With details=True the client returns a TextGenerationOutput object;
    # otherwise it returns the generated string directly.
    generated = response.generated_text if show_reasoning else response

    # The original read response.details['reasoning'], but
    # TextGenerationOutputDetails has no such field. Reasoning models
    # typically emit the trace inline in the generated text — presumably
    # wrapped in <reasoning>...</reasoning> tags here (TODO: confirm the
    # exact delimiter against the Reka Flash-3 model card).
    reasoning = ""
    final_answer = generated
    if show_reasoning:
        found = re.search(r"<reasoning>(.*?)</reasoning>", generated, re.DOTALL)
        if found:
            reasoning = found.group(1).strip()
            final_answer = re.sub(
                r"<reasoning>.*?</reasoning>", "", generated, flags=re.DOTALL
            ).strip()

    # Append the cleaned answer; the original also built an unused
    # `full_history` copy, which has been removed as dead code.
    chat_history.append((message, final_answer))
    return "", chat_history, reasoning if show_reasoning else ""
# UI Components — the whole interface is declared inside one gr.Blocks
# context. Indentation below is reconstructed: the scraped source had lost
# all leading whitespace, which makes the file invalid Python as-is.
with gr.Blocks(title="Reka Flash-3 Chat Demo", theme=gr.themes.Soft()) as demo:
    # Header Section
    gr.Markdown(f"""
# Reka Flash-3 Chat Interface
*Powered by [Reka Core AI](https://www.reka.ai/)*
""")

    # Deployment Notice (open by default so users see the access steps)
    with gr.Accordion("Important Deployment Notice", open=True):
        gr.Markdown(f"""
**To deploy this model on Hugging Face Spaces:**
1. Request access to Reka Flash-3 from [Hugging Face Hub](https://huggingface.co/{MODEL_NAME})
2. Ensure you have Hugging Face PRO subscription
3. Add your HF token in Space settings
4. Set `GPU_SMALL` or higher in Space hardware settings
""")

    # Chat Interface: chat transcript on the left, reasoning trace beside it
    with gr.Row():
        chatbot = gr.Chatbot(height=500)
        reasoning_display = gr.Textbox(
            label="Model Reasoning",
            interactive=False,
            visible=True,
            lines=20,
            max_lines=20,
        )

    # Input Section
    with gr.Row():
        message = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here...",
            lines=3,
            max_lines=6,
        )
        submit_btn = gr.Button("Send", variant="primary")

    # Parameters
    with gr.Accordion("Normal Options", open=False):
        with gr.Row():
            max_length = gr.Slider(128, 4096, value=DEFAULT_MAX_LENGTH, label="Max Length")
            temperature = gr.Slider(0.1, 2.0, value=DEFAULT_TEMPERATURE, label="Temperature")
    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            top_p = gr.Slider(0.0, 1.0, value=0.95, label="Top-p")
            top_k = gr.Slider(1, 100, value=50, label="Top-k")
            repetition_penalty = gr.Slider(0.1, 2.0, value=1.1, label="Repetition Penalty")
        with gr.Row():
            presence_penalty = gr.Slider(-2.0, 2.0, value=0.0, label="Presence Penalty")
            frequency_penalty = gr.Slider(-2.0, 2.0, value=0.0, label="Frequency Penalty")

    # System Prompt (editable; seeded with the module default)
    system_prompt = gr.Textbox(
        label="System Prompt",
        value=SYSTEM_PROMPT,
        lines=3,
    )

    # Debug Options
    show_reasoning = gr.Checkbox(
        label="Show Model Reasoning",
        value=True,
    )

    # Event Handling — the button click and textbox Enter share identical
    # wiring, so the input/output lists are declared once instead of being
    # duplicated (the original repeated both lists verbatim).
    chat_inputs = [
        message,
        chatbot,
        system_prompt,
        max_length,
        temperature,
        top_p,
        top_k,
        repetition_penalty,
        presence_penalty,
        frequency_penalty,
        show_reasoning,
    ]
    chat_outputs = [message, chatbot, reasoning_display]
    submit_btn.click(generate_response, inputs=chat_inputs, outputs=chat_outputs)
    message.submit(generate_response, inputs=chat_inputs, outputs=chat_outputs)

# Launch after the Blocks context is closed, per Gradio convention.
demo.launch(debug=True)