File size: 4,973 Bytes
b2c474d
 
 
ce9b3a4
 
 
 
b2c474d
ce9b3a4
 
 
 
b2c474d
ce9b3a4
b2c474d
ce9b3a4
 
 
b2c474d
 
ce9b3a4
 
 
 
 
b2c474d
ce9b3a4
 
b2c474d
ce9b3a4
 
b2c474d
ce9b3a4
 
 
 
 
b2c474d
 
ce9b3a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import gradio as gr
from huggingface_hub import InferenceClient

# Configuration
# Hugging Face Hub repo id for the hosted model; used by InferenceClient below.
MODEL_NAME = "RekaAI/reka-flash-3"
# Default generation settings surfaced as slider defaults in the UI.
DEFAULT_MAX_LENGTH = 1024
DEFAULT_TEMPERATURE = 0.7

# System prompt
# Default system prompt shown (and editable) in the UI's System Prompt textbox;
# it is prepended to every generation request in generate_response().
SYSTEM_PROMPT = """You are Reka Flash-3, a helpful AI assistant created by Reka AI. 
Provide detailed, helpful answers while maintaining safety. 
Format responses clearly using markdown when appropriate."""

def generate_response(
    message,
    chat_history,
    system_prompt,
    max_length,
    temperature,
    top_p,
    top_k,
    repetition_penalty,
    presence_penalty,
    frequency_penalty,
    show_reasoning
):
    """Generate one assistant reply via the Hugging Face Inference API.

    Parameters mirror the Gradio controls: `message` is the new user turn,
    `chat_history` is a list of (user, assistant) tuples, and the remaining
    arguments are sampling/decoding knobs plus the `show_reasoning` flag.

    Returns a 3-tuple matching the wired outputs:
        ("" to clear the input box, updated chat_history, reasoning text).
    """
    # Robustness: ignore empty submissions instead of sending a blank prompt.
    if not message or not message.strip():
        return "", chat_history, ""

    # Include prior turns so the model keeps conversation context
    # (the original prompt dropped all history).
    turns = "".join(f"User: {u}\n\nAssistant: {a}\n\n" for u, a in chat_history)
    formatted_prompt = f"System: {system_prompt}\n\n{turns}User: {message}\n\nAssistant:"

    # Bind the model on the client. BUG FIX: text_generation()'s first
    # positional argument is the prompt itself; the original passed
    # MODEL_NAME positionally AND prompt= as a keyword, which raises a
    # duplicate-argument TypeError before any request is made.
    client = InferenceClient(model=MODEL_NAME)

    response = client.text_generation(
        formatted_prompt,
        max_new_tokens=int(max_length),
        temperature=temperature,
        top_p=top_p,
        top_k=int(top_k),
        repetition_penalty=repetition_penalty,
        # NOTE(review): presence_penalty/frequency_penalty are kept from the
        # original call, but text_generation may not accept them on all
        # huggingface_hub versions — verify against the installed version.
        presence_penalty=presence_penalty,
        frequency_penalty=frequency_penalty,
        details=show_reasoning,
    )

    # With details=True the client returns a TextGenerationOutput object;
    # its .details attribute is a dataclass, not a dict, and has no
    # 'reasoning' field — read defensively with getattr instead of .get().
    reasoning = ""
    final_answer = response
    if show_reasoning and hasattr(response, "generated_text"):
        final_answer = response.generated_text
        details = getattr(response, "details", None)
        reasoning = (getattr(details, "reasoning", "") or "") if details else ""

    # Append the new turn; Gradio's Chatbot renders (user, assistant) pairs.
    chat_history.append((message, final_answer))

    return "", chat_history, reasoning if show_reasoning else ""

# UI Components
# Builds the Gradio Blocks app: header, deployment notice, chat + reasoning
# panes, input row, sampling-parameter accordions, system prompt, and the
# event wiring that routes everything into generate_response().
with gr.Blocks(title="Reka Flash-3 Chat Demo", theme=gr.themes.Soft()) as demo:
    # Header Section (plain string — the original f-string had no placeholders)
    gr.Markdown("""
    # Reka Flash-3 Chat Interface
    *Powered by [Reka Core AI](https://www.reka.ai/)*
    """)

    # Deployment Notice
    with gr.Accordion("Important Deployment Notice", open=True):
        gr.Markdown(f"""
        **To deploy this model on Hugging Face Spaces:**
        1. Request access to Reka Flash-3 from [Hugging Face Hub](https://huggingface.co/{MODEL_NAME})
        2. Ensure you have Hugging Face PRO subscription
        3. Add your HF token in Space settings
        4. Set `GPU_SMALL` or higher in Space hardware settings
        """)

    # Chat Interface: conversation on the left, model reasoning on the right.
    with gr.Row():
        chatbot = gr.Chatbot(height=500)
        reasoning_display = gr.Textbox(
            label="Model Reasoning",
            interactive=False,
            visible=True,
            lines=20,
            max_lines=20
        )

    # Input Section
    with gr.Row():
        message = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here...",
            lines=3,
            max_lines=6
        )
        submit_btn = gr.Button("Send", variant="primary")

    # Parameters
    with gr.Accordion("Normal Options", open=False):
        with gr.Row():
            max_length = gr.Slider(128, 4096, value=DEFAULT_MAX_LENGTH, label="Max Length")
            temperature = gr.Slider(0.1, 2.0, value=DEFAULT_TEMPERATURE, label="Temperature")

    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            top_p = gr.Slider(0.0, 1.0, value=0.95, label="Top-p")
            top_k = gr.Slider(1, 100, value=50, label="Top-k")
            repetition_penalty = gr.Slider(0.1, 2.0, value=1.1, label="Repetition Penalty")
        with gr.Row():
            presence_penalty = gr.Slider(-2.0, 2.0, value=0.0, label="Presence Penalty")
            frequency_penalty = gr.Slider(-2.0, 2.0, value=0.0, label="Frequency Penalty")

    # System Prompt (editable; passed to generate_response on every turn)
    system_prompt = gr.Textbox(
        label="System Prompt",
        value=SYSTEM_PROMPT,
        lines=3
    )

    # Debug Options
    show_reasoning = gr.Checkbox(
        label="Show Model Reasoning",
        value=True
    )

    # Event Handling — the button click and the textbox Enter-key submit share
    # identical wiring, so define the input/output lists once (the original
    # duplicated the 11-element list verbatim, inviting silent divergence).
    chat_inputs = [
        message,
        chatbot,
        system_prompt,
        max_length,
        temperature,
        top_p,
        top_k,
        repetition_penalty,
        presence_penalty,
        frequency_penalty,
        show_reasoning,
    ]
    chat_outputs = [message, chatbot, reasoning_display]

    submit_btn.click(generate_response, inputs=chat_inputs, outputs=chat_outputs)
    message.submit(generate_response, inputs=chat_inputs, outputs=chat_outputs)

# Launch the app (debug=True enables verbose errors in the console/UI).
demo.launch(debug=True)