File size: 4,973 Bytes
b2c474d
 
 
ce9b3a4
 
 
 
b2c474d
ce9b3a4
 
 
 
b2c474d
ce9b3a4
b2c474d
ce9b3a4
 
 
b2c474d
 
ce9b3a4
 
 
 
 
b2c474d
ce9b3a4
 
b2c474d
ce9b3a4
 
b2c474d
ce9b3a4
 
 
 
 
b2c474d
 
ce9b3a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import gradio as gr
from huggingface_hub import InferenceClient

# Configuration
# Hugging Face Hub repo id for the hosted model; used by InferenceClient below.
MODEL_NAME = "RekaAI/reka-flash-3"
# Default generation settings surfaced as slider defaults in the UI.
DEFAULT_MAX_LENGTH = 1024
DEFAULT_TEMPERATURE = 0.7

# System prompt
# Default system prompt shown (and editable) in the UI's System Prompt textbox;
# it is prepended to every generation request in generate_response().
SYSTEM_PROMPT = """You are Reka Flash-3, a helpful AI assistant created by Reka AI. 
Provide detailed, helpful answers while maintaining safety. 
Format responses clearly using markdown when appropriate."""

def generate_response(
    message,
    chat_history,
    system_prompt,
    max_length,
    temperature,
    top_p,
    top_k,
    repetition_penalty,
    presence_penalty,
    frequency_penalty,
    show_reasoning
):
    """Generate one assistant reply via the Hugging Face Inference API.

    Parameters mirror the Gradio controls: `message` is the new user turn,
    `chat_history` is a list of (user, assistant) tuples, and the remaining
    arguments are sampling/decoding knobs plus the `show_reasoning` flag.

    Returns a 3-tuple matching the wired outputs:
        ("" to clear the input box, updated chat_history, reasoning text).
    """
    # Robustness: ignore empty submissions instead of sending a blank prompt.
    if not message or not message.strip():
        return "", chat_history, ""

    # Include prior turns so the model keeps conversation context
    # (the original prompt dropped all history).
    turns = "".join(f"User: {u}\n\nAssistant: {a}\n\n" for u, a in chat_history)
    formatted_prompt = f"System: {system_prompt}\n\n{turns}User: {message}\n\nAssistant:"

    # Bind the model on the client. BUG FIX: text_generation()'s first
    # positional argument is the prompt itself; the original passed
    # MODEL_NAME positionally AND prompt= as a keyword, which raises a
    # duplicate-argument TypeError before any request is made.
    client = InferenceClient(model=MODEL_NAME)

    response = client.text_generation(
        formatted_prompt,
        max_new_tokens=int(max_length),
        temperature=temperature,
        top_p=top_p,
        top_k=int(top_k),
        repetition_penalty=repetition_penalty,
        # NOTE(review): presence_penalty/frequency_penalty are kept from the
        # original call, but text_generation may not accept them on all
        # huggingface_hub versions — verify against the installed version.
        presence_penalty=presence_penalty,
        frequency_penalty=frequency_penalty,
        details=show_reasoning,
    )

    # With details=True the client returns a TextGenerationOutput object;
    # its .details attribute is a dataclass, not a dict, and has no
    # 'reasoning' field — read defensively with getattr instead of .get().
    reasoning = ""
    final_answer = response
    if show_reasoning and hasattr(response, "generated_text"):
        final_answer = response.generated_text
        details = getattr(response, "details", None)
        reasoning = (getattr(details, "reasoning", "") or "") if details else ""

    # Append the new turn; Gradio's Chatbot renders (user, assistant) pairs.
    chat_history.append((message, final_answer))

    return "", chat_history, reasoning if show_reasoning else ""

# UI Components
# Builds the Gradio Blocks app: header, deployment notice, chat + reasoning
# panes, input row, sampling-parameter accordions, system prompt, and the
# event wiring that routes everything into generate_response().
with gr.Blocks(title="Reka Flash-3 Chat Demo", theme=gr.themes.Soft()) as demo:
    # Header Section (plain string — the original f-string had no placeholders)
    gr.Markdown("""
    # Reka Flash-3 Chat Interface
    *Powered by [Reka Core AI](https://www.reka.ai/)*
    """)

    # Deployment Notice
    with gr.Accordion("Important Deployment Notice", open=True):
        gr.Markdown(f"""
        **To deploy this model on Hugging Face Spaces:**
        1. Request access to Reka Flash-3 from [Hugging Face Hub](https://huggingface.co/{MODEL_NAME})
        2. Ensure you have Hugging Face PRO subscription
        3. Add your HF token in Space settings
        4. Set `GPU_SMALL` or higher in Space hardware settings
        """)

    # Chat Interface: conversation on the left, model reasoning on the right.
    with gr.Row():
        chatbot = gr.Chatbot(height=500)
        reasoning_display = gr.Textbox(
            label="Model Reasoning",
            interactive=False,
            visible=True,
            lines=20,
            max_lines=20
        )

    # Input Section
    with gr.Row():
        message = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here...",
            lines=3,
            max_lines=6
        )
        submit_btn = gr.Button("Send", variant="primary")

    # Parameters
    with gr.Accordion("Normal Options", open=False):
        with gr.Row():
            max_length = gr.Slider(128, 4096, value=DEFAULT_MAX_LENGTH, label="Max Length")
            temperature = gr.Slider(0.1, 2.0, value=DEFAULT_TEMPERATURE, label="Temperature")

    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            top_p = gr.Slider(0.0, 1.0, value=0.95, label="Top-p")
            top_k = gr.Slider(1, 100, value=50, label="Top-k")
            repetition_penalty = gr.Slider(0.1, 2.0, value=1.1, label="Repetition Penalty")
        with gr.Row():
            presence_penalty = gr.Slider(-2.0, 2.0, value=0.0, label="Presence Penalty")
            frequency_penalty = gr.Slider(-2.0, 2.0, value=0.0, label="Frequency Penalty")

    # System Prompt (editable; passed to generate_response on every turn)
    system_prompt = gr.Textbox(
        label="System Prompt",
        value=SYSTEM_PROMPT,
        lines=3
    )

    # Debug Options
    show_reasoning = gr.Checkbox(
        label="Show Model Reasoning",
        value=True
    )

    # Event Handling — the button click and the textbox Enter-key submit share
    # identical wiring, so define the input/output lists once (the original
    # duplicated the 11-element list verbatim, inviting silent divergence).
    chat_inputs = [
        message,
        chatbot,
        system_prompt,
        max_length,
        temperature,
        top_p,
        top_k,
        repetition_penalty,
        presence_penalty,
        frequency_penalty,
        show_reasoning,
    ]
    chat_outputs = [message, chatbot, reasoning_display]

    submit_btn.click(generate_response, inputs=chat_inputs, outputs=chat_outputs)
    message.submit(generate_response, inputs=chat_inputs, outputs=chat_outputs)

# Launch the app (debug=True enables verbose errors in the console/UI).
demo.launch(debug=True)