# Hugging Face Spaces: Running on Zero (ZeroGPU)
import gradio as gr
from huggingface_hub import InferenceClient
# Configuration
MODEL_NAME = "RekaAI/reka-flash-3"
DEFAULT_MAX_LENGTH = 1024      # default max_new_tokens for generation
DEFAULT_TEMPERATURE = 0.7      # default sampling temperature

# System prompt sent ahead of every user turn
SYSTEM_PROMPT = """You are Reka Flash-3, a helpful AI assistant created by Reka AI.
Provide detailed, helpful answers while maintaining safety.
Format responses clearly using markdown when appropriate."""
def generate_response(
    message,
    chat_history,
    system_prompt,
    max_length,
    temperature,
    top_p,
    top_k,
    repetition_penalty,
    presence_penalty,
    frequency_penalty,
    show_reasoning,
):
    """Send one chat turn to the hosted model and append it to the history.

    Parameters mirror the Gradio controls; ``chat_history`` is a list of
    ``(user, assistant)`` tuples and is mutated in place.

    Returns:
        tuple: ``("", chat_history, reasoning)`` — the leading empty string
        clears the message textbox; ``reasoning`` is ``""`` unless
        ``show_reasoning`` is set and the backend supplied one.
    """
    # Flat prompt format; Reka Flash-3 may expect a specific chat template —
    # NOTE(review): confirm against the model card.
    formatted_prompt = f"System: {system_prompt}\n\nUser: {message}\n\nAssistant:"

    # Bind the model on the client. `text_generation`'s first positional
    # argument is the *prompt*, so the original call — which passed
    # MODEL_NAME positionally plus `prompt=` — raised
    # "got multiple values for argument 'prompt'".
    client = InferenceClient(model=MODEL_NAME)

    response = client.text_generation(
        formatted_prompt,
        max_new_tokens=max_length,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        # NOTE(review): presence/frequency penalties are not accepted by
        # every huggingface_hub version / inference backend — verify before
        # deploying.
        presence_penalty=presence_penalty,
        frequency_penalty=frequency_penalty,
        details=show_reasoning,
    )

    # With details=True the client returns an output object whose
    # `.generated_text` holds the answer; otherwise `response` is the plain
    # generated string.
    reasoning = ""
    final_answer = response
    details = getattr(response, "details", None)
    if show_reasoning and details is not None:
        # The original did `response.details.get(...)`, which crashes when
        # `.details` is an object rather than a dict; handle both shapes.
        # NOTE(review): TGI detail payloads carry token-level info — a
        # "reasoning" field is model/backend specific, confirm it exists.
        if isinstance(details, dict):
            reasoning = details.get("reasoning", "")
        else:
            reasoning = getattr(details, "reasoning", "") or ""
        final_answer = response.generated_text

    chat_history.append((message, final_answer))
    return "", chat_history, reasoning if show_reasoning else ""
# UI Components
with gr.Blocks(title="Reka Flash-3 Chat Demo", theme=gr.themes.Soft()) as demo:
    # Header Section
    gr.Markdown(
        """
        # Reka Flash-3 Chat Interface
        *Powered by [Reka Core AI](https://www.reka.ai/)*
        """
    )

    # Deployment Notice
    with gr.Accordion("Important Deployment Notice", open=True):
        gr.Markdown(
            f"""
            **To deploy this model on Hugging Face Spaces:**
            1. Request access to Reka Flash-3 from [Hugging Face Hub](https://huggingface.co/{MODEL_NAME})
            2. Ensure you have Hugging Face PRO subscription
            3. Add your HF token in Space settings
            4. Set `GPU_SMALL` or higher in Space hardware settings
            """
        )

    # Chat Interface: transcript alongside the raw reasoning pane
    with gr.Row():
        chatbot = gr.Chatbot(height=500)
        reasoning_display = gr.Textbox(
            label="Model Reasoning",
            interactive=False,
            visible=True,
            lines=20,
            max_lines=20,
        )

    # Input Section
    with gr.Row():
        message = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here...",
            lines=3,
            max_lines=6,
        )
        submit_btn = gr.Button("Send", variant="primary")

    # Basic sampling parameters
    with gr.Accordion("Normal Options", open=False):
        with gr.Row():
            max_length = gr.Slider(128, 4096, value=DEFAULT_MAX_LENGTH, label="Max Length")
            temperature = gr.Slider(0.1, 2.0, value=DEFAULT_TEMPERATURE, label="Temperature")

    # Advanced sampling parameters
    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            top_p = gr.Slider(0.0, 1.0, value=0.95, label="Top-p")
            top_k = gr.Slider(1, 100, value=50, label="Top-k")
            repetition_penalty = gr.Slider(0.1, 2.0, value=1.1, label="Repetition Penalty")
        with gr.Row():
            presence_penalty = gr.Slider(-2.0, 2.0, value=0.0, label="Presence Penalty")
            frequency_penalty = gr.Slider(-2.0, 2.0, value=0.0, label="Frequency Penalty")

    # System Prompt
    system_prompt = gr.Textbox(
        label="System Prompt",
        value=SYSTEM_PROMPT,
        lines=3,
    )

    # Debug Options
    show_reasoning = gr.Checkbox(
        label="Show Model Reasoning",
        value=True,
    )

    # Event Handling — the Send button and textbox Enter share one wiring
    # (the original duplicated these two lists verbatim).
    generation_inputs = [
        message,
        chatbot,
        system_prompt,
        max_length,
        temperature,
        top_p,
        top_k,
        repetition_penalty,
        presence_penalty,
        frequency_penalty,
        show_reasoning,
    ]
    generation_outputs = [message, chatbot, reasoning_display]

    submit_btn.click(generate_response, inputs=generation_inputs, outputs=generation_outputs)
    message.submit(generate_response, inputs=generation_inputs, outputs=generation_outputs)

# Launch the app (debug=True surfaces server-side errors in the UI/console)
demo.launch(debug=True)