import os

import gradio as gr
from huggingface_hub import InferenceClient

# Reka Flash 3 served via the Hugging Face Inference API; HF_TOKEN must be
# set in the environment for authenticated access.
client = InferenceClient(model="RekaAI/reka-flash-3", token=os.getenv("HF_TOKEN"))


def generate_response(message, chat_history, system_prompt="You are a helpful assistant.",
                      max_length=512, temperature=0.7, top_p=0.9, top_k=50,
                      repetition_penalty=1.0):
    """Generate one assistant reply and append the exchange to the history.

    Builds a plain-text prompt from the system prompt plus prior turns,
    calls the remote text-generation endpoint, and returns the cleared
    textbox value together with the updated history.

    Args:
        message: The new user message from the textbox.
        chat_history: List of ``{"role", "content"}`` dicts (Gradio
            "messages" format). Mutated in place and also returned.
        system_prompt: Instruction prepended to the prompt.
        max_length: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Top-k sampling cutoff.
        repetition_penalty: Penalty applied to repeated tokens.

    Returns:
        Tuple of ("" to clear the textbox, updated chat_history).
    """
    # Ignore empty / whitespace-only submissions: no API call, no history entry.
    if not message or not message.strip():
        return "", chat_history

    full_prompt = f"{system_prompt}\n\n"
    for turn in chat_history:
        # Use "Human"/"Assistant" labels consistently: the original used
        # role.capitalize() ("User:") for history but "Human:" for the new
        # message and the stop sequences, so the model saw mixed labels.
        label = "Human" if turn["role"] == "user" else "Assistant"
        full_prompt += f"{label}: {turn['content']}\n"
    full_prompt += f"Human: {message}\nAssistant:"

    response = client.text_generation(
        full_prompt,
        max_new_tokens=max_length,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        # Stop as soon as the model starts hallucinating the next turn.
        stop_sequences=["\nHuman:", "\nAssistant:"],
    )
    generated_text = response.strip()

    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": generated_text})
    # First value clears the input textbox.
    return "", chat_history


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    # Remaining generation parameters fall back to their defaults.
    msg.submit(generate_response, [msg, chatbot], [msg, chatbot])
    # Returning None resets the Chatbot component.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()