# Hugging Face Space chat demo (runs on ZeroGPU hardware).
import os

import gradio as gr
from huggingface_hub import InferenceClient

# HF_TOKEN should be set as a Space secret; if unset, os.getenv returns
# None and the client falls back to anonymous (rate-limited) access.
client = InferenceClient(model="RekaAI/reka-flash-3", token=os.getenv("HF_TOKEN"))
def generate_response(message, chat_history, system_prompt="You are a helpful assistant.",
                      max_length=512, temperature=0.7, top_p=0.9, top_k=50, repetition_penalty=1.0):
    """Generate one assistant reply for a Gradio chat turn.

    Builds a plain-text prompt from the system prompt plus prior turns,
    calls the Inference API, and appends both the user message and the
    model reply to ``chat_history`` (mutated in place).

    Parameters
    ----------
    message : str
        The new user message.
    chat_history : list[dict]
        Prior turns as ``{"role": ..., "content": ...}`` dicts
        (Gradio ``type="messages"`` format).
    system_prompt, max_length, temperature, top_p, top_k, repetition_penalty
        Standard text-generation sampling controls, forwarded to
        ``client.text_generation``.

    Returns
    -------
    tuple[str, list[dict]]
        An empty string (clears the input textbox) and the updated history.
    """
    # Map Gradio roles onto fixed prompt labels so that history turns and
    # the new message use the SAME speaker labels. The original code
    # emitted "User:" (via .capitalize()) for history but "Human:" for the
    # live message and stop sequences — an inconsistency the model would see.
    labels = {"user": "Human", "assistant": "Assistant"}
    full_prompt = f"{system_prompt}\n\n"
    for turn in chat_history:
        speaker = labels.get(turn["role"], turn["role"].capitalize())
        full_prompt += f"{speaker}: {turn['content']}\n"
    full_prompt += f"Human: {message}\nAssistant:"
    response = client.text_generation(
        full_prompt,
        max_new_tokens=max_length,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        # Stop as soon as the model starts a new turn on its own.
        stop_sequences=["\nHuman:", "\nAssistant:"],
    )
    generated_text = response.strip()
    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": generated_text})
    return "", chat_history
with gr.Blocks() as demo:
    # "messages" type matches the role/content dicts generate_response appends.
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    clear = gr.Button("Clear")
    # Pressing Enter submits the textbox; outputs clear the box and
    # refresh the chat. Extra generate_response parameters keep defaults.
    msg.submit(generate_response, [msg, chatbot], [msg, chatbot])
    # Returning None resets the Chatbot component to empty.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()