ZoroaStrella committed on
Commit e0e0cdd · 1 Parent(s): 96ecc26

Update code token

Files changed (1):
1. app.py +88 -23
app.py CHANGED
@@ -1,36 +1,101 @@
+import os
 import gradio as gr
 from huggingface_hub import InferenceClient
-import os
 
+# Initialize the Inference Client
 client = InferenceClient(model="RekaAI/reka-flash-3", token=os.getenv("HF_TOKEN"))
 
-def generate_response(message, chat_history, system_prompt="You are a helpful assistant.",
-                      max_length=512, temperature=0.7, top_p=0.9, top_k=50, repetition_penalty=1.0):
-    full_prompt = f"{system_prompt}\n\n"
-    for turn in chat_history:
-        full_prompt += f"{turn['role'].capitalize()}: {turn['content']}\n"
-    full_prompt += f"Human: {message}\nAssistant:"
-
+# Helper function to format the conversation history into a prompt
+def format_history(history):
+    prompt = "You are a helpful and harmless assistant.\n\n"
+    for item in history:
+        if item["role"] == "user":
+            prompt += f"Human: {item['content']}\n"
+        elif item["role"] == "assistant":
+            prompt += f"Assistant: {item['content']}\n"
+    prompt += "Assistant:"
+    return prompt
+
+# Function to handle message submission and response generation
+def submit(message, history, temperature, max_new_tokens, top_p, top_k):
+    # Add user's message to history
+    history = history + [{"role": "user", "content": message}]
+    # Add a "Thinking..." message to simulate the model's reasoning phase
+    thinking_message = {"role": "assistant", "content": "Thinking..."}
+    history = history + [thinking_message]
+    yield history, history  # Update chatbot and state
+
+    # Format the prompt excluding the "Thinking..." message
+    prompt = format_history(history[:-1])
+    # Stream the response from the Inference API
     response = client.text_generation(
-        full_prompt,
-        max_new_tokens=max_length,
+        prompt,
+        max_new_tokens=max_new_tokens,
         temperature=temperature,
         top_p=top_p,
         top_k=top_k,
-        repetition_penalty=repetition_penalty,
-        stop_sequences=["\nHuman:", "\nAssistant:"]
+        repetition_penalty=1.0,
+        stop_sequences=["\nHuman:", "\nAssistant:"],
+        stream=True
     )
-
-    generated_text = response.strip()
-    chat_history.append({"role": "user", "content": message})
-    chat_history.append({"role": "assistant", "content": generated_text})
-    return "", chat_history
 
+    # Simulate "thinking" phase with the first 5 chunks
+    thought_chunks = 0
+    max_thought_chunks = 5
+    accumulated_thought = ""
+    for chunk in response:
+        if thought_chunks < max_thought_chunks:
+            accumulated_thought += chunk
+            thinking_message["content"] = "Thinking: " + accumulated_thought
+            thought_chunks += 1
+            if thought_chunks == max_thought_chunks:
+                # Finalize the "Thought" message and start the "Answer" message
+                thinking_message["content"] = "Thought: " + accumulated_thought
+                answer_message = {"role": "assistant", "content": "Answer:"}
+                history = history + [answer_message]
+        else:
+            # Append subsequent chunks to the "Answer" message
+            answer_message["content"] += chunk
+        yield history, history  # Update UI with each chunk
+
+    # Finalize the response
+    if 'answer_message' in locals():
+        answer_message["content"] += "\n\n[End of response]"
+    else:
+        thinking_message["content"] += "\n\n[No response generated]"
+    yield history, history
+
+# Build the Gradio interface
 with gr.Blocks() as demo:
-    chatbot = gr.Chatbot(type="messages")
-    msg = gr.Textbox()
-    clear = gr.Button("Clear")
-    msg.submit(generate_response, [msg, chatbot], [msg, chatbot])
-    clear.click(lambda: None, None, chatbot, queue=False)
+    # State to store the conversation history
+    history_state = gr.State([])
+    # Chatbot component to display messages
+    chatbot = gr.Chatbot(type="messages", height=400, label="Conversation")
+
+    # Layout with settings and input area
+    with gr.Row():
+        with gr.Column(scale=1):
+            # Advanced settings in a collapsible panel
+            with gr.Accordion("Advanced Settings", open=False):
+                temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=0.7)
+                max_tokens = gr.Slider(label="Max Tokens", minimum=1, maximum=1024, step=1, value=512)
+                top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, step=0.1, value=0.9)
+                top_k = gr.Slider(label="Top K", minimum=1, maximum=100, step=1, value=50)
+        with gr.Column(scale=4):
+            # Textbox for user input and buttons
+            textbox = gr.Textbox(label="Your message")
+            submit_btn = gr.Button("Submit")
+            clear_btn = gr.Button("Clear")
+
+    # Connect the submit button to the submit function
+    submit_btn.click(
+        submit,
+        inputs=[textbox, history_state, temperature, max_tokens, top_p, top_k],
+        outputs=[chatbot, history_state]
+    )
+    # Clear button resets the conversation
+    clear_btn.click(lambda: ([], []), outputs=[chatbot, history_state])
 
-demo.launch()
+# Launch the application
+if __name__ == "__main__":
+    demo.queue().launch()
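
For reference, the streaming contract the new submit() handler relies on: with stream=True (and the default details=False), InferenceClient.text_generation yields plain string tokens, which the handler accumulates chunk by chunk. A minimal standalone sketch of that behavior, assuming HF_TOKEN is set in the environment and using an illustrative prompt:

import os
from huggingface_hub import InferenceClient

client = InferenceClient(model="RekaAI/reka-flash-3", token=os.getenv("HF_TOKEN"))
# Each chunk arrives as a plain string token; print them as they stream in.
for chunk in client.text_generation(
    "Human: Say hello.\nAssistant:",
    max_new_tokens=32,
    stream=True,
):
    print(chunk, end="", flush=True)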
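
A quick usage note on format_history(): it flattens a message-style history into the Human:/Assistant: prompt format the model is queried with, prefixed by the system line. For a hypothetical two-turn history (contents are illustrative):

history = [
    {"role": "user", "content": "What is Gradio?"},
    {"role": "assistant", "content": "A Python library for building ML demos."},
    {"role": "user", "content": "Show me an example."},
]
# format_history(history) would return:
# You are a helpful and harmless assistant.
#
# Human: What is Gradio?
# Assistant: A Python library for building ML demos.
# Human: Show me an example.
# Assistant: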