vericudebuget committed
Commit ce6faeb · verified · 1 parent: c826481

Update app.py

Files changed (1): app.py (+12, -7)
app.py CHANGED
@@ -16,6 +16,7 @@ def format_prompt(message, history):
 def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=9048, top_p=0.95, repetition_penalty=1.0):
     temperature = max(float(temperature), 1e-2)
     top_p = float(top_p)
+
     generate_kwargs = dict(
         temperature=temperature,
         max_new_tokens=max_new_tokens,
@@ -31,11 +32,15 @@ def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=904
     system_prompt = f"server log: ~This message was sent at {formatted_time}. The actual year is 2024.~"
 
     formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-    output = ""
-    for response in stream:
-        output += response.token.text
-        yield output
+
+    # Use a loading indicator while the model is generating the response
+    with gr.Blocks().loading_indicator():
+        stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+        output = ""
+        for response in stream:
+            output += response.token.text
+            yield output
+
     return output
 
 additional_inputs = [
@@ -54,9 +59,9 @@ chat_interface = gr.ChatInterface(
     chatbot=gr.Chatbot(show_label=True, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
     additional_inputs=additional_inputs,
     title="ConvoLite",
-    concurrency_limit=20,
+    concurrency_limit=50,  # Increase the concurrency limit
 )
 
 with gr.Blocks(css=f"file={css_path}", theme=gr.themes.Soft()) as demo:
     demo.add(chat_interface)
-demo.launch(show_api=False)
+    demo.launch(show_api=False)
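
For reference, here is a minimal, self-contained sketch of the token-streaming pattern this commit modifies. It is a reconstruction under stated assumptions, not the Space's actual code: app.py defines client, format_prompt, and css_path elsewhere, the model id below is a placeholder, and the gr.Blocks().loading_indicator() wrapper added by the commit is left out because it is not part of the documented Gradio API.

import gradio as gr
from huggingface_hub import InferenceClient

# Placeholder model id: app.py constructs its own client elsewhere.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")

def generate(prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    # Keep temperature strictly positive, as the committed code does.
    temperature = max(float(temperature), 1e-2)
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=float(top_p),
        repetition_penalty=repetition_penalty,
    )
    # stream=True makes text_generation yield chunks; details=True exposes .token on each chunk.
    stream = client.text_generation(
        prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""
    for response in stream:
        output += response.token.text
        yield output  # each yield re-renders the partial reply in the chat UI

demo = gr.ChatInterface(generate, title="ConvoLite", concurrency_limit=50)
demo.launch(show_api=False)

Each yield sends the growing string back to gr.ChatInterface, which overwrites the bot message in place to produce the typing effect; concurrency_limit bounds how many generate calls may run at once, which is what the commit raises from 20 to 50.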