Akjava committed on
Commit
e1df603
·
1 Parent(s): 9224f10
Files changed (2) hide show
  1. app.py +20 -11
  2. scroll.js +19 -0
app.py CHANGED
@@ -6,14 +6,18 @@ client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
6
 
7
 
8
  def generate_text(messages):
 
 
9
  generated = ""
10
  for token in client.chat_completion(messages, max_tokens=50,stream=True):
11
  content = (token.choices[0].delta.content)
12
- generated+=content
13
- #yield generated
 
 
14
  #print(token.choices[0].delta)
15
 
16
- return generated+"." #no stram version
17
 
18
  def call_generate_text(message, history):
19
  #if len(message) == 0:
@@ -25,14 +29,19 @@ def call_generate_text(message, history):
25
  user_message = [{"role":"user","content":message}]
26
  messages = history + user_message
27
  try:
28
- text = generate_text(messages)
29
- assistant_message=[{"role":"assistant","content":text}]
30
- messages += assistant_message
31
- return "",messages
 
 
 
 
 
 
32
  except RuntimeError as e:
33
  print(f"An unexpected error occurred: {e}")
34
-
35
- return "",history
36
 
37
  head = '''
38
  <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
@@ -131,8 +140,8 @@ window.reset_tts_text = reset_tts_text
131
  </script>
132
  '''
133
 
134
- with gr.Blocks(title="LLM with TTS",head=head) as demo:
135
- gr.Markdown("## LLM is unstable:The inference client used in this demo exhibits inconsistent performance. While it can provide responses in milliseconds, it sometimes becomes unresponsive and times out.")
136
  gr.Markdown("## TTS talke a long loading time:Please be patient, the first response may have a delay of up to over 20 seconds while loading.")
137
  gr.Markdown("**Mistral-7B-Instruct-v0.3/LJSpeech**.LLM and TTS models will change without notice.")
138
 
 
6
 
7
 
8
  def generate_text(messages):
9
+ print("generate_text")
10
+ print(messages)
11
  generated = ""
12
  for token in client.chat_completion(messages, max_tokens=50,stream=True):
13
  content = (token.choices[0].delta.content)
14
+ generated += content
15
+ #print(content)
16
+ #print(''.join(list(content)))
17
+ yield generated
18
  #print(token.choices[0].delta)
19
 
20
+ #return generated+"." #no stram version
21
 
22
def call_generate_text(message, history):
    """Gradio chat handler: send *message* plus *history* to the LLM and
    stream the assistant reply back to the UI.

    Args:
        message: the new user message text.
        history: prior conversation as a list of role/content dicts.

    Yields:
        tuple: ``("", updated_history)`` — an empty string to clear the input
        textbox, and the conversation including the partial assistant reply.
    """
    # if len(message) == 0:
    user_message = [{"role": "user", "content": message}]
    messages = history + user_message
    try:
        assistant_message = {"role": "assistant", "content": ""}
        # Re-render the chatbot on every streamed chunk by yielding the
        # conversation with the assistant turn updated in place.
        for text_chunk in generate_text(messages):
            print(f"chunk={text_chunk}")
            assistant_message["content"] = text_chunk
            yield "", messages + [assistant_message]
    except RuntimeError as e:
        print(f"An unexpected error occurred: {e}")
        # Keep the UI consistent by falling back to the unmodified history.
        yield "", history
 
45
 
46
  head = '''
47
  <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
 
140
  </script>
141
  '''
142
 
143
+ with gr.Blocks(title="LLM with TTS",head=head,js="scroll.js") as demo:
144
+ gr.Markdown("## A LLM is unstable:The inference client used in this demo exhibits inconsistent performance. While it can provide responses in milliseconds, it sometimes becomes unresponsive and times out.")
145
  gr.Markdown("## TTS talke a long loading time:Please be patient, the first response may have a delay of up to over 20 seconds while loading.")
146
  gr.Markdown("**Mistral-7B-Instruct-v0.3/LJSpeech**.LLM and TTS models will change without notice.")
147
 
scroll.js ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// see https://github.com/gradio-app/gradio/issues/8253
// UNKNOWN LICENSE

// Keep the Gradio chatbot pinned to its newest message: whenever the
// conversation's DOM subtree mutates, scroll the container to the bottom.
function Scrolldown() {
    const targetNode = document.querySelector('[aria-label="chatbot conversation"]');
    if (!targetNode) {
        // Chatbot element is not mounted yet; observing null would throw.
        return;
    }

    // Options for the observer (which mutations to observe).
    const config = { attributes: true, childList: true, subtree: true };

    // On any observed mutation, jump the scroll position to the bottom.
    const callback = (mutationList, observer) => {
        targetNode.scrollTop = targetNode.scrollHeight;
    };

    const observer = new MutationObserver(callback);

    // Start observing the chatbot container for the configured mutations.
    observer.observe(targetNode, config);
}