stream
app.py CHANGED
@@ -6,14 +6,18 @@ client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
 
 
 def generate_text(messages):
+    print("generate_text")
+    print(messages)
     generated = ""
     for token in client.chat_completion(messages, max_tokens=50,stream=True):
         content = (token.choices[0].delta.content)
-        generated+=content
-        #
+        generated += content
+        #print(content)
+        #print(''.join(list(content)))
+        yield generated
         #print(token.choices[0].delta)
 
-    return generated+"." #no stram version
+    #return generated+"." #no stram version
 
 def call_generate_text(message, history):
     #if len(message) == 0:
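This hunk turns generate_text from a plain function into a generator: instead of returning the finished string once, it yields the text accumulated so far after every streamed token, which is what lets the chat UI update incrementally. A minimal standalone sketch of the same pattern (the prompt is a placeholder, and delta.content is guarded because a streamed chunk's delta may carry no content):

from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")

def stream_reply(messages, max_tokens=50):
    generated = ""
    for token in client.chat_completion(messages, max_tokens=max_tokens, stream=True):
        content = token.choices[0].delta.content
        if content:  # some chunks (e.g. the final one) may have no content
            generated += content
            yield generated  # emit the full text accumulated so far

# Prints progressively longer prefixes of the reply.
for partial in stream_reply([{"role": "user", "content": "Hello"}]):
    print(partial)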
@@ -25,14 +29,19 @@ def call_generate_text(message, history):
     user_message = [{"role":"user","content":message}]
     messages = history + user_message
     try:
-
-        assistant_message=
-
-
+
+        assistant_message={"role":"assistant","content":""}
+        text_generator = generate_text(messages)
+
+        for text_chunk in text_generator:
+            print(f"chunk={text_chunk}")
+            assistant_message["content"] = text_chunk
+            updated_history = messages + [assistant_message]
+            yield "", updated_history
+
     except RuntimeError as e:
         print(f"An unexpected error occurred: {e}")
-
-        return "",history
+        yield "",history
 
 head = '''
 <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
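call_generate_text likewise becomes a generator: on each chunk it rebuilds the history with a partially filled assistant message and yields a ("", updated_history) pair, and on a RuntimeError it yields the unchanged history instead of returning it. The event wiring is not part of this diff; a minimal sketch of how such a generator is typically hooked up in Gradio (the component names, and type="messages" to match the role/content dicts used above, are assumptions):

import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")  # history as a list of {"role", "content"} dicts
    msg = gr.Textbox()
    # Gradio re-renders the outputs on every yielded ("", updated_history)
    # pair: the textbox is cleared and the chat grows as chunks arrive.
    msg.submit(call_generate_text, inputs=[msg, chatbot], outputs=[msg, chatbot])

demo.launch()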
@@ -131,8 +140,8 @@ window.reset_tts_text = reset_tts_text
 </script>
 '''
 
-with gr.Blocks(title="LLM with TTS",head=head) as demo:
-    gr.Markdown("## LLM is unstable:The inference client used in this demo exhibits inconsistent performance. While it can provide responses in milliseconds, it sometimes becomes unresponsive and times out.")
+with gr.Blocks(title="LLM with TTS",head=head,js="scroll.js") as demo:
+    gr.Markdown("## A LLM is unstable:The inference client used in this demo exhibits inconsistent performance. While it can provide responses in milliseconds, it sometimes becomes unresponsive and times out.")
     gr.Markdown("## TTS talke a long loading time:Please be patient, the first response may have a delay of up to over 20 seconds while loading.")
     gr.Markdown("**Mistral-7B-Instruct-v0.3/LJSpeech**.LLM and TTS models will change without notice.")
 
scroll.js ADDED
@@ -0,0 +1,19 @@
+// see https://github.com/gradio-app/gradio/issues/8253
+// UNKNOWN LICENSE
+
+function Scrolldown() {
+  let targetNode = document.querySelector('[aria-label="chatbot conversation"]')
+  // Options for the observer (which mutations to observe)
+  const config = { attributes: true, childList: true, subtree: true };
+
+  // Callback function to execute when mutations are observed
+  const callback = (mutationList, observer) => {
+    targetNode.scrollTop = targetNode.scrollHeight;
+  };
+
+  // Create an observer instance linked to the callback function
+  const observer = new MutationObserver(callback);
+
+  // Start observing the target node for configured mutations
+  observer.observe(targetNode, config);
+}
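scroll.js keeps the chatbot pinned to the bottom while tokens stream in: a MutationObserver watches the conversation container (found via its aria-label) and resets scrollTop to scrollHeight on every DOM change. It is pulled in by the js="scroll.js" argument added to gr.Blocks in the app.py hunk above; a minimal sketch of that loading side, assuming a Gradio version whose Blocks(js=...) accepts a path to a .js file containing a single top-level function and runs it once the page loads:

import gradio as gr

# Assumption: Gradio reads scroll.js and invokes its single top-level
# function (Scrolldown) on page load, which installs the observer.
with gr.Blocks(js="scroll.js") as demo:
    chatbot = gr.Chatbot(type="messages")

demo.launch()

The observer is never disconnected, which is acceptable here since the chat container lives for the whole page session.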