stream
app.py CHANGED
@@ -6,14 +6,18 @@ client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
 
 
 def generate_text(messages):
+    print("generate_text")
+    print(messages)
     generated = ""
     for token in client.chat_completion(messages, max_tokens=50,stream=True):
         content = (token.choices[0].delta.content)
-        generated+=content
-        #
+        generated += content
+        #print(content)
+        #print(''.join(list(content)))
+        yield generated
         #print(token.choices[0].delta)
 
-    return generated+"." #no stram version
+    #return generated+"." #no stram version
 
 def call_generate_text(message, history):
     #if len(message) == 0:
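This hunk turns generate_text from a plain function into a generator: instead of returning the finished string once, it yields the text accumulated so far after every streamed token, which is what lets the chat UI update incrementally. A minimal standalone sketch of the same pattern (the prompt is a placeholder, and delta.content is guarded because a streamed chunk's delta may carry no content):

from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")

def stream_reply(messages, max_tokens=50):
    generated = ""
    for token in client.chat_completion(messages, max_tokens=max_tokens, stream=True):
        content = token.choices[0].delta.content
        if content:  # some chunks (e.g. the final one) may have no content
            generated += content
            yield generated  # emit the full text accumulated so far

# Prints progressively longer prefixes of the reply.
for partial in stream_reply([{"role": "user", "content": "Hello"}]):
    print(partial)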
@@ -25,14 +29,19 @@ def call_generate_text(message, history):
     user_message = [{"role":"user","content":message}]
     messages = history + user_message
     try:
-
-        assistant_message=
-
-
+
+        assistant_message={"role":"assistant","content":""}
+        text_generator = generate_text(messages)
+
+        for text_chunk in text_generator:
+            print(f"chunk={text_chunk}")
+            assistant_message["content"] = text_chunk
+            updated_history = messages + [assistant_message]
+            yield "", updated_history
+
     except RuntimeError as e:
         print(f"An unexpected error occurred: {e}")
-
-        return "",history
+        yield "",history
 
 head = '''
 <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
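call_generate_text likewise becomes a generator: on each chunk it rebuilds the history with a partially filled assistant message and yields a ("", updated_history) pair, and on a RuntimeError it yields the unchanged history instead of returning it. The event wiring is not part of this diff; a minimal sketch of how such a generator is typically hooked up in Gradio (the component names, and type="messages" to match the role/content dicts used above, are assumptions):

import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")  # history as a list of {"role", "content"} dicts
    msg = gr.Textbox()
    # Gradio re-renders the outputs on every yielded ("", updated_history)
    # pair: the textbox is cleared and the chat grows as chunks arrive.
    msg.submit(call_generate_text, inputs=[msg, chatbot], outputs=[msg, chatbot])

demo.launch()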
@@ -131,8 +140,8 @@ window.reset_tts_text = reset_tts_text
 </script>
 '''
 
-with gr.Blocks(title="LLM with TTS",head=head) as demo:
-    gr.Markdown("## LLM is unstable:The inference client used in this demo exhibits inconsistent performance. While it can provide responses in milliseconds, it sometimes becomes unresponsive and times out.")
+with gr.Blocks(title="LLM with TTS",head=head,js="scroll.js") as demo:
+    gr.Markdown("## A LLM is unstable:The inference client used in this demo exhibits inconsistent performance. While it can provide responses in milliseconds, it sometimes becomes unresponsive and times out.")
     gr.Markdown("## TTS talke a long loading time:Please be patient, the first response may have a delay of up to over 20 seconds while loading.")
     gr.Markdown("**Mistral-7B-Instruct-v0.3/LJSpeech**.LLM and TTS models will change without notice.")
 
scroll.js ADDED
@@ -0,0 +1,19 @@
+// see https://github.com/gradio-app/gradio/issues/8253
+// UNKNOWN LICENSE
+
+function Scrolldown() {
+  let targetNode = document.querySelector('[aria-label="chatbot conversation"]')
+  // Options for the observer (which mutations to observe)
+  const config = { attributes: true, childList: true, subtree: true };
+
+  // Callback function to execute when mutations are observed
+  const callback = (mutationList, observer) => {
+    targetNode.scrollTop = targetNode.scrollHeight;
+  };
+
+  // Create an observer instance linked to the callback function
+  const observer = new MutationObserver(callback);
+
+  // Start observing the target node for configured mutations
+  observer.observe(targetNode, config);
+}
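scroll.js keeps the chatbot pinned to the bottom while tokens stream in: a MutationObserver watches the conversation container (found via its aria-label) and resets scrollTop to scrollHeight on every DOM change. It is pulled in by the js="scroll.js" argument added to gr.Blocks in the app.py hunk above; a minimal sketch of that loading side, assuming a Gradio version whose Blocks(js=...) accepts a path to a .js file containing a single top-level function and runs it once the page loads:

import gradio as gr

# Assumption: Gradio reads scroll.js and invokes its single top-level
# function (Scrolldown) on page load, which installs the observer.
with gr.Blocks(js="scroll.js") as demo:
    chatbot = gr.Chatbot(type="messages")

demo.launch()

The observer is never disconnected, which is acceptable here since the chat container lives for the whole page session.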