Spaces:

simonraj
/

ZeroCpuSpeechChat

Sleeping

App Files Files Community

simonraj committed on Jun 3, 2024

Commit

788cf13

verified ·

1 Parent(s): 71de052

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -12

app.py CHANGED Viewed

@@ -5,19 +5,24 @@ from streaming_stt_nemo import Model
 from huggingface_hub import InferenceClient
 import edge_tts
 default_lang = "en"
 engines = {default_lang: Model(default_lang)}
 def transcribe(audio):
     """Run speech-to-text on ``audio`` with the English engine.

     ``audio`` is handed unchanged to the engine's ``stt_file`` method
     (presumably a path to an audio file, since the Gradio input uses
     ``type="filepath"`` -- confirm against the caller).

     Returns the first element of whatever ``stt_file`` produces.
     """
     # Named ``stt_engine`` so the local does not shadow the module-level
     # ``model`` function defined further down in the file.
     stt_engine = engines["en"]
     results = stt_engine.stt_file(audio)
     return results[0]
 client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
 system_instructions = "[SYSTEM] You are CrucialCoach, an AI-powered conversational coach. Guide the user through challenging workplace situations using the principles from 'Crucial Conversations'. Ask one question at a time and provide step-by-step guidance.\n\n[USER]"
 @spaces.GPU(duration=120)
 def model(text):
     generate_kwargs = dict(
@@ -30,13 +35,15 @@ def model(text):
     )
     formatted_prompt = system_instructions + text + "[CrucialCoach]"
     stream = client.text_generation(
-        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
     output = ""
     for response in stream:
         if not response.token.text == "</s>":
             output += response.token.text
     return output
 async def respond(audio):
     user = transcribe(audio)
     reply = model(user)
@@ -46,22 +53,25 @@ async def respond(audio):
         await communicate.save(tmp_path)
     return tmp_path
 theme = gr.themes.Base()
-with gr.Blocks() as voice:
     with gr.Row():
-        input = gr.Audio(label="Voice Chat", sources="microphone", type="filepath", waveform_options=False)
-        output = gr.Audio(label="CrucialCoach", type="filepath",
-                        interactive=False,
-                        autoplay=True,
-                        elem_classes="audio")
         gr.Interface(
-            fn=respond,
             inputs=[input],
-                outputs=[output], live=True)
-with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="CrucialCoach DEMO") as demo:
     gr.TabbedInterface([voice], ['🗣️ Crucial Coach Chat'])
 demo.queue(max_size=200)
-demo.launch()

 from huggingface_hub import InferenceClient
 import edge_tts
+# Initialize default language and STT model
 default_lang = "en"
 engines = {default_lang: Model(default_lang)}
+# Function to transcribe audio to text
 def transcribe(audio):
     """Run speech-to-text on ``audio`` with the English engine.

     ``audio`` is handed unchanged to the engine's ``stt_file`` method
     (presumably a path to an audio file, since the Gradio input uses
     ``type="filepath"`` -- confirm against the caller).

     Returns the first element of whatever ``stt_file`` produces.
     """
     # Named ``stt_engine`` so the local does not shadow the module-level
     # ``model`` function defined further down in the file.
     stt_engine = engines["en"]
     results = stt_engine.stt_file(audio)
     return results[0]
+# Initialize Huggingface InferenceClient
 client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+# System instructions for the CrucialCoach
 system_instructions = "[SYSTEM] You are CrucialCoach, an AI-powered conversational coach. Guide the user through challenging workplace situations using the principles from 'Crucial Conversations'. Ask one question at a time and provide step-by-step guidance.\n\n[USER]"
+# Decorator for using GPU with a duration of 120 seconds
 @spaces.GPU(duration=120)
 def model(text):
     generate_kwargs = dict(
     )
     formatted_prompt = system_instructions + text + "[CrucialCoach]"
     stream = client.text_generation(
+        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
+    )
     output = ""
     for response in stream:
         if not response.token.text == "</s>":
             output += response.token.text
     return output
+# Asynchronous function to handle audio input and provide response
 async def respond(audio):
     user = transcribe(audio)
     reply = model(user)
         await communicate.save(tmp_path)
     return tmp_path
+# Gradio theme
 theme = gr.themes.Base()
+# Gradio interface for voice chat
+with gr.Blocks() as voice:
     with gr.Row():
+        input = gr.Audio(label="Voice Chat", source="microphone", type="filepath", waveform_options=False)
+        output = gr.Audio(label="CrucialCoach", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
         gr.Interface(
+            fn=respond,
             inputs=[input],
+            outputs=[output],
+            live=True
+        )
+# Gradio demo setup
+with gr.Blocks(theme=theme, css="footer {visibility: hidden} textbox {resize: none}", title="CrucialCoach DEMO") as demo:
     gr.TabbedInterface([voice], ['🗣️ Crucial Coach Chat'])
+# Queue setup and launch
 demo.queue(max_size=200)
+demo.launch()