Spaces:

Mattral
/

ORG-Chat

Sleeping

App Files Files Community

Mattral committed on Oct 9, 2024

Commit

3ce5891

verified ·

1 Parent(s): c25a125

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -50

app.py CHANGED Viewed

@@ -1,13 +1,12 @@
 import gradio as gr
-import speech_recognition as sr
 from huggingface_hub import InferenceClient
 import random
 import textwrap
-import pyttsx3
-# Initialize the speech recognition and TTS engine
-recognizer = sr.Recognizer()
-tts_engine = pyttsx3.init()
 # Define the model to be used
 model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
@@ -15,26 +14,25 @@ client = InferenceClient(model)
 # Embedded system prompt
 system_prompt_text = (
-    "You are a smart and helpful co-worker of Thailand based multi-national company PTT, "
-    "and PTTEP. You help with any kind of request and provide a detailed answer to the question. "
-    "But if you are asked about something unethical or dangerous, you must refuse and provide a safe and respectful way to handle that."
 )
-# Read the content of the info.md file with UTF-8 encoding
-with open("info.md", "r", encoding="utf-8") as file:
-    info_md_content = file.read()
-# Chunk the info.md content into smaller sections
-chunk_size = 2500  # Adjust this size as needed
-info_md_chunks = textwrap.wrap(info_md_content, chunk_size)
-def get_all_chunks(chunks):
-    return "\n\n".join(chunks)
-def format_prompt_mixtral(message, history, info_md_chunks):
     prompt = "<s>"
-    all_chunks = get_all_chunks(info_md_chunks)
-    prompt += f"{all_chunks}\n\n"  # Add all chunks of info.md at the beginning
     prompt += f"{system_prompt_text}\n\n"  # Add the system prompt
     if history:
@@ -54,7 +52,7 @@ def chat_inf(prompt, history, seed, temp, tokens, top_p, rep_p):
         seed=seed,
     )
-    formatted_prompt = format_prompt_mixtral(prompt, history, info_md_chunks)
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
     output = ""
     for response in stream:
@@ -74,33 +72,16 @@ def check_rand(inp, val):
     else:
         return gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, value=int(val))
-def recognize_speech(audio):
-    with sr.AudioFile(audio) as source:
-        audio_data = recognizer.record(source)  # Record the audio
-        try:
-            # Recognize the speech using Google's API
-            text = recognizer.recognize_google(audio_data)
-            return text
-        except sr.UnknownValueError:
-            return "Sorry, I could not understand the audio."
-        except sr.RequestError:
-            return "Error: Could not request results from the speech recognition service."
-def speak_text(text):
-    # Convert text to speech using pyttsx3
-    tts_engine.save_to_file(text, 'output.mp3')  # Save the TTS audio
-    tts_engine.runAndWait()  # Wait until TTS is done
-with gr.Blocks() as app:
     gr.HTML("""<center><h1 style='font-size:xx-large;'>PTT Chatbot</h1><br><h3>running on Huggingface Inference</h3><br><h7>EXPERIMENTAL</center>""")
     with gr.Row():
         chat = gr.Chatbot(height=500)
     with gr.Group():
         with gr.Row():
             with gr.Column(scale=3):
-                inp = gr.Audio(type="filepath")  # Audio input
                 with gr.Row():
                     with gr.Column(scale=2):
                         btn = gr.Button("Chat")
@@ -119,21 +100,18 @@ with gr.Blocks() as app:
     hid1 = gr.Number(value=1, visible=False)
-    output_audio = gr.Audio(label="Output Audio", type="filepath", interactive=False)  # Create an output audio component
     def handle_chat(audio_input, chat_history, seed, temp, tokens, top_p, rep_p):
-        user_message = recognize_speech(audio_input)  # Recognize speech input
-        if "Sorry" in user_message:  # Check for error in recognition
-            return chat_history, user_message, None
         response_gen = chat_inf(user_message, chat_history, seed, temp, tokens, top_p, rep_p)
         response = next(response_gen)[0][-1][1]  # Get the response text
-        speak_text(response)  # Speak the response text
-        return chat_history + [(user_message, response)], response, 'output.mp3'  # Return the filename for audio output
-    go = btn.click(handle_chat, [inp, chat, seed, temp, tokens, top_p, rep_p], [chat, inp, output_audio])  # Use output_audio instead of "output.mp3"
     stop_btn.click(None, None, None, cancels=[go])
     clear_btn.click(clear_fn, None, [inp, chat])
-app.queue(default_concurrency_limit=10).launch(share=True, auth=("admin", "0112358"))

 import gradio as gr
 from huggingface_hub import InferenceClient
 import random
 import textwrap
+from transformers import pipeline
+import numpy as np
+# Load the Whisper model for automatic speech recognition
+transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
 # Define the model to be used
 model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 # Embedded system prompt
 system_prompt_text = (
+    "You are a smart and helpful co-worker of Thailand based multi-national company PTT, and PTTEP. "
+    "You help with any kind of request and provide a detailed answer to the question. But if you are asked about something "
+    "unethical or dangerous, you must refuse and provide a safe and respectful way to handle that."
 )
+# Function to transcribe audio input
+def transcribe(audio):
+    """Transcribe recorded audio to text with the Whisper pipeline.
+
+    Args:
+        audio: One of
+            * ``None`` (nothing was recorded),
+            * a path to an audio file (what ``gr.Audio(type="filepath")`` passes), or
+            * a ``(sample_rate, samples)`` tuple where ``samples`` is a NumPy array.
+
+    Returns:
+        The recognized text, or an empty string when there is no audio to transcribe.
+    """
+    if audio is None:
+        return ""
+    # The audio component is configured with type="filepath", so a path string can
+    # arrive here; hand it to the pipeline directly instead of unpacking it.
+    if isinstance(audio, str):
+        return transcriber(audio)["text"]
+    sr, y = audio
+    y = np.asarray(y)
+    if y.size == 0:
+        return ""
+    # Convert to mono if stereo
+    if y.ndim > 1:
+        y = y.mean(axis=1)
+    y = y.astype(np.float32)
+    peak = np.max(np.abs(y))
+    if peak > 0:  # A silent clip has peak 0; dividing would fill the buffer with NaN
+        y /= peak  # Normalize audio
+    return transcriber({"sampling_rate": sr, "raw": y})["text"]  # Transcribe audio
+def format_prompt_mixtral(message, history):
     prompt = "<s>"
     prompt += f"{system_prompt_text}\n\n"  # Add the system prompt
     if history:
         seed=seed,
     )
+    formatted_prompt = format_prompt_mixtral(prompt, history)
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
     output = ""
     for response in stream:
     else:
         return gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, value=int(val))
+with gr.Blocks() as app:  # Add auth here
     gr.HTML("""<center><h1 style='font-size:xx-large;'>PTT Chatbot</h1><br><h3>running on Huggingface Inference</h3><br><h7>EXPERIMENTAL</center>""")
     with gr.Row():
         chat = gr.Chatbot(height=500)
     with gr.Group():
         with gr.Row():
             with gr.Column(scale=3):
+                inp = gr.Audio(source="microphone", type="filepath")  # Audio input from the microphone
                 with gr.Row():
                     with gr.Column(scale=2):
                         btn = gr.Button("Chat")
     hid1 = gr.Number(value=1, visible=False)
     def handle_chat(audio_input, chat_history, seed, temp, tokens, top_p, rep_p):
+        """Transcribe the recorded audio, query the model and extend the chat history.
+
+        Returns a single value, the updated list of (user, bot) message pairs,
+        because the click handler wires this function to one output (the chatbot).
+        """
+        chat_history = chat_history or []  # Tolerate a missing history on the first turn
+        user_message = transcribe(audio_input)  # Transcribe audio to text
+        if not user_message:  # Check for empty or error in recognition
+            # Show the problem as a bot-only turn instead of returning an extra value
+            # that has no matching output component.
+            return chat_history + [(None, "Sorry, I couldn't understand that.")]
         response_gen = chat_inf(user_message, chat_history, seed, temp, tokens, top_p, rep_p)
+        # NOTE(review): only the first item yielded by chat_inf is consumed; if chat_inf
+        # yields incrementally while streaming, this is a partial response - confirm.
         response = next(response_gen)[0][-1][1]  # Get the response text
+        return chat_history + [(user_message, response)]  # Return updated chat history
+    go = btn.click(handle_chat, [inp, chat, seed, temp, tokens, top_p, rep_p], chat)
     stop_btn.click(None, None, None, cancels=[go])
     clear_btn.click(clear_fn, None, [inp, chat])
+app.queue(default_concurrency_limit=10).launch(share=True, auth=("admin", "0112358"))  # Launch the app with authentication