Mattral committed on
Commit
3ce5891
·
verified ·
1 Parent(s): c25a125

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -50
app.py CHANGED
@@ -1,13 +1,12 @@
1
  import gradio as gr
2
- import speech_recognition as sr
3
  from huggingface_hub import InferenceClient
4
  import random
5
  import textwrap
6
- import pyttsx3
 
7
 
8
- # Initialize the speech recognition and TTS engine
9
- recognizer = sr.Recognizer()
10
- tts_engine = pyttsx3.init()
11
 
12
  # Define the model to be used
13
  model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
@@ -15,26 +14,25 @@ client = InferenceClient(model)
15
 
16
  # Embedded system prompt
17
  system_prompt_text = (
18
- "You are a smart and helpful co-worker of Thailand based multi-national company PTT, "
19
- "and PTTEP. You help with any kind of request and provide a detailed answer to the question. "
20
- "But if you are asked about something unethical or dangerous, you must refuse and provide a safe and respectful way to handle that."
21
  )
22
 
23
- # Read the content of the info.md file with UTF-8 encoding
24
- with open("info.md", "r", encoding="utf-8") as file:
25
- info_md_content = file.read()
 
 
 
26
 
27
- # Chunk the info.md content into smaller sections
28
- chunk_size = 2500 # Adjust this size as needed
29
- info_md_chunks = textwrap.wrap(info_md_content, chunk_size)
30
 
31
- def get_all_chunks(chunks):
32
- return "\n\n".join(chunks)
33
 
34
- def format_prompt_mixtral(message, history, info_md_chunks):
35
  prompt = "<s>"
36
- all_chunks = get_all_chunks(info_md_chunks)
37
- prompt += f"{all_chunks}\n\n" # Add all chunks of info.md at the beginning
38
  prompt += f"{system_prompt_text}\n\n" # Add the system prompt
39
 
40
  if history:
@@ -54,7 +52,7 @@ def chat_inf(prompt, history, seed, temp, tokens, top_p, rep_p):
54
  seed=seed,
55
  )
56
 
57
- formatted_prompt = format_prompt_mixtral(prompt, history, info_md_chunks)
58
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
59
  output = ""
60
  for response in stream:
@@ -74,33 +72,16 @@ def check_rand(inp, val):
74
  else:
75
  return gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, value=int(val))
76
 
77
- def recognize_speech(audio):
78
- with sr.AudioFile(audio) as source:
79
- audio_data = recognizer.record(source) # Record the audio
80
- try:
81
- # Recognize the speech using Google's API
82
- text = recognizer.recognize_google(audio_data)
83
- return text
84
- except sr.UnknownValueError:
85
- return "Sorry, I could not understand the audio."
86
- except sr.RequestError:
87
- return "Error: Could not request results from the speech recognition service."
88
-
89
- def speak_text(text):
90
- # Convert text to speech using pyttsx3
91
- tts_engine.save_to_file(text, 'output.mp3') # Save the TTS audio
92
- tts_engine.runAndWait() # Wait until TTS is done
93
-
94
- with gr.Blocks() as app:
95
  gr.HTML("""<center><h1 style='font-size:xx-large;'>PTT Chatbot</h1><br><h3>running on Huggingface Inference</h3><br><h7>EXPERIMENTAL</center>""")
96
 
97
  with gr.Row():
98
  chat = gr.Chatbot(height=500)
99
-
100
  with gr.Group():
101
  with gr.Row():
102
  with gr.Column(scale=3):
103
- inp = gr.Audio(type="filepath") # Audio input
104
  with gr.Row():
105
  with gr.Column(scale=2):
106
  btn = gr.Button("Chat")
@@ -119,21 +100,18 @@ with gr.Blocks() as app:
119
 
120
  hid1 = gr.Number(value=1, visible=False)
121
 
122
- output_audio = gr.Audio(label="Output Audio", type="filepath", interactive=False) # Create an output audio component
123
-
124
  def handle_chat(audio_input, chat_history, seed, temp, tokens, top_p, rep_p):
125
- user_message = recognize_speech(audio_input) # Recognize speech input
126
- if "Sorry" in user_message: # Check for error in recognition
127
- return chat_history, user_message, None
 
128
  response_gen = chat_inf(user_message, chat_history, seed, temp, tokens, top_p, rep_p)
129
  response = next(response_gen)[0][-1][1] # Get the response text
130
- speak_text(response) # Speak the response text
131
- return chat_history + [(user_message, response)], response, 'output.mp3' # Return the filename for audio output
132
 
133
- go = btn.click(handle_chat, [inp, chat, seed, temp, tokens, top_p, rep_p], [chat, inp, output_audio]) # Use output_audio instead of "output.mp3"
134
 
135
  stop_btn.click(None, None, None, cancels=[go])
136
  clear_btn.click(clear_fn, None, [inp, chat])
137
 
138
- app.queue(default_concurrency_limit=10).launch(share=True, auth=("admin", "0112358"))
139
-
 
1
  import gradio as gr
 
2
  from huggingface_hub import InferenceClient
3
  import random
4
  import textwrap
5
+ from transformers import pipeline
6
+ import numpy as np
7
 
8
+ # Load the Whisper model for automatic speech recognition
9
+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
 
10
 
11
  # Define the model to be used
12
  model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 
14
 
15
  # Embedded system prompt
16
  system_prompt_text = (
17
+ "You are a smart and helpful co-worker of Thailand based multi-national company PTT, and PTTEP. "
18
+ "You help with any kind of request and provide a detailed answer to the question. But if you are asked about something "
19
+ "unethical or dangerous, you must refuse and provide a safe and respectful way to handle that."
20
  )
21
 
22
+ # Function to transcribe audio input
23
def transcribe(audio):
    """Transcribe a recorded audio clip to text with the speech-recognition pipeline.

    Args:
        audio: One of
            * a ``(sampling_rate, samples)`` tuple (numpy-style audio input),
            * a path to an audio file, which is what an audio component
              configured with ``type="filepath"`` delivers, or
            * ``None`` when nothing was recorded.

    Returns:
        The transcribed text, or an empty string when there is nothing to
        transcribe (missing, empty, or completely silent input).
    """
    if audio is None:
        return ""

    # A filepath input arrives as a plain string. The ASR pipeline accepts a
    # local filename directly, so hand the path over instead of trying to
    # unpack it as (sampling_rate, samples).
    if isinstance(audio, str):
        if not audio:
            return ""
        return transcriber(audio)["text"]

    sr, y = audio
    y = np.asarray(y)
    if y.size == 0:
        return ""

    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    # astype() copies, so the in-place scaling below never touches the
    # caller's buffer.
    y = y.astype(np.float32)

    peak = np.max(np.abs(y))
    if peak == 0:
        # Pure silence: dividing by a zero peak would fill the signal with
        # NaNs, and there is no speech to recognise anyway.
        return ""
    y /= peak  # Normalize audio to the [-1, 1] range

    return transcriber({"sampling_rate": sr, "raw": y})["text"]
 
33
 
34
+ def format_prompt_mixtral(message, history):
35
  prompt = "<s>"
 
 
36
  prompt += f"{system_prompt_text}\n\n" # Add the system prompt
37
 
38
  if history:
 
52
  seed=seed,
53
  )
54
 
55
+ formatted_prompt = format_prompt_mixtral(prompt, history)
56
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
57
  output = ""
58
  for response in stream:
 
72
  else:
73
  return gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, value=int(val))
74
 
75
+ with gr.Blocks() as app: # Add auth here
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  gr.HTML("""<center><h1 style='font-size:xx-large;'>PTT Chatbot</h1><br><h3>running on Huggingface Inference</h3><br><h7>EXPERIMENTAL</center>""")
77
 
78
  with gr.Row():
79
  chat = gr.Chatbot(height=500)
80
+
81
  with gr.Group():
82
  with gr.Row():
83
  with gr.Column(scale=3):
84
+ inp = gr.Audio(source="microphone", type="filepath") # Audio input from the microphone
85
  with gr.Row():
86
  with gr.Column(scale=2):
87
  btn = gr.Button("Chat")
 
100
 
101
  hid1 = gr.Number(value=1, visible=False)
102
 
 
 
103
  def handle_chat(audio_input, chat_history, seed, temp, tokens, top_p, rep_p):
104
+ user_message = transcribe(audio_input) # Transcribe audio to text
105
+ if not user_message: # Check for empty or error in recognition
106
+ return chat_history, "Sorry, I couldn't understand that."
107
+
108
  response_gen = chat_inf(user_message, chat_history, seed, temp, tokens, top_p, rep_p)
109
  response = next(response_gen)[0][-1][1] # Get the response text
110
+ return chat_history + [(user_message, response)], response # Return updated chat history
 
111
 
112
+ go = btn.click(handle_chat, [inp, chat, seed, temp, tokens, top_p, rep_p], chat)
113
 
114
  stop_btn.click(None, None, None, cancels=[go])
115
  clear_btn.click(clear_fn, None, [inp, chat])
116
 
117
+ app.queue(default_concurrency_limit=10).launch(share=True, auth=("admin", "0112358")) # Launch the app with authentication