TuringsSolutions committed on
Commit
94cfbe9
·
verified ·
1 Parent(s): 466a41a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -23
app.py CHANGED
@@ -12,9 +12,9 @@ class AppState:
12
  pause_detected: bool = False
13
  stopped: bool = False
14
  started_talking: bool = False
15
- conversation: list = field(default_factory=list) # Proper use of default_factory
16
 
17
- # Function to process audio input and detect pauses
18
  def process_audio(audio: tuple, state: AppState):
19
  if state.stream is None:
20
  state.stream = audio[1]
@@ -22,28 +22,24 @@ def process_audio(audio: tuple, state: AppState):
22
  else:
23
  state.stream = np.concatenate((state.stream, audio[1]))
24
 
25
- # Detect if a pause has occurred (for simplicity, use 1-second threshold)
26
  pause_detected = len(state.stream) > state.sampling_rate * 1
27
  state.pause_detected = pause_detected
28
 
29
  if state.pause_detected:
30
- return gr.Audio(recording=False), state # Stop recording
31
  return None, state
32
 
33
- # Generate chatbot response based on user input (audio or text)
34
- def response(user_input, state: AppState, input_type: str):
35
  if input_type == "text":
36
- # Handle text input
37
- state.conversation.append({"role": "user", "content": user_input})
38
- bot_response = f"Echo: {user_input}" # Simulate response
39
  state.conversation.append({"role": "assistant", "content": bot_response})
40
  return bot_response, state
41
 
42
- # Handle audio input if pause was detected
43
  if not state.pause_detected:
44
  return None, state
45
 
46
- # Convert audio to WAV and store in conversation history
47
  audio_buffer = io.BytesIO()
48
  segment = AudioSegment(
49
  state.stream.tobytes(),
@@ -57,12 +53,9 @@ def response(user_input, state: AppState, input_type: str):
57
  f.write(audio_buffer.getvalue())
58
  state.conversation.append({"role": "user", "content": {"path": f.name, "mime_type": "audio/wav"}})
59
 
60
- # Simulate bot's response (replace with mini omni logic)
61
- chatbot_response = b"Simulated response audio content" # Placeholder
62
- output_buffer = chatbot_response
63
-
64
  with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
65
- f.write(output_buffer)
66
  state.conversation.append({"role": "assistant", "content": {"path": f.name, "mime_type": "audio/mp3"}})
67
 
68
  yield None, state
@@ -72,11 +65,11 @@ def start_recording_user(state: AppState):
72
  if not state.stopped:
73
  return gr.Audio(recording=True)
74
 
75
- # Gradio interface setup
76
  with gr.Blocks() as demo:
77
  with gr.Row():
78
  with gr.Column():
79
- input_audio = gr.Audio(label="Input Audio", sources="microphone", type="numpy")
80
  text_input = gr.Textbox(label="Text Input", placeholder="Type your message here...")
81
  with gr.Column():
82
  chatbot = gr.Chatbot(label="Conversation", type="messages")
@@ -89,19 +82,26 @@ with gr.Blocks() as demo:
89
  process_audio, [input_audio, state], [input_audio, state], stream_every=0.5, time_limit=30
90
  )
91
 
92
- # Handle responses for both text and audio inputs
93
  text_submit = text_input.submit(
94
  lambda txt, s: response(txt, s, "text"), [text_input, state], [chatbot, state]
95
  )
96
- respond = input_audio.stop_recording(response, [None, state, "audio"], [output_audio, state])
 
 
 
 
97
  respond.then(lambda s: s.conversation, [state], [chatbot])
98
 
99
- # Restart recording when audio playback stops
100
  restart = output_audio.stop(start_recording_user, [state], [input_audio])
101
 
102
- # Stop button to cancel the conversation
103
  cancel = gr.Button("Stop Conversation", variant="stop")
104
- cancel.click(lambda: (AppState(stopped=True), gr.Audio(recording=False)), None, [state, input_audio], cancels=[respond, restart])
 
 
 
105
 
106
  if __name__ == "__main__":
107
  demo.launch()
 
12
  pause_detected: bool = False
13
  stopped: bool = False
14
  started_talking: bool = False
15
+ conversation: list = field(default_factory=list)
16
 
17
+ # Process audio input and detect pauses
18
  def process_audio(audio: tuple, state: AppState):
19
  if state.stream is None:
20
  state.stream = audio[1]
 
22
  else:
23
  state.stream = np.concatenate((state.stream, audio[1]))
24
 
 
25
  pause_detected = len(state.stream) > state.sampling_rate * 1
26
  state.pause_detected = pause_detected
27
 
28
  if state.pause_detected:
29
+ return gr.Audio(recording=False), state
30
  return None, state
31
 
32
+ # Generate response based on input type (text or audio)
33
+ def response(input_data, state: AppState, input_type: str):
34
  if input_type == "text":
35
+ state.conversation.append({"role": "user", "content": input_data})
36
+ bot_response = f"Echo: {input_data}"
 
37
  state.conversation.append({"role": "assistant", "content": bot_response})
38
  return bot_response, state
39
 
 
40
  if not state.pause_detected:
41
  return None, state
42
 
 
43
  audio_buffer = io.BytesIO()
44
  segment = AudioSegment(
45
  state.stream.tobytes(),
 
53
  f.write(audio_buffer.getvalue())
54
  state.conversation.append({"role": "user", "content": {"path": f.name, "mime_type": "audio/wav"}})
55
 
56
+ chatbot_response = b"Simulated response audio content"
 
 
 
57
  with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
58
+ f.write(chatbot_response)
59
  state.conversation.append({"role": "assistant", "content": {"path": f.name, "mime_type": "audio/mp3"}})
60
 
61
  yield None, state
 
65
  if not state.stopped:
66
  return gr.Audio(recording=True)
67
 
68
+ # Gradio app setup
69
  with gr.Blocks() as demo:
70
  with gr.Row():
71
  with gr.Column():
72
+ input_audio = gr.Audio(label="Input Audio", source="microphone", type="numpy")
73
  text_input = gr.Textbox(label="Text Input", placeholder="Type your message here...")
74
  with gr.Column():
75
  chatbot = gr.Chatbot(label="Conversation", type="messages")
 
82
  process_audio, [input_audio, state], [input_audio, state], stream_every=0.5, time_limit=30
83
  )
84
 
85
+ # Handle text input submission
86
  text_submit = text_input.submit(
87
  lambda txt, s: response(txt, s, "text"), [text_input, state], [chatbot, state]
88
  )
89
+
90
+ # Handle audio stop recording
91
+ respond = input_audio.stop_recording(
92
+ lambda s: response(None, s, "audio"), [state], [output_audio, state]
93
+ )
94
  respond.then(lambda s: s.conversation, [state], [chatbot])
95
 
96
+ # Restart recording after audio playback ends
97
  restart = output_audio.stop(start_recording_user, [state], [input_audio])
98
 
99
+ # Stop conversation button
100
  cancel = gr.Button("Stop Conversation", variant="stop")
101
+ cancel.click(
102
+ lambda: (AppState(stopped=True), gr.Audio(recording=False)),
103
+ None, [state, input_audio], cancels=[respond, restart]
104
+ )
105
 
106
  if __name__ == "__main__":
107
  demo.launch()