TuringsSolutions committed · verified
Commit 466a41a · 1 Parent(s): b6ab738

Update app.py

Files changed (1): app.py (+31 -16)
app.py CHANGED
@@ -4,7 +4,6 @@ import io
 import tempfile
 from pydub import AudioSegment
 from dataclasses import dataclass, field
-import numpy as np
 
 @dataclass
 class AppState:
@@ -13,8 +12,7 @@ class AppState:
     pause_detected: bool = False
     stopped: bool = False
     started_talking: bool = False
-    conversation: list = field(default_factory=list)  # Use default_factory for mutable defaults
-
+    conversation: list = field(default_factory=list)  # Proper use of default_factory
 
 # Function to process audio input and detect pauses
 def process_audio(audio: tuple, state: AppState):
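
Note: this hunk drops import numpy as np, but the unchanged context in the next hunk still grows the buffer with np.concatenate, so process_audio will raise NameError at runtime. Unless the numpy usage is replaced as well, the import has to stay:

import numpy as np  # still required by np.concatenate in process_audio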
@@ -24,20 +22,28 @@ def process_audio(audio: tuple, state: AppState):
     else:
         state.stream = np.concatenate((state.stream, audio[1]))
 
-    # Custom pause detection logic (replace with actual implementation)
-    pause_detected = len(state.stream) > state.sampling_rate * 1  # Example: 1-sec pause
+    # Detect if a pause has occurred (for simplicity, use 1-second threshold)
+    pause_detected = len(state.stream) > state.sampling_rate * 1
     state.pause_detected = pause_detected
 
     if state.pause_detected:
         return gr.Audio(recording=False), state  # Stop recording
     return None, state
 
-# Generate chatbot response from user audio input
-def response(state: AppState):
+# Generate chatbot response based on user input (audio or text)
+def response(user_input, state: AppState, input_type: str):
+    if input_type == "text":
+        # Handle text input
+        state.conversation.append({"role": "user", "content": user_input})
+        bot_response = f"Echo: {user_input}"  # Simulate response
+        state.conversation.append({"role": "assistant", "content": bot_response})
+        return bot_response, state
+
+    # Handle audio input if pause was detected
     if not state.pause_detected:
         return None, state
 
+    # Convert audio to WAV and store in conversation history
     audio_buffer = io.BytesIO()
     segment = AudioSegment(
         state.stream.tobytes(),
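
Note: the pause check above only tests whether more than one second of audio has accumulated, so it fires one second into every recording regardless of silence. A minimal energy-based alternative might look like the following sketch (detect_pause, window_s, and threshold are illustrative names and values, not part of this commit):

import numpy as np

def detect_pause(stream: np.ndarray, sampling_rate: int,
                 window_s: float = 1.0, threshold: float = 500.0) -> bool:
    # Report a pause once the RMS energy of the most recent window of
    # int16 samples drops below a tunable threshold.
    needed = int(window_s * sampling_rate)
    if len(stream) < needed:
        return False  # not enough audio buffered yet
    window = stream[-needed:].astype(np.float64)
    rms = np.sqrt(np.mean(window ** 2))
    return rms < threshold

process_audio could then set pause_detected = detect_pause(state.stream, state.sampling_rate) in place of the length check.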
@@ -51,9 +57,9 @@ def response(state: AppState):
         f.write(audio_buffer.getvalue())
     state.conversation.append({"role": "user", "content": {"path": f.name, "mime_type": "audio/wav"}})
 
-    # Simulate chatbot's response (replace with mini omni model logic)
+    # Simulate bot's response (replace with mini omni logic)
     chatbot_response = b"Simulated response audio content"  # Placeholder
-    output_buffer = chatbot_response  # Stream actual chatbot response here
+    output_buffer = chatbot_response
 
     with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
         f.write(output_buffer)
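
Note: the hunk context cuts off the AudioSegment keyword arguments. For reference, building a segment from a raw numpy buffer and exporting it as WAV usually looks like the sketch below, assuming 16-bit mono PCM at state.sampling_rate (the exact values depend on what gr.Audio delivers):

import io
from pydub import AudioSegment

segment = AudioSegment(
    state.stream.tobytes(),
    frame_rate=state.sampling_rate,
    sample_width=state.stream.dtype.itemsize,  # 2 for int16
    channels=1,
)
audio_buffer = io.BytesIO()
segment.export(audio_buffer, format="wav")  # WAV bytes for the temp file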
@@ -61,32 +67,41 @@ def response(state: AppState):
 
     yield None, state
 
-# --- Gradio Interface ---
-
+# Start recording audio input
 def start_recording_user(state: AppState):
     if not state.stopped:
         return gr.Audio(recording=True)
 
-# Build Gradio app using Blocks API
+# Gradio interface setup
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
             input_audio = gr.Audio(label="Input Audio", sources="microphone", type="numpy")
+            text_input = gr.Textbox(label="Text Input", placeholder="Type your message here...")
         with gr.Column():
             chatbot = gr.Chatbot(label="Conversation", type="messages")
             output_audio = gr.Audio(label="Output Audio", streaming=True, autoplay=True)
-
+
     state = gr.State(value=AppState())
 
+    # Handle audio input streaming
     stream = input_audio.stream(
         process_audio, [input_audio, state], [input_audio, state], stream_every=0.5, time_limit=30
    )
-    respond = input_audio.stop_recording(response, [state], [output_audio, state])
+
+    # Handle responses for both text and audio inputs
+    text_submit = text_input.submit(
+        lambda txt, s: response(txt, s, "text"), [text_input, state], [chatbot, state]
+    )
+    respond = input_audio.stop_recording(response, [None, state, "audio"], [output_audio, state])
     respond.then(lambda s: s.conversation, [state], [chatbot])
 
+    # Restart recording when audio playback stops
     restart = output_audio.stop(start_recording_user, [state], [input_audio])
+
+    # Stop button to cancel the conversation
     cancel = gr.Button("Stop Conversation", variant="stop")
     cancel.click(lambda: (AppState(stopped=True), gr.Audio(recording=False)), None, [state, input_audio], cancels=[respond, restart])
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
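
Note: two details in this last hunk are worth flagging. Gradio inputs lists may only contain components, so [None, state, "audio"] in the new stop_recording call will be rejected, and because the new response body contains a yield, Python treats the whole function as a generator, so the text branch's return value is never delivered to Gradio as a plain return. One possible wiring that keeps the text/audio split is sketched below (respond_to_text and respond_to_audio are illustrative helpers, not part of this commit):

def respond_to_text(txt, s):
    # Plain (non-generator) handler for the text path; a Chatbot with
    # type="messages" renders the message list, not a bare string.
    s.conversation.append({"role": "user", "content": txt})
    s.conversation.append({"role": "assistant", "content": f"Echo: {txt}"})
    return s.conversation, s

def respond_to_audio(s):
    # Generator wrapper: keeps the audio path streaming while passing the
    # extra arguments without putting literal values in the inputs list.
    yield from response(None, s, "audio")

text_submit = text_input.submit(respond_to_text, [text_input, state], [chatbot, state])
respond = input_audio.stop_recording(respond_to_audio, [state], [output_audio, state])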