Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -96,6 +96,8 @@ def process_audio_input(audio_path, text, apikey, history, conv_state):
|
|
96 |
Process audio and/or text input from the user:
|
97 |
- If an audio file is provided, its transcript is obtained.
|
98 |
- The conversation state and history are updated.
|
|
|
|
|
99 |
"""
|
100 |
if not audio_path and not text.strip():
|
101 |
return history, conv_state, ""
|
@@ -107,11 +109,15 @@ def process_audio_input(audio_path, text, apikey, history, conv_state):
|
|
107 |
if not text.strip():
|
108 |
return history, conv_state, ""
|
109 |
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
112 |
response = generate_response(conv_state, apikey)
|
113 |
-
|
114 |
-
|
|
|
115 |
return history, conv_state, ""
|
116 |
|
117 |
|
@@ -238,18 +244,24 @@ Have a conversation with an AI using your reference voice!
|
|
238 |
def generate_audio_response(history, ref_audio, ref_text, remove_silence):
|
239 |
"""
|
240 |
Generate an audio response from the last AI message in the conversation.
|
|
|
241 |
"""
|
242 |
if not history or not ref_audio:
|
243 |
return None, ref_text
|
244 |
|
245 |
-
|
246 |
-
|
|
|
|
|
|
|
|
|
|
|
247 |
return None, ref_text
|
248 |
|
249 |
audio_result, _, ref_text_out = infer(
|
250 |
ref_audio,
|
251 |
ref_text,
|
252 |
-
|
253 |
remove_silence,
|
254 |
cross_fade_duration=0.15,
|
255 |
speed=1.0,
|
|
|
96 |
Process audio and/or text input from the user:
|
97 |
- If an audio file is provided, its transcript is obtained.
|
98 |
- The conversation state and history are updated.
|
99 |
+
|
100 |
+
Updated to construct the chat history as a list of dictionaries.
|
101 |
"""
|
102 |
if not audio_path and not text.strip():
|
103 |
return history, conv_state, ""
|
|
|
109 |
if not text.strip():
|
110 |
return history, conv_state, ""
|
111 |
|
112 |
+
# Construct user message as a dict.
|
113 |
+
user_msg = {"role": "user", "content": text}
|
114 |
+
conv_state.append(user_msg)
|
115 |
+
history.append(user_msg)
|
116 |
+
|
117 |
response = generate_response(conv_state, apikey)
|
118 |
+
assistant_msg = {"role": "assistant", "content": response}
|
119 |
+
conv_state.append(assistant_msg)
|
120 |
+
history.append(assistant_msg)
|
121 |
return history, conv_state, ""
|
122 |
|
123 |
|
|
|
244 |
def generate_audio_response(history, ref_audio, ref_text, remove_silence):
|
245 |
"""
|
246 |
Generate an audio response from the last AI message in the conversation.
|
247 |
+
Updated to search for the last assistant message in dictionary format.
|
248 |
"""
|
249 |
if not history or not ref_audio:
|
250 |
return None, ref_text
|
251 |
|
252 |
+
# Find the last message from the assistant.
|
253 |
+
last_assistant = None
|
254 |
+
for message in reversed(history):
|
255 |
+
if message.get("role") == "assistant":
|
256 |
+
last_assistant = message
|
257 |
+
break
|
258 |
+
if last_assistant is None or not last_assistant.get("content", "").strip():
|
259 |
return None, ref_text
|
260 |
|
261 |
audio_result, _, ref_text_out = infer(
|
262 |
ref_audio,
|
263 |
ref_text,
|
264 |
+
last_assistant["content"],
|
265 |
remove_silence,
|
266 |
cross_fade_duration=0.15,
|
267 |
speed=1.0,
|