Voice-Chat

Running

App Files Community

NeoPy committed on Feb 8

Commit

ebf74d7

verified ·

1 Parent(s): 0ad45a6

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -7

app.py CHANGED Viewed

@@ -96,6 +96,8 @@ def process_audio_input(audio_path, text, apikey, history, conv_state):
     Process audio and/or text input from the user:
       - If an audio file is provided, its transcript is obtained.
       - The conversation state and history are updated.
     """
     if not audio_path and not text.strip():
         return history, conv_state, ""
@@ -107,11 +109,15 @@ def process_audio_input(audio_path, text, apikey, history, conv_state):
     if not text.strip():
         return history, conv_state, ""
-    conv_state.append({"role": "user", "content": text})
-    history.append((text, None))
     response = generate_response(conv_state, apikey)
-    conv_state.append({"role": "assistant", "content": response})
-    history[-1] = (text, response)
     return history, conv_state, ""
@@ -238,18 +244,24 @@ Have a conversation with an AI using your reference voice!
     def generate_audio_response(history, ref_audio, ref_text, remove_silence):
         """
         Generate an audio response from the last AI message in the conversation.
         """
         if not history or not ref_audio:
             return None, ref_text
-        last_user_message, last_ai_response = history[-1]
-        if not last_ai_response:
             return None, ref_text
         audio_result, _, ref_text_out = infer(
             ref_audio,
             ref_text,
-            last_ai_response,
             remove_silence,
             cross_fade_duration=0.15,
             speed=1.0,

     Process audio and/or text input from the user:
       - If an audio file is provided, its transcript is obtained.
       - The conversation state and history are updated.
+    Updated to construct the chat history as a list of dictionaries.
     """
     if not audio_path and not text.strip():
         return history, conv_state, ""
     if not text.strip():
         return history, conv_state, ""
+    # Construct user message as a dict.
+    user_msg = {"role": "user", "content": text}
+    conv_state.append(user_msg)
+    history.append(user_msg)
     response = generate_response(conv_state, apikey)
+    assistant_msg = {"role": "assistant", "content": response}
+    conv_state.append(assistant_msg)
+    history.append(assistant_msg)
     return history, conv_state, ""
     def generate_audio_response(history, ref_audio, ref_text, remove_silence):
         """
         Generate an audio response from the last AI message in the conversation.
+        Updated to search for the last assistant message in dictionary format.
         """
         if not history or not ref_audio:
             return None, ref_text
+        # Find the last message from the assistant.
+        last_assistant = None
+        for message in reversed(history):
+            if message.get("role") == "assistant":
+                last_assistant = message
+                break
+        if last_assistant is None or not last_assistant.get("content", "").strip():
             return None, ref_text
         audio_result, _, ref_text_out = infer(
             ref_audio,
             ref_text,
+            last_assistant["content"],
             remove_silence,
             cross_fade_duration=0.15,
             speed=1.0,