Spaces:

nvidia
/

audio-flamingo-3-chat

Runtime error

App Files Files Community

SreyanG-NVIDIA committed on Aug 5

Commit

67492cd

verified ·

1 Parent(s): 7272785

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -12

app.py CHANGED Viewed

@@ -18,24 +18,48 @@ generation_config_multi = model_multi.default_generation_config
 # ---------------------------------
 # MULTI-TURN INFERENCE FUNCTION
 # ---------------------------------
-def multi_turn_chat(user_input, audio_file, history, current_audio):
     try:
         if audio_file is not None:
-            current_audio = audio_file  # Update state if a new file is uploaded
-        if current_audio is None:
-            return history + [("System", "❌ Please upload an audio file before chatting.")], history, current_audio
-        sound = llava.Sound(current_audio)
-        prompt = f"<sound>\n{user_input}"
-        response = model_multi.generate_content([sound, prompt], generation_config=generation_config_multi)
         history.append((user_input, response))
-        return history, history, current_audio
     except Exception as e:
         history.append((user_input, f"❌ Error: {str(e)}"))
-        return history, history, current_audio
 def speech_prompt_infer(audio_prompt_file):
     try:
         sound = llava.Sound(audio_prompt_file)
@@ -118,12 +142,16 @@ with gr.Blocks(css="""
             user_input_multi = gr.Textbox(label="Your message", placeholder="Ask a question about the audio...", lines=8)
             btn_multi = gr.Button("Send")
             history_state = gr.State([])           # Chat history
-            current_audio_state = gr.State(None)   # Most recent audio file path
             btn_multi.click(
                 fn=multi_turn_chat,
-                inputs=[user_input_multi, audio_input_multi, history_state, current_audio_state],
-                outputs=[chatbot, history_state, current_audio_state]
             )
             gr.Examples(
                 examples=[

 # ---------------------------------
 # MULTI-TURN INFERENCE FUNCTION
 # ---------------------------------
+# def multi_turn_chat(user_input, audio_file, history, current_audio):
+#     try:
+#         if audio_file is not None:
+#             current_audio = audio_file  # Update state if a new file is uploaded
+#         if current_audio is None:
+#             return history + [("System", "❌ Please upload an audio file before chatting.")], history, current_audio
+#         sound = llava.Sound(current_audio)
+#         prompt = f"<sound>\n{user_input}"
+#         response = model_multi.generate_content([sound, prompt], generation_config=generation_config_multi)
+#         history.append((user_input, response))
+#         return history, history, current_audio
+#     except Exception as e:
+#         history.append((user_input, f"❌ Error: {str(e)}"))
+#         return history, history, current_audio
+def multi_turn_chat(user_input, audio_file, history, audio_history):  # Chat handler for btn_multi.click: returns (chatbot value, history state, audio-path list state)
     try:
         if audio_file is not None:
+            audio_history.append(audio_file)  # Append new audio to the list (in place). NOTE(review): no duplicate check, so a file still present in the input on later calls is appended again - confirm intended
+        if not audio_history:  # No audio has been supplied in this session yet
+            return history + [("System", "❌ Please upload an audio file before chatting.")], history, audio_history  # Warning goes only to the chatbot output; the returned history state does not include it
+        # Create list of llava.Sound objects for each audio in history (rebuilt from the stored paths on every call)
+        audio_sounds = [llava.Sound(audio) for audio in audio_history]
+        # Build the text prompt that is placed after all audio sounds; it carries a single <sound> placeholder regardless of how many sounds are passed. NOTE(review): confirm generate_content accepts this when len(audio_sounds) > 1
+        prompt = f"<sound>\n{user_input}"
+        response = model_multi.generate_content(audio_sounds + [prompt], generation_config=generation_config_multi)  # All sounds first, then the prompt string
         history.append((user_input, response))  # Record the turn as a (user message, model reply) pair; mutates the history list in place
+        return history, history, audio_history  # The same history list is used for both the chatbot display and the stored state
     except Exception as e:  # Any failure above is reported inside the chat instead of being raised
         history.append((user_input, f"❌ Error: {str(e)}"))
+        return history, history, audio_history
 def speech_prompt_infer(audio_prompt_file):
     try:
         sound = llava.Sound(audio_prompt_file)
             user_input_multi = gr.Textbox(label="Your message", placeholder="Ask a question about the audio...", lines=8)
             btn_multi = gr.Button("Send")
             history_state = gr.State([])           # Chat history
+            # current_audio_state = gr.State(None)   # Most recent audio file path
+            audio_history_state = gr.State([])  # List of audio file paths
             btn_multi.click(
                 fn=multi_turn_chat,
+                inputs=[user_input_multi, audio_input_multi, history_state, audio_history_state],
+                outputs=[chatbot, history_state, audio_history_state]
+                # inputs=[user_input_multi, audio_input_multi, history_state, current_audio_state],
+                # outputs=[chatbot, history_state, current_audio_state]
             )
             gr.Examples(
                 examples=[