archit11 committed on
Commit
9c0a84a
·
verified ·
1 Parent(s): e48e518

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -14
app.py CHANGED
@@ -5,10 +5,10 @@ import torch
5
  import spaces
6
  import numpy as np
7
 
8
- # Maintain conversation history globally
9
  conversation_history = []
10
 
11
- @spaces.GPU(duration=15)
12
  def transcribe_and_respond(audio_file, chat_history):
13
  try:
14
  pipe = transformers.pipeline(
@@ -21,42 +21,53 @@ def transcribe_and_respond(audio_file, chat_history):
21
  # Load the audio file
22
  audio, sr = librosa.load(audio_file, sr=16000)
23
 
24
- # Print audio properties for debugging
25
  print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")
26
 
27
  # Prepare conversation turns
28
- turns = chat_history.copy() # Take existing chat history
29
-
30
- # Add the audio to the current turn
31
  turns.append({'role': 'user', 'content': '<|audio|>'})
32
 
33
- # Debug: Print the updated turns
34
  print(f"Updated turns: {turns}")
35
 
36
  # Call the model with the updated conversation turns and audio
37
  output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
38
 
39
- # Append the model's response to the chat history
40
  turns.append({'role': 'system', 'content': output})
41
 
42
  # Debug: Print the model's response
43
  print(f"Model output: {output}")
44
 
45
- return turns, turns # Return updated history to display
 
 
 
 
 
 
 
 
46
 
47
  except Exception as e:
48
  return f"Error: {str(e)}"
49
 
50
- # Chat interface setup
51
  iface = gr.Interface(
52
  fn=transcribe_and_respond,
53
- inputs=[gr.Audio(sources="microphone", type="filepath", label="Your Audio (Microphone)"), gr.Chatbot(label="Conversation")],
54
- outputs=gr.Chatbot(label="Conversation"),
 
 
 
 
 
 
55
  title="🎙️ AI Chat with Live Transcription",
56
- description="Talk to the AI through your microphone, and it will respond conversationally based on the ongoing chat. Keep the conversation going!",
57
  live=True,
58
  allow_flagging="auto",
59
- enable_queue=True
60
  )
61
 
62
  if __name__ == "__main__":
 
5
  import spaces
6
  import numpy as np
7
 
8
+ # Initialize the conversation history globally
9
  conversation_history = []
10
 
11
+ @spaces.GPU(duration=20)
12
  def transcribe_and_respond(audio_file, chat_history):
13
  try:
14
  pipe = transformers.pipeline(
 
21
  # Load the audio file
22
  audio, sr = librosa.load(audio_file, sr=16000)
23
 
24
+ # Debug: print the loaded audio's dtype, shape, and sample rate
25
  print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")
26
 
27
  # Prepare conversation turns
28
+ turns = chat_history.copy() # Take the existing chat history and append user input
 
 
29
  turns.append({'role': 'user', 'content': '<|audio|>'})
30
 
31
+ # Debug: print the updated turns
32
  print(f"Updated turns: {turns}")
33
 
34
  # Call the model with the updated conversation turns and audio
35
  output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
36
 
37
+ # Append the model's response to the conversation history
38
  turns.append({'role': 'system', 'content': output})
39
 
40
  # Debug: Print the model's response
41
  print(f"Model output: {output}")
42
 
43
+ # Format the chat history for Gradio's Chatbot
44
+ chat_history_for_display = []
45
+ for turn in turns:
46
+ if turn['role'] == 'user':
47
+ chat_history_for_display.append(("User", "🗣️ (Spoken Audio)"))
48
+ else:
49
+ chat_history_for_display.append(("AI", turn['content']))
50
+
51
+ return chat_history_for_display, turns # Return the formatted chat history for display and the updated history
52
 
53
  except Exception as e:
54
  return f"Error: {str(e)}"
55
 
56
+ # Define the Gradio interface
57
  iface = gr.Interface(
58
  fn=transcribe_and_respond,
59
+ inputs=[
60
+ gr.Audio(sources="microphone", type="filepath", label="Your Audio (Microphone)"),
61
+ gr.State([]) # Hidden state to maintain conversation history
62
+ ],
63
+ outputs=[
64
+ gr.Chatbot(label="Conversation History"), # Display the conversation
65
+ gr.State([]) # Hidden state to keep track of the updated conversation history
66
+ ],
67
  title="🎙️ AI Chat with Live Transcription",
68
+ description="Talk to the AI through your microphone, and it will respond conversationally, keeping the entire conversation visible.",
69
  live=True,
70
  allow_flagging="auto",
 
71
  )
72
 
73
  if __name__ == "__main__":