archit11 committed
Commit e48e518 · verified · 1 Parent(s): 13bb1b1

Update app.py

Files changed (1)
  1. app.py +26 -16
app.py CHANGED
@@ -5,8 +5,11 @@ import torch
 import spaces
 import numpy as np

-@spaces.GPU(duration=60)
-def transcribe_and_respond(audio_file):
+# Maintain conversation history globally
+conversation_history = []
+
+@spaces.GPU(duration=15)
+def transcribe_and_respond(audio_file, chat_history):
     try:
         pipe = transformers.pipeline(
             model='sarvamai/shuka_v1',
@@ -21,32 +24,39 @@ def transcribe_and_respond(audio_file):
         # Print audio properties for debugging
         print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")

-        turns = [
-            {'role': 'system', 'content': 'Respond naturally and informatively.'},
-            {'role': 'user', 'content': '<|audio|>'}
-        ]
+        # Prepare conversation turns
+        turns = chat_history.copy()  # Take existing chat history
+
+        # Add the audio to the current turn
+        turns.append({'role': 'user', 'content': '<|audio|>'})

-        # Debug: Print the initial turns
-        print(f"Initial turns: {turns}")
+        # Debug: Print the updated turns
+        print(f"Updated turns: {turns}")

-        # Call the model with the audio and prompt
+        # Call the model with the updated conversation turns and audio
         output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)

-        # Debug: Print the final output from the model
+        # Append the model's response to the chat history
+        turns.append({'role': 'system', 'content': output})
+
+        # Debug: Print the model's response
         print(f"Model output: {output}")

-        return output
+        return turns, turns  # Return updated history to display

     except Exception as e:
         return f"Error: {str(e)}"

+# Chat interface setup
 iface = gr.Interface(
     fn=transcribe_and_respond,
-    inputs=gr.Audio(sources="microphone", type="filepath"),
-    outputs="text",
-    title="Live Transcription and Response",
-    description="Speak into your microphone, and the model will respond naturally and informatively.",
-    live=True
+    inputs=[gr.Audio(sources="microphone", type="filepath", label="Your Audio (Microphone)"), gr.Chatbot(label="Conversation")],
+    outputs=gr.Chatbot(label="Conversation"),
+    title="🎙️ AI Chat with Live Transcription",
+    description="Talk to the AI through your microphone, and it will respond conversationally based on the ongoing chat. Keep the conversation going!",
+    live=True,
+    allow_flagging="auto",
+    enable_queue=True
 )

 if __name__ == "__main__":
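
Note on the returned history: as committed, transcribe_and_respond returns a two-element tuple (turns, turns) while the interface declares a single gr.Chatbot output, and the classic gr.Chatbot component renders a list of (user, assistant) message pairs rather than the role-dict turns the model consumes. A minimal conversion sketch, assuming the pair-based Chatbot format; the to_chatbot_pairs helper is illustrative and not part of this commit:

def to_chatbot_pairs(turns):
    """Flatten role-dict turns into (user, assistant) pairs for gr.Chatbot.

    Follows the app's own convention: user turns hold the '<|audio|>'
    placeholder and model replies are appended under the 'system' role.
    """
    pairs, pending_user = [], None
    for turn in turns:
        if turn['role'] == 'user':
            pending_user = turn['content']
        else:
            # Treat any non-user turn as the model's reply to the pending user turn
            pairs.append((pending_user, turn['content']))
            pending_user = None
    if pending_user is not None:
        pairs.append((pending_user, None))  # user spoke, no reply yet
    return pairs

With a helper like this, the function could return to_chatbot_pairs(turns) as a single value matching the single declared output; alternatively, newer Gradio versions accept role-dict histories directly via gr.Chatbot(type="messages").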