Spaces:
Sleeping
Sleeping
| import transformers | |
| import gradio as gr | |
| import librosa | |
| import torch | |
| import spaces | |
| import numpy as np | |
# Module-level conversation history.
# NOTE(review): nothing in the visible code reads or writes this list; the
# per-session history is threaded through the callback's `chat_history`
# argument (backed by gr.State). Confirm whether this global is still needed.
conversation_history = []
# Lazily-created speech pipeline, shared by every request so the model is
# loaded once instead of on each callback invocation.
_PIPELINE = None

# Text shown in the chat window in place of the user's spoken audio.
_SPOKEN_PLACEHOLDER = "🗣️ (Spoken Audio)"


def _get_pipeline():
    """Return the shared Shuka pipeline, creating it on first use."""
    global _PIPELINE
    if _PIPELINE is None:
        _PIPELINE = transformers.pipeline(
            model='sarvamai/shuka_v1',
            trust_remote_code=True,
            device=0,
            torch_dtype=torch.bfloat16
        )
    return _PIPELINE


def _format_for_chatbot(turns):
    """Convert role/content turns into Gradio Chatbot (user, bot) pairs.

    Each user turn opens a new pair whose bot side is empty; the next
    non-user turn fills that slot. A non-user turn with no pending user
    message is shown on its own with an empty user side.
    """
    pairs = []
    for turn in turns:
        if turn['role'] == 'user':
            pairs.append((_SPOKEN_PLACEHOLDER, None))
        elif pairs and pairs[-1][1] is None:
            pairs[-1] = (pairs[-1][0], turn['content'])
        else:
            pairs.append((None, turn['content']))
    return pairs


def transcribe_and_respond(audio_file, chat_history):
    """Send one recorded utterance to the model and extend the conversation.

    Args:
        audio_file: Path to the recorded audio, or None when no recording is
            available (e.g. the audio widget was cleared in live mode).
        chat_history: List of ``{'role': ..., 'content': ...}`` turns from
            earlier calls; it is not mutated.

    Returns:
        A ``(display, history)`` tuple: ``display`` is a list of
        ``(user_message, bot_message)`` pairs for ``gr.Chatbot`` and
        ``history`` is the updated list of turns to store in ``gr.State``.
        On failure the history is returned unchanged and the error text is
        appended to the display as a bot message.
    """
    history = list(chat_history) if chat_history else []

    # Nothing was recorded: just re-render what we already have.
    if audio_file is None:
        return _format_for_chatbot(history), history

    try:
        # Load (and resample to 16 kHz) before touching the model so a bad
        # file fails fast.
        audio, sr = librosa.load(audio_file, sr=16000)
        print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")

        # NOTE(review): earlier user turns keep the '<|audio|>' placeholder
        # although only the newest clip is passed to the model -- confirm how
        # the pipeline treats repeated placeholders.
        turns = history + [{'role': 'user', 'content': '<|audio|>'}]
        print(f"Updated turns: {turns}")

        # Pass a copy of the turns so the pipeline cannot alter our history.
        output = _get_pipeline()(
            {'audio': audio, 'turns': list(turns), 'sampling_rate': sr},
            max_new_tokens=512,
        )

        # The reply is an assistant message, not a system prompt.
        turns.append({'role': 'assistant', 'content': output})
        print(f"Model output: {output}")

        return _format_for_chatbot(turns), turns
    except Exception as e:
        # UI boundary: surface the failure in the chat window in the format
        # the Chatbot component expects, and keep the stored history intact.
        display = _format_for_chatbot(history)
        display.append((None, f"Error: {str(e)}"))
        return display, history
# Build the UI components first, in the order they appear in the interface.
_mic_input = gr.Audio(sources="microphone", type="filepath", label="Your Audio (Microphone)")
_history_in = gr.State([])  # hidden: conversation turns passed into the callback
_chat_view = gr.Chatbot(label="Conversation History")  # visible transcript
_history_out = gr.State([])  # hidden: updated turns returned by the callback

# Wire the components to the callback; live mode re-runs it whenever the
# audio input changes.
iface = gr.Interface(
    fn=transcribe_and_respond,
    inputs=[_mic_input, _history_in],
    outputs=[_chat_view, _history_out],
    title="Shuka demo",
    description="shuka live demo",
    live=True,
    allow_flagging="auto",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()