	Update app.py
    	
app.py CHANGED

@@ -3,6 +3,7 @@ import librosa
 import gradio as gr
 import spaces
 
+# Load the model pipeline on GPU:0
 pipe = transformers.pipeline(
     model='sarvamai/shuka_v1',
     trust_remote_code=True,
@@ -12,17 +13,28 @@ pipe = transformers.pipeline(
 
 @spaces.GPU(duration=120)
 def transcribe_and_respond(audio_file):
-
-
-
-
-
-
-
-
-
-
+    try:
+        # Check if the audio file is valid and exists
+        if audio_file is None or not isinstance(audio_file, str):
+            raise ValueError("Invalid audio file input.")
+
+        # Load the audio using librosa
+        audio, sr = librosa.load(audio_file, sr=16000)
+
+        # Prepare the conversation turns
+        turns = [
+            {'role': 'system', 'content': 'Respond naturally and informatively.'},
+            {'role': 'user', 'content': ''}
+        ]
+
+        # Run inference with the pipeline
+        response = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
+
+        return response
+    except Exception as e:
+        return f"Error processing audio: {str(e)}"
 
+# Create the Gradio interface with microphone input
 iface = gr.Interface(
     fn=transcribe_and_respond,
     inputs=gr.Audio(sources="microphone", type="filepath"),  # Use the microphone for audio input
@@ -31,4 +43,5 @@ iface = gr.Interface(
     description="Record your voice, and the model will respond naturally and informatively."
 )
 
+# Launch the Gradio app
 iface.launch()
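For quick local verification outside the Space, the updated handler logic can be exercised without launching the Gradio UI. The following is a minimal sketch, not part of the commit: it mirrors the pipeline setup and inference call shown in the diff, sample.wav is a hypothetical local recording, and device=0 is an assumption for a single-GPU machine (the diff only shows the first two pipeline arguments).

    import librosa
    import transformers

    # Mirrors the pipeline setup from app.py; 'sarvamai/shuka_v1' ships custom
    # code, hence trust_remote_code=True. device=0 assumes a CUDA GPU (drop it
    # to run on CPU).
    pipe = transformers.pipeline(
        model='sarvamai/shuka_v1',
        trust_remote_code=True,
        device=0,
    )

    # 'sample.wav' is a hypothetical local recording; resampling to 16 kHz
    # matches the librosa.load(..., sr=16000) call in app.py.
    audio, sr = librosa.load('sample.wav', sr=16000)

    # Same conversation turns the updated handler builds internally.
    turns = [
        {'role': 'system', 'content': 'Respond naturally and informatively.'},
        {'role': 'user', 'content': ''},
    ]

    # Same payload transcribe_and_respond passes to the pipeline.
    response = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr},
                    max_new_tokens=512)
    print(response)

If the call succeeds, response holds the model's reply; if it raises, the Space's new try/except would instead surface the error string to the Gradio UI.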