archit11 committed
Commit b268601 · verified · 1 Parent(s): 70351e3

Update app.py

Files changed (1): app.py (+19 -8)
app.py CHANGED
@@ -2,7 +2,8 @@ import transformers
 import gradio as gr
 import torch
 import numpy as np
-from typing import Dict, List
+from typing import Dict, List, Tuple
+import librosa  # needed for the resampling step below
 import spaces
 
 # Constants
@@ -11,12 +12,15 @@ SAMPLE_RATE = 16000
 MAX_NEW_TOKENS = 256
 
 # Load the pipeline
-pipe = transformers.pipeline(
-    model=MODEL_NAME,
-    trust_remote_code=True,
-    device=0,
-    torch_dtype='bfloat16'
-)
+def load_pipeline():
+    return transformers.pipeline(
+        model=MODEL_NAME,
+        trust_remote_code=True,
+        device=0,
+        torch_dtype=torch.bfloat16
+    )
+
+pipe = load_pipeline()
 
 def create_conversation_turns(prompt: str) -> List[Dict[str, str]]:
     return [
@@ -25,17 +29,25 @@ def create_conversation_turns(prompt: str) -> List[Dict[str, str]]:
     ]
 
 @spaces.GPU(duration=120)
-def transcribe_and_respond(audio: np.ndarray) -> str:
+def transcribe_and_respond(audio_input: Tuple[int, np.ndarray]) -> str:
     try:
+        # Unpack the (sample_rate, samples) tuple that Gradio audio inputs provide
+        sample_rate, audio = audio_input
+
         # Ensure audio is float32
         if audio.dtype != np.float32:
             audio = audio.astype(np.float32)
 
+        # Resample to the model's expected rate if necessary
+        if sample_rate != SAMPLE_RATE:
+            audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=SAMPLE_RATE)
+
         # Create input for the pipeline
         turns = create_conversation_turns("<|audio|>")
         inputs = {
             'audio': audio,
             'turns': turns,
+            'sampling_rate': SAMPLE_RATE
         }
 
         # Generate response
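
For context, a minimal sketch of how the updated handler could be wired up in the rest of app.py, which this diff does not show. The gr.Interface call and Audio component options below are assumptions (Gradio 4.x API), not part of this commit; with type="numpy", the Audio component passes exactly the (sample_rate, np.ndarray) tuple the new signature expects.

import gradio as gr

# Hypothetical wiring, not shown in the diff: with type="numpy", Gradio
# calls the handler with a (sample_rate, np.ndarray) tuple, matching the
# new transcribe_and_respond signature.
demo = gr.Interface(
    fn=transcribe_and_respond,
    inputs=gr.Audio(sources=["microphone", "upload"], type="numpy"),
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()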
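
And a quick self-contained check of the resampling step in isolation (the 440 Hz test tone and 44.1 kHz input rate are arbitrary; librosa's keyword-only orig_sr/target_sr signature is assumed):

import numpy as np
import librosa

SAMPLE_RATE = 16000  # target rate, as in app.py

# One second of a 440 Hz tone captured at 44.1 kHz...
sr_in = 44100
t = np.linspace(0, 1, sr_in, endpoint=False)
tone = np.sin(2 * np.pi * 440 * t).astype(np.float32)

# ...comes out as exactly one second of samples at 16 kHz after resampling.
resampled = librosa.resample(tone, orig_sr=sr_in, target_sr=SAMPLE_RATE)
print(resampled.shape)  # (16000,)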