archit11 committed on
Commit 62dda31 · verified · 1 Parent(s): ee83532

Update app.py

Files changed (1): app.py +1 -6
app.py CHANGED
@@ -5,12 +5,11 @@ import numpy as np
 from typing import Dict, List, Tuple
 import spaces
 import librosa
-# Constants
+
 MODEL_NAME = 'sarvamai/shuka_v1'
 SAMPLE_RATE = 16000
 MAX_NEW_TOKENS = 256
 
-# Load the pipeline
 def load_pipeline():
     return transformers.pipeline(
         model=MODEL_NAME,
@@ -37,11 +36,9 @@ def transcribe_and_respond(audio_input: Tuple[int, np.ndarray]) -> str:
         if audio.dtype != np.float32:
             audio = audio.astype(np.float32)
 
-        # Resample if necessary
         if sample_rate != SAMPLE_RATE:
             audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=SAMPLE_RATE)
 
-        # Create input for the pipeline
         turns = create_conversation_turns("<|audio|>")
         inputs = {
             'audio': audio,
@@ -49,14 +46,12 @@ def transcribe_and_respond(audio_input: Tuple[int, np.ndarray]) -> str:
             'sampling_rate': SAMPLE_RATE
         }
 
-        # Generate response
         response = pipe(inputs, max_new_tokens=MAX_NEW_TOKENS)
 
         return response
     except Exception as e:
         return f"Error processing audio: {str(e)}"
 
-# Create the Gradio interface
 iface = gr.Interface(
     fn=transcribe_and_respond,
     inputs=gr.Audio(sources="microphone", type="numpy"),
 
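For context, a minimal sketch of how app.py fits together after this commit. Only the hunks above come from the Space itself; the body of create_conversation_turns, the @spaces.GPU decorator, the extra transformers.pipeline() keyword arguments, and the Gradio outputs/launch lines are not visible in the diff, so the versions below are assumptions for illustration, not the Space's actual code.

# Sketch only: reconstructs the surrounding app.py from the hunks in this commit.
# Anything not visible in the diff is an assumption and is marked as such below.
import gradio as gr
import transformers
import numpy as np
from typing import Dict, List, Tuple
import spaces
import librosa

MODEL_NAME = 'sarvamai/shuka_v1'
SAMPLE_RATE = 16000
MAX_NEW_TOKENS = 256


def load_pipeline():
    # The diff only shows model=MODEL_NAME; the remaining kwargs are assumed.
    return transformers.pipeline(
        model=MODEL_NAME,
        trust_remote_code=True,
        device=0,
    )


pipe = load_pipeline()


def create_conversation_turns(prompt: str) -> List[Dict[str, str]]:
    # Assumed helper: the diff calls it but never shows its body.
    return [
        {'role': 'system', 'content': 'Respond naturally and informatively.'},
        {'role': 'user', 'content': prompt},
    ]


@spaces.GPU  # assumed from `import spaces` (ZeroGPU Spaces convention)
def transcribe_and_respond(audio_input: Tuple[int, np.ndarray]) -> str:
    try:
        sample_rate, audio = audio_input

        # gr.Audio(type="numpy") hands back raw samples; match the diff's cast.
        if audio.dtype != np.float32:
            audio = audio.astype(np.float32)

        if sample_rate != SAMPLE_RATE:
            audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=SAMPLE_RATE)

        turns = create_conversation_turns("<|audio|>")
        inputs = {
            'audio': audio,
            'turns': turns,  # key name assumed; the diff shows 'audio' and 'sampling_rate'
            'sampling_rate': SAMPLE_RATE,
        }

        response = pipe(inputs, max_new_tokens=MAX_NEW_TOKENS)
        return response
    except Exception as e:
        return f"Error processing audio: {str(e)}"


iface = gr.Interface(
    fn=transcribe_and_respond,
    inputs=gr.Audio(sources="microphone", type="numpy"),
    outputs="text",  # assumed; not shown in the diff
)

iface.launch()  # assumed; the launch call is not shown in the diff

The commit itself only strips six inline comments and adds one blank line (+1 -6), so the runtime behaviour of the Space is unchanged.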