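"""Gradio Space that records microphone audio and sends it to the
sarvamai/shuka_v1 audio language model, returning the model's text
response."""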
import transformers
import gradio as gr
import torch
import numpy as np
from typing import Dict, List, Tuple
import spaces
import librosa

MODEL_NAME = 'sarvamai/shuka_v1'  # Sarvam's audio language model
SAMPLE_RATE = 16000               # sampling rate the model expects
MAX_NEW_TOKENS = 256              # cap on generated tokens per response

# Build the pipeline once at startup; trust_remote_code is required because
# the model ships its own pipeline implementation.
def load_pipeline():
    return transformers.pipeline(
        model=MODEL_NAME,
        trust_remote_code=True,
        device=0,
        torch_dtype=torch.bfloat16
    )

pipe = load_pipeline()

def create_conversation_turns(prompt: str) -> List[Dict[str, str]]:
    # The user turn should carry the '<|audio|>' placeholder, which tells the
    # pipeline where to splice the audio into the conversation.
    return [
        {'role': 'system', 'content': 'Respond naturally and informatively.'},
        {'role': 'user', 'content': prompt}
    ]

# On ZeroGPU Spaces, @spaces.GPU allocates a GPU for each call; duration=120
# caps the allocation at 120 seconds.
@spaces.GPU(duration=120)
def transcribe_and_respond(audio_input: Tuple[int, np.ndarray]) -> str:
    try:
        if audio_input is None:
            return "No audio received."

        # Unpack the (sample_rate, samples) tuple Gradio provides
        sample_rate, audio = audio_input

        # Gradio delivers integer PCM (usually int16); normalize to float32
        # in [-1, 1] rather than casting the raw sample values
        if np.issubdtype(audio.dtype, np.integer):
            audio = audio.astype(np.float32) / np.iinfo(audio.dtype).max
        elif audio.dtype != np.float32:
            audio = audio.astype(np.float32)

        # Downmix stereo to mono
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        # Resample to the rate the model expects
        if sample_rate != SAMPLE_RATE:
            audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=SAMPLE_RATE)

        # Prepare the inputs for the model
        turns = create_conversation_turns('<|audio|>')
        inputs = {
            'audio': audio,
            'turns': turns,
            'sampling_rate': SAMPLE_RATE
        }

        response = pipe(inputs, max_new_tokens=MAX_NEW_TOKENS)

        return str(response)
    except Exception as e:
        return f"Error processing audio: {e}"

# live=True re-runs inference automatically whenever a new recording is captured
iface = gr.Interface(
    fn=transcribe_and_respond,
    inputs=gr.Audio(sources=["microphone"], type="numpy"),
    outputs="text",
    title="Live Voice Input for Transcription and Response",
    description="Speak into your microphone, and the model will respond naturally and informatively.",
    live=True
)

# Launch the app
if __name__ == "__main__":
    iface.launch()
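
# A minimal sketch for exercising the handler locally, assuming a recording
# exists at the hypothetical path 'sample.wav' (the int16 cast mimics the
# numpy payload Gradio's microphone component produces):
#
#   audio, sr = librosa.load('sample.wav', sr=SAMPLE_RATE)
#   pcm = (audio * 32767).astype(np.int16)
#   print(transcribe_and_respond((SAMPLE_RATE, pcm)))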