File size: 1,090 Bytes
f84f4f1
 
 
 
 
 
 
 
 
 
 
 
34ef833
 
f84f4f1
 
e7f5f31
f84f4f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from transformers import pipeline
from gradio_client import Client
# Speech-to-text pipeline, built once at import time and reused by every
# call to transcribe_the_command.
asr_pipe = pipeline(
    task="automatic-speech-recognition",
    model="Abdullah17/whisper-small-urdu",
)

def transcribe_the_command(audio):
    """Transcribe a recording and forward the transcript to the chat endpoint.

    Args:
        audio: A ``(sample_rate, samples)`` pair as delivered by the audio
            input, or ``None`` (e.g. when nothing was recorded).

    Returns:
        Whatever the remote ``/chat`` endpoint returns for the transcript,
        or an empty string when ``audio`` is ``None``.
    """
    # Nothing to transcribe: return early instead of failing on the tuple
    # unpacking below.
    if audio is None:
        return ""

    # Imported after the guard so the empty-input path needs no audio backend.
    import os
    import tempfile

    import soundfile as sf

    sample_rate, audio_data = audio

    # A private temporary directory per call keeps concurrent requests from
    # overwriting each other's recording (a fixed file name in the working
    # directory would be shared by every caller) and cleans the file up
    # once the transcript has been produced.
    # The samples are written exactly as received; no channel mixing is
    # done here.
    with tempfile.TemporaryDirectory() as scratch_dir:
        file_name = os.path.join(scratch_dir, "recorded_audio.wav")
        sf.write(file_name, audio_data, sample_rate)
        transcript = asr_pipe(file_name)["text"]
    print(transcript)

    # Send the transcript to the hosted chat endpoint and hand back its reply.
    client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")
    result = client.predict(
        transcript,
        api_name="/chat",
    )

    return result
# Leftover debug call (this name is not defined in the visible code): get_text_from_voice("urdu.wav")
import gradio as gr


# Web UI: one microphone recording in, the text returned by
# transcribe_the_command out.
iface = gr.Interface(
    fn=transcribe_the_command,
    # Use the top-level Audio component instead of the deprecated
    # gr.inputs namespace; the arguments are unchanged.
    # NOTE(review): newer Gradio majors rename `source` to `sources=[...]` —
    # confirm against the installed version before upgrading.
    inputs=gr.Audio(source="microphone", label="Recorded Audio"),
    outputs="text",
    title="Whisper Small Urdu Command",
    description="Realtime demo for Urdu speech recognition using a fine-tuned Whisper small model and outputting the estimated command on the basis of speech transcript.",
)

# Start serving the interface.
iface.launch()