# coding=utf-8
import os
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
import spaces
import torch
import torchaudio

from sv import process_audio
@spaces.GPU
def model_inference(input_wav, language):
    """Transcribe a single audio clip with the `sv` speaker/ASR pipeline.

    Args:
        input_wav: Either a path-like accepted by ``process_audio`` or a
            ``(sample_rate, np.ndarray)`` tuple as produced by ``gr.Audio``.
        language: Language code passed through to ``process_audio``;
            falsy values fall back to ``"auto"``.

    Returns:
        Whatever ``process_audio`` returns for the (16 kHz mono) clip.
    """
    # Default to automatic language detection when none is selected.
    language = language if language else "auto"

    # gr.Audio delivers (sample_rate, int16 ndarray) tuples.
    if isinstance(input_wav, tuple):
        fs, input_wav = input_wav
        # int16 PCM -> float32 in [-1.0, 1.0].
        input_wav = input_wav.astype(np.float32) / np.iinfo(np.int16).max
        # Down-mix multi-channel audio to mono.
        input_wav = input_wav.mean(-1) if len(input_wav.shape) > 1 else input_wav
        # The pipeline expects 16 kHz input; resample if needed.
        if fs != 16000:
            resampler = torchaudio.transforms.Resample(fs, 16000)
            input_wav = resampler(torch.from_numpy(input_wav).float()[None, :])[
                0
            ].numpy()

    # BUG FIX: the original wrote to a shared hard-coded "temp.wav", which
    # races when two requests run concurrently and was never cleaned up.
    # Use a unique temp file and always remove it afterwards.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    try:
        sf.write(tmp.name, input_wav, 16000)
        result = process_audio(tmp.name, language=language)
    finally:
        os.remove(tmp.name)
    return result
def launch():
    """Build and launch the Gradio UI for the Cantonese call transcriber."""
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Cantonese Call Transcriber")
        gr.Markdown("## Try an example:")
        # Create the components without rendering so they can be placed
        # explicitly in the layout below.  (In Gradio Blocks a component
        # renders where it is CREATED; the original's bare `audio_inputs` /
        # `text_outputs` expression statements inside the Columns were no-ops,
        # so the intended layout never materialized.)
        audio_inputs = gr.Audio(label="Input", render=False)
        text_outputs = gr.Textbox(lines=10, label="Output", render=False)
        # Examples row sits above the input, as before.
        with gr.Row():
            gr.Examples(
                examples=[["example/scb.mp3"]],
                inputs=[audio_inputs],
                outputs=text_outputs,
                fn=lambda x: model_inference(x, "yue"),
                examples_per_page=1,
                scale=1,
            )
        with gr.Row():
            with gr.Column(scale=2):
                audio_inputs.render()
                fn_button = gr.Button("Process Audio", variant="primary")
            with gr.Column(scale=3):
                text_outputs.render()
        # BUG FIX: the button was never connected to the inference function,
        # so clicking "Process Audio" did nothing.  Cantonese ("yue") matches
        # the Examples handler above.
        fn_button.click(
            fn=lambda x: model_inference(x, "yue"),
            inputs=[audio_inputs],
            outputs=text_outputs,
        )
    demo.launch()
# Script entry point: build and serve the Gradio demo.
if __name__ == "__main__":
    launch()