Spaces:
Build error
Build error
File size: 1,829 Bytes
f481a94 08f9ba3 f481a94 85638c3 f481a94 08f9ba3 f481a94 988375c f481a94 08f9ba3 f481a94 5eb8f47 988375c f481a94 827455e 966d371 973c318 988375c 5eb8f47 88c750a 08f9ba3 f481a94 08f9ba3 5eb8f47 f481a94 08f9ba3 2ba320e 5eb8f47 f481a94 212a765 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import gradio as gr
import librosa
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import torch
# config
# Pretrained wav2vec2 XLSR model fine-tuned to emit Japanese hiragana tokens.
# Both downloads happen at import time (Space startup), not per request.
model_name = "vumichien/wav2vec2-large-xlsr-japanese-hiragana"
# Processor bundles the feature extractor (raw waveform -> input_values)
# and the CTC tokenizer used for decoding.
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)
def process_audio_file(file):
    """Load an audio file and convert it into model-ready features.

    Parameters
    ----------
    file : str
        Path to an audio file (as produced by a gradio Audio component
        with ``type="filepath"``).

    Returns
    -------
    transformers.BatchFeature
        Padded ``input_values`` / ``attention_mask`` PyTorch tensors for a
        single 16 kHz mono waveform.
    """
    # Resample at load time: librosa.load() otherwise defaults to 22050 Hz,
    # which forced a second, lossy resample. This also avoids the old
    # positional librosa.resample(data, sr, 16000) call, which breaks on
    # librosa >= 0.10 where orig_sr/target_sr are keyword-only.
    data, sr = librosa.load(file, sr=16000)
    inputs = processor(data, sampling_rate=16000, return_tensors="pt", padding=True)
    return inputs
def transcribe(micro, file):
    """Transcribe spoken Japanese audio to hiragana text.

    Parameters
    ----------
    micro : str or None
        Filepath of a microphone recording, or ``None`` if not recorded.
    file : str or None
        Filepath of an uploaded audio file, or ``None`` if not uploaded.

    Returns
    -------
    str
        The greedy-decoded hiragana transcription, or a prompt message
        when no audio was provided.
    """
    # Prefer the uploaded file when both inputs are present (same policy as
    # the original three-way branch, collapsed into one expression).
    input_audio = file if file is not None else micro
    if input_audio is None:
        # Guard: with live=True gradio can invoke the fn before any audio
        # exists; the original code crashed inside librosa.load(None) here.
        return "Please record or upload an audio file."
    inputs = process_audio_file(input_audio)
    with torch.no_grad():
        output_logit = model(inputs.input_values,
                             attention_mask=inputs.attention_mask).logits
    # Greedy CTC decode: argmax per frame, then collapse blanks/repeats.
    pred_ids = torch.argmax(output_logit, dim=-1)
    text = processor.batch_decode(pred_ids)[0]
    return text
# Copy shown around the interface.
description = "A simple interface to transcribe from spoken Japanese to Hiragana."
article = 'Author: <a href="https://huggingface.co/vumichien">Vu Minh Chien</a>.'

# Two optional audio sources, both delivered to transcribe() as filepaths:
# a live microphone recording and a file upload.
inputs = [
    gr.Audio(source="microphone", type="filepath", optional=True),
    gr.Audio(source="upload", type="filepath", optional=True),
]
outputs = ["textbox"]

# Sample clips bundled with the Space; the second column is the (empty)
# upload slot so each example fills only the microphone input's partner.
examples = [
    ["samples/BASIC5000_0001.wav", ""],
    ["samples/BASIC5000_0005.wav", ""],
]
# Assemble the app. live=True re-runs transcription whenever an audio
# input changes, so transcribe() must tolerate missing inputs.
iface = gr.Interface(
    fn=transcribe,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    title="Transcribe Japanese audio to Hiragana",
    description=description,
    article=article,
    layout="horizontal",
    theme="huggingface",
    allow_flagging="never",
    live=True,
)
# Queueing serializes requests so concurrent users share the single model.
iface.launch(enable_queue=True)
|