File size: 1,628 Bytes
f481a94
 
 
 
08f9ba3
f481a94
01d02b4
f481a94
 
 
 
 
 
 
08f9ba3
f481a94
 
 
 
 
5eb8f47
f481a94
 
08f9ba3
f481a94
5eb8f47
 
 
f481a94
 
827455e
966d371
5eb8f47
 
 
 
08f9ba3
 
 
f481a94
 
08f9ba3
5eb8f47
f481a94
 
 
08f9ba3
 
 
2ba320e
5eb8f47
f481a94
08f9ba3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import gradio as gr
import librosa
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import torch

# config
# Checkpoint identifier handed to `from_pretrained`. The processor and the CTC
# model are both loaded from it once, at import time, and are then used as
# module-level globals by the audio-processing and transcription functions.
model_name = "vumichien/wav2vec2-xls-r-1b-japanese"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)


def process_audio_file(file):
    """Load an audio clip and convert it into model-ready input tensors.

    Args:
        file: Path of the audio clip (anything ``librosa.load`` accepts).

    Returns:
        The output of the module-level ``processor`` for the waveform sampled
        at 16 kHz, with PyTorch tensors (``return_tensors="pt"``) and padding
        enabled.
    """
    target_sr = 16000
    # Let librosa resample while loading. The previous code loaded at
    # librosa's default rate and then called
    # `librosa.resample(data, sr, 16000)` with positional rates, which raises
    # TypeError on librosa >= 0.10 (orig_sr/target_sr are keyword-only there)
    # and resampled the signal twice on older versions.
    data, _ = librosa.load(file, sr=target_sr)
    # Kept from the original: logs the waveform shape for each request.
    print(data.shape)
    inputs = processor(data, sampling_rate=target_sr, return_tensors="pt", padding=True)
    return inputs


def transcribe(file, state=""):
    """Transcribe one audio clip and append the text to the running transcript.

    Args:
        file: Path of the recorded clip. May be ``None`` (or empty) because
            the audio input is declared optional and the interface runs in
            live mode, so the callback can fire before anything is recorded.
        state: Transcript accumulated by previous calls. ``None`` is treated
            as an empty transcript.

    Returns:
        A ``(transcript, transcript)`` tuple: the same updated string is
        returned twice, once for the textbox output and once for the state
        output.
    """
    # Guard against a missing state value so the concatenation below is safe.
    if state is None:
        state = ""
    # Nothing recorded yet: leave the transcript unchanged instead of letting
    # the audio loader fail on a None path.
    if not file:
        return state, state

    inputs = process_audio_file(file)
    # Inference only; no gradients are needed.
    with torch.no_grad():
        output_logit = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
    # Greedy decoding: take the highest-scoring token id at every frame and
    # let the processor turn the id sequence into text.
    pred_ids = torch.argmax(output_logit, dim=-1)
    text = processor.batch_decode(pred_ids)[0]
    state += text + " "
    return state, state


# Texts passed to the interface as `description` and `article` (the latter
# contains raw HTML with the author link).
description = "A simple interface to transcribe from spoken Japanese to Hiragana."
article = "Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>."

# `transcribe` receives the microphone recording as a file path plus the
# "state" value, and returns the transcript for both the textbox and the state.
inputs = [
    gr.inputs.Audio(source="microphone", type="filepath", optional=True),
    "state",
]
outputs = ["textbox", "state"]

# Sample recordings offered as examples; each row supplies only the audio path.
examples = [
    ["samples/BASIC5000_0001.wav"],
    ["samples/BASIC5000_0005.wav"],
]

iface = gr.Interface(
    fn=transcribe,
    inputs=inputs,
    outputs=outputs,
    layout="horizontal",
    theme="huggingface",
    title="Transcribe Japanese audio to Hiragana",
    description=description,
    article=article,
    allow_flagging='never',
    examples=examples,
    live=True,
)

# Start the app with request queuing enabled and a public share link requested.
iface.launch(enable_queue=True, share=True)