File size: 2,312 Bytes
06fc5c8 7730969 63cc258 7730969 63cc258 7730969 a23d52e 06fc5c8 7730969 63cc258 7730969 06fc5c8 7730969 06fc5c8 63cc258 06fc5c8 7730969 63cc258 a23d52e 7730969 06fc5c8 63cc258 7730969 06fc5c8 63cc258 06fc5c8 8581ee6 06fc5c8 a23d52e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import os
import time
import json
import gradio as gr
import torch
import torchaudio
import numpy as np
from denoiser.demucs import Demucs
from pydub import AudioSegment
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Hugging Face Hub access token, read from the HF_TOKEN environment variable
# (os.getenv returns None when the variable is unset).
auth_token = os.getenv("HF_TOKEN")
# Load the model and its tokenizer from the Hub, authenticating with the token
# above. The original author's note describes this repository as private.
model_id = "DeepLearning101/Speech-Quality-Inspection_Meta-Denoiser"
model = AutoModelForSequenceClassification.from_pretrained(model_id, token=auth_token)
tokenizer = AutoTokenizer.from_pretrained(model_id, token=auth_token)
def transcribe(file_upload, microphone):
file = microphone if microphone is not None else file_upload
demucs_model = Demucs(hidden=64)
state_dict = torch.load("path_to_model_checkpoint", map_location='cpu') # 请确保提供正确的模型文件路径
demucs_model.load_state_dict(state_dict)
x, sr = torchaudio.load(file)
out = demucs_model(x[None])[0]
out = out / max(out.abs().max().item(), 1)
torchaudio.save('enhanced.wav', out, sr)
enhanced = AudioSegment.from_wav('enhanced.wav') # 只有去完噪的需要降bitrate再做语音识别
enhanced.export('enhanced.wav', format="wav", bitrate="256k")
# 假设模型是用于文本分类
inputs = tokenizer("enhanced.wav", return_tensors="pt")
outputs = model(**inputs)
predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
return "enhanced.wav", predictions
# Gradio UI: two audio inputs (uploaded file, microphone recording) feed
# transcribe(); the outputs are the denoised audio and the model predictions.
demo = gr.Interface(
    fn=transcribe,
    # Inputs are passed positionally, so their order must match
    # transcribe(file_upload, microphone). The file-based examples below
    # populate the first component, which is therefore the file input.
    inputs=[
        gr.Audio(type="filepath", label="语音质检原始音档"),
        gr.Audio(type="filepath", label="语音质检麦克风实时录音"),
    ],
    outputs=[
        gr.Audio(type="filepath", label="Output"),
        gr.Textbox(label="Model Predictions"),
    ],
    # The closing </p> was missing in the original markup.
    title="<p style='text-align: center'><a href='https://www.twman.org/AI' target='_blank'>语音质检噪音去除 (语音增强):Meta Denoiser</a></p>",
    description="为了提升语音识别的效果,可以在识别前先进行噪音去除",
    allow_flagging="never",
    # Each example row supplies a value for the first input only.
    examples=[
        ["exampleAudio/15s_2020-03-27_sep1.wav"],
        ["exampleAudio/13s_2020-03-27_sep2.wav"],
        ["exampleAudio/30s_2020-04-23_sep1.wav"],
        ["exampleAudio/15s_2020-04-23_sep2.wav"],
    ],
)
demo.launch(debug=True)
|