import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import requests
import soundfile as sf
import torch


# Remote location of the checkpoint file and the local path it is cached at.
MODEL_URL = "https://huggingface.co/MMVC/prelearned-model/resolve/main/D_v13_20231020.pth"
MODEL_PATH = "model/D_v13_20231020.pth"

# Create the cache directory up front so MODEL_PATH can be opened for writing.
# The directory is derived from MODEL_PATH so the two can never drift apart.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)


def download_model():
    """Download the checkpoint at MODEL_URL to MODEL_PATH unless it already exists.

    The response is streamed to a temporary ``.part`` file next to MODEL_PATH
    and renamed into place only after the transfer finished, so an interrupted
    or failed download never leaves a truncated file that later runs would
    mistake for a complete checkpoint.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
        OSError: If the file cannot be written.
    """
    if os.path.exists(MODEL_PATH):
        return

    print("Downloading model...")
    partial_path = MODEL_PATH + ".part"
    try:
        # (connect timeout, read timeout) in seconds; without a timeout a
        # stalled connection would block start-up forever.
        with requests.get(MODEL_URL, stream=True, timeout=(10, 60)) as response:
            # Fail loudly instead of saving an HTML error page as the model.
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        os.replace(partial_path, MODEL_PATH)
    finally:
        # After a successful os.replace the partial file is gone and this is
        # a no-op; after a failure it removes the incomplete download.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Model downloaded.")


# Fetch the checkpoint at import time, before the UI is built.
# NOTE(review): nothing in the visible code loads MODEL_PATH afterwards -- confirm
# whether the download is still needed while the dummy model is in use.
download_model()


class DummyVoiceChanger(torch.nn.Module):
    """Placeholder model that multiplies the waveform by a single scalar gain."""

    def __init__(self):
        super().__init__()
        # Scalar multiplier initialised to 1.0, so a fresh instance returns
        # its input unchanged (apart from the float32 conversion).
        self.gain = torch.nn.Parameter(torch.tensor(1.0))

    def forward(self, audio):
        """Scale ``audio`` by ``self.gain`` and return the result as a NumPy array.

        Args:
            audio: Samples as a NumPy array, a (nested) sequence of numbers,
                or a tensor.

        Returns:
            A new float32 ``numpy.ndarray`` with the same shape as ``audio``;
            it does not share memory with the input.
        """
        # The result leaves torch as a NumPy array, so no gradient can ever
        # flow through it; skip building the autograd graph altogether.
        with torch.no_grad():
            # as_tensor accepts arrays and tensors alike (torch.tensor warns
            # when handed a tensor) and places the samples on the same device
            # as the parameter.
            samples = torch.as_tensor(
                audio, dtype=torch.float32, device=self.gain.device
            )
            # .cpu() keeps the NumPy conversion valid if the module was moved
            # to an accelerator.
            return (samples * self.gain).cpu().numpy()


# Single shared instance used by the request handler.
model = DummyVoiceChanger()


def convert_voice(audio_file):
    """Run an audio file through ``model`` and return the path of the result.

    The file is loaded at 16 kHz, padded or truncated to exactly five seconds,
    passed through the module-level ``model``, peak-normalised and written as
    a WAV file.

    Args:
        audio_file: Path of the audio file to convert.

    Returns:
        Path of the WAV file that was written.

    Raises:
        gr.Error: If no audio file was provided.
    """
    sample_rate = 16000
    clip_seconds = 5

    # Submitting the form without an upload passes None; report that to the
    # user instead of failing inside the audio loader.
    if not audio_file:
        raise gr.Error("Please upload an audio file first.")

    audio_data, _ = librosa.load(audio_file, sr=sample_rate)
    audio_data = librosa.util.fix_length(audio_data, size=sample_rate * clip_seconds)

    converted = model(audio_data)
    # Peak-normalise; the epsilon avoids a division by zero on silent input.
    converted /= np.max(np.abs(converted)) + 1e-6

    # Use a unique file per call: a fixed name would let concurrent requests
    # overwrite each other's output. The handle is closed before writing so
    # the path can be reopened on every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    sf.write(output_path, converted, sample_rate)
    return output_path


# Single-function UI: one uploaded audio file in, one converted audio file out.
interface = gr.Interface(
    fn=convert_voice,
    inputs=gr.Audio(type="filepath", label="Upload Voice"),
    outputs=gr.Audio(type="filepath", label="Converted Voice"),
    title="🗣️ AI Voice Changer (No RVC / No TTS)",
    description=(
        "Simple PyTorch voice changer using a dummy model and direct model download. "
        "Replace dummy model with real MMVC for production."
    ),
)

# Start serving the app.
interface.launch()