"""Gradio demo: a placeholder "voice changer" built on a one-parameter PyTorch module.

On import the script makes sure the pre-trained checkpoint file exists locally
(downloading it if needed), builds the dummy model and the Gradio interface.
The web UI is launched only when the file is executed as a script.
"""

import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import requests
import soundfile as sf
import torch

# ========== MODEL SETUP ==========
MODEL_URL = "https://huggingface.co/MMVC/prelearned-model/resolve/main/D_v13_20231020.pth"
MODEL_PATH = "model/D_v13_20231020.pth"

# (connect timeout, read timeout) in seconds; the read timeout applies to each
# socket read, not to the whole transfer, so large files still download fine.
DOWNLOAD_TIMEOUT = (10, 60)
DOWNLOAD_CHUNK_BYTES = 1 << 20

# Audio is resampled to this rate and padded/trimmed to a fixed clip length.
SAMPLE_RATE = 16000
CLIP_SECONDS = 5

os.makedirs("model", exist_ok=True)


def download_model():
    """Download the checkpoint to ``MODEL_PATH`` unless it is already there.

    The response is streamed into a ``.part`` file that is renamed onto
    ``MODEL_PATH`` only after the transfer finished, so an interrupted or
    failed download is never mistaken for a cached model on the next run.

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
        requests.RequestException: connection failure or timeout.
    """
    if os.path.exists(MODEL_PATH):
        return

    print("Downloading model...")
    partial_path = MODEL_PATH + ".part"
    with requests.get(MODEL_URL, stream=True, timeout=DOWNLOAD_TIMEOUT) as response:
        # Without this check an HTML error page would be saved as the model.
        response.raise_for_status()
        with open(partial_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_BYTES):
                f.write(chunk)
    os.replace(partial_path, MODEL_PATH)
    print("Model downloaded.")


download_model()


# ========== DUMMY VOICE CHANGER MODEL ==========
class DummyVoiceChanger(torch.nn.Module):
    """Placeholder model: multiplies the waveform by a single learnable gain."""

    def __init__(self):
        super().__init__()
        self.gain = torch.nn.Parameter(torch.tensor(1.0))

    def forward(self, audio):
        """Scale ``audio`` by ``self.gain`` and return the result as a NumPy array.

        Args:
            audio: array-like waveform; it is converted to a float32 tensor.

        Returns:
            A new ``numpy.ndarray`` holding ``audio * gain``.
        """
        # as_tensor avoids the extra copy (and the warning) torch.tensor()
        # produces when the input is already a tensor or a float32 array.
        waveform = torch.as_tensor(audio, dtype=torch.float32)
        # detach() is required because ``gain`` is a Parameter that tracks grad.
        return (waveform * self.gain).detach().numpy()


model = DummyVoiceChanger()
# The downloaded .pth is not loaded: the dummy model is only a placeholder.
# torch.load(MODEL_PATH) would go here once a real model is wired in.


# ========== INFERENCE FUNCTION ==========
def convert_voice(audio_file):
    """Run the model on an uploaded audio file and return the path of the result.

    The input is loaded at ``SAMPLE_RATE``, padded or trimmed to
    ``CLIP_SECONDS`` seconds, passed through ``model`` and peak-normalised.

    Args:
        audio_file: path of the uploaded audio file, or a falsy value when
            nothing was uploaded.

    Returns:
        Path of a newly created WAV file containing the converted audio.

    Raises:
        gr.Error: no audio file was provided.
    """
    if not audio_file:
        raise gr.Error("Please upload an audio file first.")

    audio_data, _ = librosa.load(audio_file, sr=SAMPLE_RATE)
    audio_data = librosa.util.fix_length(audio_data, size=SAMPLE_RATE * CLIP_SECONDS)

    converted = model(audio_data)
    # The epsilon keeps the division finite for an all-zero (silent) clip.
    converted /= np.max(np.abs(converted)) + 1e-6

    # A unique file per call: a shared "output.wav" would be overwritten when
    # two requests are processed at the same time.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    sf.write(output_path, converted, SAMPLE_RATE)
    return output_path


# ========== GRADIO INTERFACE ==========
interface = gr.Interface(
    fn=convert_voice,
    inputs=gr.Audio(type="filepath", label="Upload Voice"),
    outputs=gr.Audio(type="filepath", label="Converted Voice"),
    title="🗣️ AI Voice Changer (No RVC / No TTS)",
    description="Simple PyTorch voice changer using a dummy model and direct model download. Replace dummy model with real MMVC for production.",
)

if __name__ == "__main__":
    interface.launch()