Gttggtt / app.py
Athagi's picture
Update app.py
29c9c4c verified
import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import requests
import soundfile as sf
import torch
# ========== MODEL SETUP ==========
# Remote location the checkpoint is fetched from.
MODEL_URL = "https://huggingface.co/MMVC/prelearned-model/resolve/main/D_v13_20231020.pth"
# Local path the checkpoint is stored at.
MODEL_PATH = "model/D_v13_20231020.pth"

# Create the checkpoint's parent directory ("model") up front; a directory
# that already exists is not an error.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
def download_model():
    """Download the checkpoint to ``MODEL_PATH`` unless it already exists.

    The response is streamed to a temporary ``.part`` file next to the
    target and only renamed to ``MODEL_PATH`` once the transfer completed,
    so a failed or interrupted download never leaves a truncated file that
    later runs would mistake for a valid checkpoint.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
        OSError: If the file cannot be written or renamed.
    """
    if os.path.exists(MODEL_PATH):
        return

    print("Downloading model...")
    partial_path = MODEL_PATH + ".part"
    try:
        # (connect timeout, read timeout) in seconds: without a timeout a
        # stalled server would block start-up forever.
        with requests.get(MODEL_URL, stream=True, timeout=(10, 60)) as response:
            # Fail loudly instead of saving an HTML error page as the model.
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                # Stream in 1 MiB chunks rather than holding the whole
                # checkpoint in memory.
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        os.replace(partial_path, MODEL_PATH)
    finally:
        # After a successful rename the partial file no longer exists, so
        # this only cleans up after a failure.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Model downloaded.")
# Runs at import time, so the checkpoint is fetched before the interface below is launched.
download_model()
# ========== DUMMY VOICE CHANGER MODEL ==========
class DummyVoiceChanger(torch.nn.Module):
    """Placeholder voice-conversion model that scales audio by a single gain.

    The gain parameter is initialised to 1.0, so until it is changed the
    output equals the input converted to float32.
    """

    def __init__(self):
        super().__init__()
        self.gain = torch.nn.Parameter(torch.tensor(1.0))

    def forward(self, audio):
        """Scale ``audio`` by the gain and return the result as a NumPy array.

        Args:
            audio: Samples as a NumPy array, a ``torch.Tensor``, or a
                (nested) sequence of numbers.

        Returns:
            A float32 ``numpy.ndarray`` with the same shape as ``audio``.
        """
        device = self.gain.device
        if isinstance(audio, torch.Tensor):
            # Re-wrapping an existing tensor with torch.tensor() emits a
            # copy-construct UserWarning; convert it in place instead.
            samples = audio.detach().to(device=device, dtype=torch.float32)
        else:
            samples = torch.tensor(audio, dtype=torch.float32, device=device)

        # Inference only: no autograd graph is needed for the product.
        with torch.no_grad():
            scaled = samples * self.gain

        # .cpu() is a no-op on CPU and makes .numpy() valid if the module
        # has been moved to another device.
        return scaled.cpu().numpy()
# Module-level model instance that convert_voice() calls.
model = DummyVoiceChanger()
# The .pth checkpoint is not actually loaded: DummyVoiceChanger is only a placeholder.
# A real implementation would call torch.load(MODEL_PATH) here.
# ========== INFERENCE FUNCTION ==========
def convert_voice(audio_file):
    """Run an audio file through the model and return the converted WAV path.

    The file is loaded with ``librosa`` at 16 kHz, padded or truncated to
    exactly five seconds, passed through the module-level ``model``,
    peak-normalised, and written to a new temporary WAV file.

    Args:
        audio_file: Path of the audio file to convert. A falsy value
            (e.g. ``None`` when nothing was uploaded) is rejected.

    Returns:
        Path of the converted 16 kHz WAV file.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not audio_file:
        # Surface a readable message in the UI instead of an obscure
        # loader error.
        raise gr.Error("Please upload an audio file first.")

    sample_rate = 16000
    clip_seconds = 5

    audio_data, _ = librosa.load(audio_file, sr=sample_rate)
    audio_data = librosa.util.fix_length(
        audio_data, size=sample_rate * clip_seconds
    )

    converted = model(audio_data)
    # The small epsilon keeps an all-silent clip from dividing by zero.
    converted = converted / (np.max(np.abs(converted)) + 1e-6)

    # Use a unique file per call: a fixed "output.wav" would be overwritten
    # by concurrent requests. The file is left on disk for the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    sf.write(output_path, converted, sample_rate)
    return output_path
# ========== GRADIO INTERFACE ==========
# Both components exchange audio as file paths, which is what
# convert_voice() takes and returns.
upload_widget = gr.Audio(type="filepath", label="Upload Voice")
result_widget = gr.Audio(type="filepath", label="Converted Voice")

interface = gr.Interface(
    title="🗣️ AI Voice Changer (No RVC / No TTS)",
    description="Simple PyTorch voice changer using a dummy model and direct model download. Replace dummy model with real MMVC for production.",
    fn=convert_voice,
    inputs=upload_widget,
    outputs=result_widget,
)

# Start the Gradio app.
interface.launch()