Spaces:

Athagi
/

Gttggtt

Sleeping

File size: 1,910 Bytes

f48cab7
 
 
 
 
 
 
 
29c9c4c
 
f48cab7
 
 
 
 
 
29c9c4c
f48cab7
 
 
 
 
 
29c9c4c
f48cab7
 
 
 
 
 
 
29c9c4c
f48cab7
 
29c9c4c
 
f48cab7
29c9c4c
3800f2c
 
f48cab7
3800f2c
f48cab7
 
3800f2c
f48cab7
 
 
 
 
 
 
3800f2c
f48cab7
29c9c4c
 
f48cab7

import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import requests
import soundfile as sf
import torch

# ========== MODEL SETUP ==========
# Remote location of the pretrained checkpoint and where it is cached locally.
MODEL_URL = (
    "https://huggingface.co/MMVC/prelearned-model/resolve/main/"
    "D_v13_20231020.pth"
)
MODEL_PATH = "model/D_v13_20231020.pth"

# Make sure the cache directory exists before anything tries to write into it.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

def download_model():
    """Download the checkpoint from MODEL_URL to MODEL_PATH unless it already exists.

    The response is streamed to a sibling ``.part`` file and only renamed to
    MODEL_PATH once the transfer has completed, so a failed or interrupted
    download never leaves a truncated file that later runs would mistake for
    a complete checkpoint.

    Raises:
        requests.RequestException: If the connection fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the file cannot be written.
    """
    if os.path.exists(MODEL_PATH):
        return

    print("Downloading model...")
    # Keep the function usable even if the target directory was removed.
    os.makedirs(os.path.dirname(MODEL_PATH) or ".", exist_ok=True)

    partial_path = MODEL_PATH + ".part"
    try:
        # stream=True avoids holding the whole checkpoint in memory; the
        # timeout keeps a stalled server from hanging start-up forever.
        with requests.get(MODEL_URL, stream=True, timeout=30) as response:
            # Without this an HTML error page would be saved as the model.
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        os.replace(partial_path, MODEL_PATH)
    finally:
        # After a successful rename the partial file is gone; on failure,
        # remove whatever was written so the next run starts clean.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Model downloaded.")

# Fetch the checkpoint when the module is loaded, before the UI is built
# (does nothing when the file is already present at MODEL_PATH).
download_model()

# ========== DUMMY VOICE CHANGER MODEL ==========
class DummyVoiceChanger(torch.nn.Module):
    """Placeholder model that multiplies the input audio by a scalar gain.

    The gain is a learnable parameter initialised to 1.0, so a freshly
    constructed instance returns its input unchanged (as float32).
    """

    def __init__(self):
        super().__init__()
        initial_gain = torch.tensor(1.0)
        self.gain = torch.nn.Parameter(initial_gain)

    def forward(self, audio):
        """Scale ``audio`` by the gain and return the result as a NumPy array.

        Args:
            audio: Array-like of samples; it is copied into a float32 tensor.

        Returns:
            A float32 NumPy array with the scaled samples.
        """
        samples = torch.tensor(audio, dtype=torch.float32)
        scaled = samples * self.gain
        # The product tracks gradients because ``gain`` is a Parameter, and
        # ``.numpy()`` refuses such tensors, so detach first.
        return scaled.detach().numpy()

# Single module-level instance that convert_voice calls for every request.
model = DummyVoiceChanger()
# NOTE: the checkpoint downloaded to MODEL_PATH is not loaded anywhere; the
# dummy model is only a placeholder. A real model would restore its weights
# here, e.g. with torch.load(MODEL_PATH).

# ========== INFERENCE FUNCTION ==========
def convert_voice(audio_file):
    """Run the uploaded recording through ``model`` and return the output file path.

    The audio is loaded with librosa at a 16 kHz sample rate, padded or
    trimmed to exactly five seconds, passed through the module-level
    ``model``, peak-normalised, and written to a new WAV file.

    Args:
        audio_file: Path of the input audio file, as supplied by the Gradio
            ``filepath`` audio component. Empty/``None`` when the user
            submitted without uploading anything.

    Returns:
        Path of a newly created 16 kHz WAV file holding the converted audio.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not audio_file:
        # Show a readable message in the UI instead of an opaque loader error.
        raise gr.Error("Please upload an audio file first.")

    sample_rate = 16000
    clip_seconds = 5

    audio_data, _ = librosa.load(audio_file, sr=sample_rate)
    audio_data = librosa.util.fix_length(
        audio_data, size=sample_rate * clip_seconds
    )

    converted = model(audio_data)
    # Peak-normalise; the epsilon avoids dividing by zero on silent input.
    converted = converted / (np.max(np.abs(converted)) + 1e-6)

    # Give every request its own output file: a fixed name would let
    # concurrent users overwrite each other's result. The file is kept
    # (delete=False) because the caller reads it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    sf.write(output_path, converted, sample_rate)
    return output_path

# ========== GRADIO INTERFACE ==========
# Both audio components exchange data as file paths, which is what
# convert_voice accepts and returns.
_upload_component = gr.Audio(type="filepath", label="Upload Voice")
_result_component = gr.Audio(type="filepath", label="Converted Voice")

interface = gr.Interface(
    fn=convert_voice,
    inputs=_upload_component,
    outputs=_result_component,
    title="🗣️ AI Voice Changer (No RVC / No TTS)",
    description=(
        "Simple PyTorch voice changer using a dummy model and direct model download. "
        "Replace dummy model with real MMVC for production."
    ),
)

# Start serving the app.
interface.launch()