import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import requests
import soundfile as sf
import torch

# ========== MODEL SETUP ==========
# Remote location of the pretrained checkpoint and the local path it is saved to.
MODEL_URL = "https://huggingface.co/MMVC/prelearned-model/resolve/main/D_v13_20231020.pth"
MODEL_PATH = "model/D_v13_20231020.pth"
# Derive the directory from MODEL_PATH instead of repeating the "model" literal,
# so changing MODEL_PATH cannot leave the download pointing at a missing folder.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

def download_model():
    """Download the pretrained checkpoint to MODEL_PATH unless it already exists.

    The response is streamed to a temporary ``.part`` file and only renamed to
    MODEL_PATH once the transfer has completed, so an interrupted or failed
    download never leaves a truncated file that later runs would mistake for
    a valid checkpoint.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        OSError: If the file cannot be written or moved into place.
    """
    if os.path.exists(MODEL_PATH):
        return

    print("Downloading model...")
    # Create the target directory here as well so the function works when
    # called on its own.
    os.makedirs(os.path.dirname(MODEL_PATH) or ".", exist_ok=True)

    partial_path = MODEL_PATH + ".part"
    try:
        # stream=True avoids holding the whole checkpoint in memory; the
        # (connect, read) timeout prevents hanging forever on a stalled server.
        with requests.get(MODEL_URL, stream=True, timeout=(10, 60)) as response:
            # Without this an HTTP error page would be saved as the "model".
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        os.replace(partial_path, MODEL_PATH)
    finally:
        # After a successful os.replace the partial file is gone; anything
        # still present here is the remnant of a failed download.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Model downloaded.")

# Runs at module import time, before the model is instantiated and the
# interface is launched further down in this file.
download_model()

# ========== DUMMY VOICE CHANGER MODEL ==========
class DummyVoiceChanger(torch.nn.Module):
    """Placeholder "voice changer" that multiplies the waveform by a scalar gain.

    The gain is a learnable parameter initialised to 1.0, so a freshly
    constructed instance returns the input samples unchanged (as float32).
    """

    def __init__(self):
        super().__init__()
        self.gain = torch.nn.Parameter(torch.tensor(1.0))

    def forward(self, audio):
        """Scale ``audio`` by the gain and return the result as a NumPy array.

        Args:
            audio: Waveform samples as a NumPy array, a sequence of numbers,
                or a torch tensor.

        Returns:
            numpy.ndarray: A new float32 array holding ``audio * gain``.
        """
        # as_tensor accepts arrays and tensors alike; torch.tensor() on an
        # existing tensor emits a copy-construct UserWarning and always copies.
        samples = torch.as_tensor(audio, dtype=torch.float32)
        # Inference only: no autograd graph is needed for the multiplication.
        with torch.no_grad():
            scaled = samples * self.gain
        return scaled.cpu().numpy()

# Single module-level instance used by convert_voice() below.
model = DummyVoiceChanger()
# NOTE: the checkpoint downloaded to MODEL_PATH is not loaded here; the dummy
# model is only a placeholder. A real model would restore its weights at this
# point, e.g. with torch.load(MODEL_PATH).

# ========== INFERENCE FUNCTION ==========
def convert_voice(audio_file):
    """Run the module-level ``model`` on an audio file and save the result as WAV.

    The file is loaded with librosa at a 16 kHz sample rate, fixed to exactly
    5 seconds worth of samples with ``librosa.util.fix_length``, passed
    through ``model``, peak-normalised, and written to a new temporary WAV file.

    Args:
        audio_file: Path to the input audio file, as supplied by the Gradio
            ``Audio`` component configured with ``type="filepath"``. Falsy
            (e.g. ``None``) when nothing was provided.

    Returns:
        str: Path of the WAV file containing the converted audio.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not audio_file:
        # Fail with a readable message instead of an opaque error from librosa.
        raise gr.Error("Please upload an audio file first.")

    sample_rate = 16000
    clip_seconds = 5

    audio_data, _ = librosa.load(audio_file, sr=sample_rate)
    audio_data = librosa.util.fix_length(audio_data, size=sample_rate * clip_seconds)

    converted = model(audio_data)
    # Peak-normalise; the epsilon keeps silent input from dividing by zero.
    converted = converted / (np.max(np.abs(converted)) + 1e-6)

    # A unique file per call: a fixed "output.wav" would be overwritten when
    # several requests are processed at the same time.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    sf.write(output_path, converted, sample_rate)
    return output_path

# ========== GRADIO INTERFACE ==========
# Input and output widgets both exchange audio as file paths.
upload_component = gr.Audio(type="filepath", label="Upload Voice")
result_component = gr.Audio(type="filepath", label="Converted Voice")

# Wire convert_voice to the two widgets.
interface = gr.Interface(
    fn=convert_voice,
    inputs=upload_component,
    outputs=result_component,
    title="🗣️ AI Voice Changer (No RVC / No TTS)",
    description="Simple PyTorch voice changer using a dummy model and direct model download. Replace dummy model with real MMVC for production.",
)

# Start the Gradio app.
interface.launch()