Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gradio as gr
|
3 |
+
import torch
|
4 |
+
import librosa
|
5 |
+
import numpy as np
|
6 |
+
import soundfile as sf
|
7 |
+
import requests
|
8 |
+
|
9 |
+
# ========== DOWNLOAD PRETRAINED MODEL ==========
|
10 |
+
# Local path of the pretrained checkpoint. The directory to create is derived
# from this path (instead of repeating the "model" literal) so the two cannot
# drift apart if the location is ever changed.
MODEL_PATH = "model/D_v13_20231020.pth"
os.makedirs(os.path.dirname(MODEL_PATH) or ".", exist_ok=True)
|
12 |
+
|
13 |
+
def download_model():
    """Download the pretrained checkpoint to ``MODEL_PATH`` if it is missing.

    Does nothing when the file already exists. Otherwise the checkpoint is
    streamed to a temporary ``.part`` file next to the target and moved into
    place only after the whole body has been received, so a failed or
    interrupted download never leaves a file that a later run would mistake
    for a valid model.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    if os.path.exists(MODEL_PATH):
        return

    url = "https://huggingface.co/MMVC/prelearned-model/resolve/main/D_v13_20231020.pth"
    partial_path = MODEL_PATH + ".part"
    print("Downloading model...")
    try:
        # Stream the body so the checkpoint is never held in memory in one
        # piece; the timeout keeps a stalled connection from hanging startup.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            # Without this check an HTML error page would be saved as the model.
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        # Atomic rename: MODEL_PATH only ever appears fully written.
        os.replace(partial_path, MODEL_PATH)
    finally:
        # After a successful rename the partial file no longer exists; on any
        # failure this removes the incomplete download.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Model downloaded.")
|
21 |
+
|
22 |
+
# Runs at import time so the checkpoint is on disk before the module-level
# torch.load(MODEL_PATH) further down in this file.
download_model()
|
23 |
+
|
24 |
+
# ========== DUMMY MODEL LOADER (Placeholder for real MMVC model) ==========
|
25 |
+
class DummyVoiceChanger(torch.nn.Module):
    """Placeholder voice changer that scales audio by one learnable gain.

    Stands in for the real MMVC model. Its only parameter is ``gain``,
    initialised to 1.0, so with untouched weights the output equals the input.
    """

    def __init__(self):
        super().__init__()
        self.gain = torch.nn.Parameter(torch.tensor(1.0))

    def forward(self, audio):
        """Return ``audio`` multiplied by ``gain`` as a float32 NumPy array.

        Args:
            audio: Samples as a NumPy array, tensor, or (nested) sequence of
                numbers.

        Returns:
            A ``numpy.ndarray`` of dtype float32 with the same shape as
            ``audio``.
        """
        # ``gain`` is a Parameter, so the product would normally require grad,
        # and ``Tensor.numpy()`` raises RuntimeError on such tensors. Inference
        # needs no autograd, so it is disabled for the whole computation.
        with torch.no_grad():
            # as_tensor avoids the copy (and the warning) torch.tensor() incurs
            # when the input is already a tensor.
            samples = torch.as_tensor(audio, dtype=torch.float32)
            return (samples * self.gain).numpy()
|
33 |
+
|
34 |
+
# Load dummy model (Replace this with real MMVC model loading)
|
35 |
+
model = DummyVoiceChanger()
# SECURITY NOTE(review): torch.load unpickles the file, and this file was
# downloaded from the network. Unpickling untrusted data can execute arbitrary
# code; consider passing weights_only=True (if the installed PyTorch supports
# it) once the checkpoint layout has been confirmed.
state_dict = torch.load(MODEL_PATH, map_location="cpu")
# TODO: the checkpoint is read but not applied yet. Call
# model.load_state_dict(state_dict) once DummyVoiceChanger has been replaced
# with the real MMVC architecture.
|
38 |
+
|
39 |
+
# ========== VOICE CONVERSION ==========
|
40 |
+
def convert_voice(audio):
    """Run an uploaded recording through ``model`` and return the result path.

    The input is loaded at 16 kHz, padded or truncated to exactly five
    seconds, passed through ``model``, peak-normalised and written as a WAV
    file.

    Args:
        audio: Path of the uploaded audio file, or ``None``/empty when nothing
            was uploaded.

    Returns:
        Path of the WAV file holding the converted audio, or ``None`` when no
        input was provided.
    """
    import tempfile

    # Submitting without an upload passes no path; bail out instead of
    # crashing inside librosa.load.
    if not audio:
        return None

    sample_rate = 16000
    audio_data, _ = librosa.load(audio, sr=sample_rate)
    audio_data = librosa.util.fix_length(audio_data, size=sample_rate * 5)

    converted = model(audio_data)
    # The epsilon keeps an all-zero (silent) signal from dividing by zero.
    converted /= np.max(np.abs(converted)) + 1e-6

    # One unique file per call: a fixed "output.wav" would be overwritten when
    # two requests are processed at the same time.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    sf.write(output_path, converted, sample_rate)
    return output_path
|
53 |
+
|
54 |
+
# ========== GRADIO INTERFACE ==========
|
55 |
+
# Components are built first so the Interface call below reads as plain wiring.
# NOTE(review): the `source=` keyword looks like the Gradio 3.x spelling; newer
# releases appear to expect `sources=[...]` — confirm against the Gradio
# version this app is deployed with.
_voice_input = gr.Audio(source="upload", type="filepath", label="Upload Voice")
_voice_output = gr.Audio(type="filepath", label="Converted Voice")

interface = gr.Interface(
    fn=convert_voice,
    inputs=_voice_input,
    outputs=_voice_output,
    title="AI Voice Changer (No RVC / No TTS)",
    description="This app loads a pretrained AI model (D_v13_20231020.pth) and simulates voice conversion using PyTorch. Replace dummy model with full MMVC logic for advanced use.",
)

# Start the web UI as soon as the module is executed.
interface.launch()
|