Spaces:

Athagi
/

Gttggtt

Sleeping

App Files Files Community

Gttggtt / app.py

Athagi

Update app.py

29c9c4c verified 2 months ago

raw

history blame contribute delete

1.91 kB

	import os
	import gradio as gr
	import torch
	import librosa
	import numpy as np
	import soundfile as sf
	import requests

	# ========== MODEL SETUP ==========
	# Local cache location of the pretrained checkpoint and the URL it is fetched from.
	MODEL_PATH = "model/D_v13_20231020.pth"
	MODEL_URL = "https://huggingface.co/MMVC/prelearned-model/resolve/main/D_v13_20231020.pth"
	# Create the cache directory up front so the download can write straight into it.
	os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

	def download_model():
	    """Download the pretrained checkpoint to MODEL_PATH unless it already exists.

	    The response is streamed into a temporary ``.part`` file that is renamed into
	    place only after the transfer completes, so a failed or interrupted download
	    never leaves a truncated file that a later run would mistake for a valid
	    checkpoint.

	    Raises:
	        requests.HTTPError: if the server answers with a 4xx/5xx status.
	        requests.RequestException: on connection failures or timeouts.
	        OSError: if the file cannot be written.
	    """
	    if os.path.exists(MODEL_PATH):
	        return

	    print("Downloading model...")
	    partial_path = MODEL_PATH + ".part"
	    try:
	        # stream=True avoids holding the whole checkpoint in memory; the timeout
	        # keeps a stalled connection from hanging start-up indefinitely.
	        with requests.get(MODEL_URL, stream=True, timeout=60) as response:
	            # Without this check an HTML error page would be saved as the "model".
	            response.raise_for_status()
	            with open(partial_path, "wb") as f:
	                for chunk in response.iter_content(chunk_size=1 << 20):
	                    f.write(chunk)
	        os.replace(partial_path, MODEL_PATH)
	    finally:
	        # After a successful rename the partial file no longer exists; on any
	        # failure it is discarded so the next run starts from scratch.
	        if os.path.exists(partial_path):
	            os.remove(partial_path)
	    print("Model downloaded.")

	# Executed when the module is run/imported: fetch the checkpoint (if it is not
	# already on disk) before the rest of the script continues.
	download_model()

	# ========== DUMMY VOICE CHANGER MODEL ==========
	class DummyVoiceChanger(torch.nn.Module):
	    """Placeholder "voice changer" that only scales a waveform by a learnable gain.

	    The gain is initialised to 1.0, so a freshly constructed instance returns its
	    input values unchanged.
	    """

	    def __init__(self):
	        super().__init__()
	        self.gain = torch.nn.Parameter(torch.tensor(1.0))

	    def forward(self, audio):
	        """Scale ``audio`` by the gain and return the result as a NumPy array.

	        Args:
	            audio: waveform samples as a NumPy array, a sequence of numbers, or a
	                tensor.

	        Returns:
	            numpy.ndarray of dtype float32 with the same shape as ``audio``. The
	            multiplication allocates a new buffer, so the input is never modified.
	        """
	        # as_tensor accepts arrays, sequences and tensors alike (torch.tensor warns
	        # when handed an existing tensor) and avoids a copy when the dtype matches.
	        samples = torch.as_tensor(audio, dtype=torch.float32, device=self.gain.device)
	        # Inference only: skip autograd bookkeeping. .cpu() keeps the NumPy
	        # conversion valid even if the module has been moved to another device.
	        with torch.no_grad():
	            scaled = samples * self.gain
	        return scaled.cpu().numpy()

	model = DummyVoiceChanger()
	# NOTE: the checkpoint downloaded to MODEL_PATH is not loaded anywhere in the
	# visible code; inference runs on the untrained placeholder model.
	# A real model would be restored here, e.g. with torch.load(MODEL_PATH).
	# torch.load is pickle-based, so only load checkpoints from a trusted source.

	# ========== INFERENCE FUNCTION ==========
	def convert_voice(audio_file):
	    """Run an uploaded recording through ``model`` and return the result's path.

	    The input is loaded at 16 kHz, forced to a length of exactly 5 seconds worth
	    of samples, passed through the model, peak-normalised and written to a WAV
	    file.

	    Args:
	        audio_file: path of the audio file supplied by the Gradio input component.

	    Returns:
	        Path of the 16 kHz WAV file containing the converted audio.

	    Raises:
	        gr.Error: if no audio file was provided.
	    """
	    import tempfile  # local import: only this function needs it

	    sample_rate = 16000
	    clip_seconds = 5

	    if not audio_file:
	        # Fail with a readable message instead of an opaque error from librosa
	        # when the form is submitted without an upload.
	        raise gr.Error("Please upload an audio file first.")

	    audio_data, _ = librosa.load(audio_file, sr=sample_rate)
	    audio_data = librosa.util.fix_length(audio_data, size=sample_rate * clip_seconds)

	    converted = model(audio_data)
	    # Peak-normalise; the epsilon avoids a division by zero on silent input.
	    converted = converted / (np.max(np.abs(converted)) + 1e-6)

	    # Use a unique file per call: a fixed "output.wav" is overwritten when two
	    # requests run at the same time, so one user could receive another's audio.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        output_path = tmp.name
	    sf.write(output_path, converted, sample_rate)
	    return output_path

	# ========== GRADIO INTERFACE ==========
	# Build the input and output audio widgets first, then wire them to convert_voice.
	_voice_input = gr.Audio(type="filepath", label="Upload Voice")
	_voice_output = gr.Audio(type="filepath", label="Converted Voice")
	interface = gr.Interface(
	    fn=convert_voice,
	    inputs=_voice_input,
	    outputs=_voice_output,
	    title="🗣️ AI Voice Changer (No RVC / No TTS)",
	    description="Simple PyTorch voice changer using a dummy model and direct model download. Replace dummy model with real MMVC for production.",
	)

	# Start serving the app.
	interface.launch()