Spaces:

Emma123453
/

Demo

Runtime error

App Files Files Community

Emma123453 committed on May 4

Commit

5df2441

verified ·

1 Parent(s): 8e0e0c7

Create app.py

Browse files

Files changed (1) hide show

app.py +73 -0

app.py ADDED Viewed

	@@ -0,0 +1,73 @@

+from TTS.api import TTS
+from speechbrain.pretrained import SpeakerRecognition
+from transformers import pipeline
+import gradio as gr
+import numpy as np
+import soundfile as sf
+from scipy.signal import resample
+from scipy.io.wavfile import write as write_wav
+from tempfile import NamedTemporaryFile
+import os
+# Load voice cloning model (XTTS)
+tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=False, gpu=False)
+# Load spoof detection models
+sb = SpeakerRecognition.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb", savedir="tmp_model")
+ast_pipe = pipeline("audio-classification", model="MattyB95/AST-VoxCelebSpoof-Synthetic-Voice-Detection")
+df_pipe = pipeline("audio-classification", model="MelodyMachine/Deepfake-audio-detection-V2")
+def spoof_and_detect(voice_sample, desired_sr=16000):
+    ref_audio_array, ref_sr = voice_sample
+    # Resample to 16kHz
+    if ref_sr != desired_sr:
+        duration = ref_audio_array.shape[0] / ref_sr
+        num_samples = int(duration * desired_sr)
+        ref_audio_array = resample(ref_audio_array, num_samples)
+        ref_sr = desired_sr
+    # Save reference audio
+    with NamedTemporaryFile(suffix=".wav", mode='wb', delete=False) as ref_wav:
+        ref_temp_path = ref_wav.name
+        write_wav(ref_temp_path, ref_sr, ref_audio_array.astype("float32"))
+    # Clone voice
+    clone_path = ref_temp_path.replace(".wav", "_clone.wav")
+    tts.tts_to_file(
+        text="My voice is my password.",
+        speaker_wav=ref_temp_path,
+        file_path=clone_path,
+        language="en"
+    )
+    # Spoof detection
+    sb_score, sb_label = sb.verify_files(ref_temp_path, clone_path)
+    ast_ref = ast_pipe(ref_temp_path)[0]
+    ast_clone = ast_pipe(clone_path)[0]
+    df_ref = df_pipe(ref_temp_path)[0]
+    df_clone = df_pipe(clone_path)[0]
+    results = {
+        "SpeechBrain": str(sb_label.item()),
+        "AST REF": f"{ast_ref['label']} ({ast_ref['score']:.2f})",
+        "AST CLONE": f"{ast_clone['label']} ({ast_clone['score']:.2f})",
+        "Deepfake REF": f"{df_ref['label']} ({df_ref['score']:.2f})",
+        "Deepfake CLONE": f"{df_clone['label']} ({df_clone['score']:.2f})",
+    }
+    return ref_temp_path, clone_path, results
+demo = gr.Interface(
+    fn=spoof_and_detect,
+    inputs=gr.Audio(source="microphone", type="numpy", label="🎤 Record your voice"),
+    outputs=[
+        gr.Audio(label="🎧 Original"),
+        gr.Audio(label="🎧 Cloned"),
+        gr.JSON(label="🧪 Spoof Detection Results")
+    ],
+    title="🗣️ Voice Cloning + Spoof Detection",
+    description="Clone a speaker's voice and evaluate with 3 spoof detection models."
+)
+demo.launch()