Emma123453 committed on
Commit
5df2441
·
verified ·
1 Parent(s): 8e0e0c7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from TTS.api import TTS
2
+ from speechbrain.pretrained import SpeakerRecognition
3
+ from transformers import pipeline
4
+ import gradio as gr
5
+ import numpy as np
6
+ import soundfile as sf
7
+ from scipy.signal import resample
8
+ from scipy.io.wavfile import write as write_wav
9
+ from tempfile import NamedTemporaryFile
10
+ import os
11
+
# Voice-cloning model (XTTS v2). Instantiated once at import time with the
# progress bar disabled and gpu=False.
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/xtts_v2",
    progress_bar=False,
    gpu=False,
)

# Spoof-detection models, also instantiated once at import time:
#   sb       - SpeechBrain SpeakerRecognition loaded from the ECAPA/VoxCeleb
#              checkpoint, with savedir set to "tmp_model"
#   ast_pipe - transformers audio-classification pipeline (AST checkpoint)
#   df_pipe  - transformers audio-classification pipeline (deepfake checkpoint)
sb = SpeakerRecognition.from_hparams(
    source="speechbrain/spkrec-ecapa-voxceleb",
    savedir="tmp_model",
)
ast_pipe = pipeline(
    "audio-classification",
    model="MattyB95/AST-VoxCelebSpoof-Synthetic-Voice-Detection",
)
df_pipe = pipeline(
    "audio-classification",
    model="MelodyMachine/Deepfake-audio-detection-V2",
)
19
+
def _prepare_reference_audio(voice_sample, desired_sr):
    """Return the recording as float32 samples in [-1, 1] at ``desired_sr`` Hz."""
    if voice_sample is None:
        raise ValueError("No audio received: record a voice sample first.")

    first, second = voice_sample
    # Gradio's ``type="numpy"`` audio arrives as (sample_rate, samples). The
    # reversed (samples, sample_rate) order is still accepted so that direct
    # callers written against the previous unpacking keep working.
    if np.ndim(first) == 0:
        ref_sr, samples = first, second
    else:
        samples, ref_sr = first, second

    ref_sr = int(ref_sr)
    samples = np.asarray(samples)
    if ref_sr <= 0 or samples.size == 0:
        raise ValueError("The voice sample is empty or has an invalid sample rate.")

    # Integer PCM has to be scaled into [-1, 1] before it is stored as a
    # float WAV; a bare cast would keep the raw integer amplitudes.
    if np.issubdtype(samples.dtype, np.integer):
        full_scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)

    if ref_sr != desired_sr:
        duration = samples.shape[0] / ref_sr
        num_samples = int(duration * desired_sr)
        if num_samples == 0:
            raise ValueError("The voice sample is too short to resample.")
        samples = resample(samples, num_samples)

    # Resampling can overshoot slightly; keep the result inside full scale.
    return np.clip(samples, -1.0, 1.0).astype(np.float32)


def spoof_and_detect(voice_sample, desired_sr=16000):
    """Clone the recorded voice and run the spoof detectors on both clips.

    Args:
        voice_sample: ``(sample_rate, samples)`` tuple as produced by a Gradio
            ``Audio`` input with ``type="numpy"``. The reversed
            ``(samples, sample_rate)`` order is accepted as well.
        desired_sr: Sample rate in Hz that the reference clip is resampled to
            before it is written to disk.

    Returns:
        A ``(ref_temp_path, clone_path, results)`` tuple: the path of the
        reference WAV, the path of the cloned WAV, and a dict of
        human-readable detector outputs keyed by detector name.

    Raises:
        ValueError: If no audio was supplied, or the clip is empty, too short
            to resample, or has a non-positive sample rate.

    Note:
        Both WAV files are created in the temp directory and are not deleted
        here, because their paths are returned to the caller.
    """
    ref_audio_array = _prepare_reference_audio(voice_sample, desired_sr)

    # Reserve a unique file name, then write the reference audio to it.
    with NamedTemporaryFile(suffix=".wav", mode="wb", delete=False) as ref_wav:
        ref_temp_path = ref_wav.name
    write_wav(ref_temp_path, desired_sr, ref_audio_array)

    # Derive the clone path from the extension only, so a ".wav" appearing
    # elsewhere in the path is left alone.
    root, ext = os.path.splitext(ref_temp_path)
    clone_path = f"{root}_clone{ext}"
    tts.tts_to_file(
        text="My voice is my password.",
        speaker_wav=ref_temp_path,
        file_path=clone_path,
        language="en",
    )

    # verify_files returns a (score, label) pair; only the label is reported.
    _, sb_label = sb.verify_files(ref_temp_path, clone_path)
    # Each pipeline call returns a list; its first entry is what gets shown.
    ast_ref = ast_pipe(ref_temp_path)[0]
    ast_clone = ast_pipe(clone_path)[0]
    df_ref = df_pipe(ref_temp_path)[0]
    df_clone = df_pipe(clone_path)[0]

    results = {
        "SpeechBrain": str(sb_label.item()),
        "AST REF": f"{ast_ref['label']} ({ast_ref['score']:.2f})",
        "AST CLONE": f"{ast_clone['label']} ({ast_clone['score']:.2f})",
        "Deepfake REF": f"{df_ref['label']} ({df_ref['score']:.2f})",
        "Deepfake CLONE": f"{df_clone['label']} ({df_clone['score']:.2f})",
    }

    return ref_temp_path, clone_path, results
60
+
# Gradio front end: one microphone input feeding spoof_and_detect, whose three
# return values are shown as two audio players and a JSON panel.
# NOTE(review): `source=` looks like the Gradio 3.x spelling; newer releases
# appear to expect `sources=[...]` — confirm against the pinned Gradio version.
mic_input = gr.Audio(source="microphone", type="numpy", label="🎤 Record your voice")

result_views = [
    gr.Audio(label="🎧 Original"),
    gr.Audio(label="🎧 Cloned"),
    gr.JSON(label="🧪 Spoof Detection Results"),
]

demo = gr.Interface(
    fn=spoof_and_detect,
    inputs=mic_input,
    outputs=result_views,
    title="🗣️ Voice Cloning + Spoof Detection",
    description="Clone a speaker's voice and evaluate with 3 spoof detection models.",
)

# Start the app as soon as the module is executed.
demo.launch()