ZeyadMostafa22 committed on
Commit c629c7c · 1 Parent(s): db175f8
Files changed (1)
app.py +14 -12
app.py CHANGED
@@ -3,6 +3,7 @@ import torch
 import torchaudio
 import numpy as np
 from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
+import torch.nn.functional as F
 import torchaudio.transforms as T
 
 MODEL_ID = "Zeyadd-Mostaffa/wav2vec_checkpoints"
@@ -18,15 +19,13 @@ model.to(device)
 
 label_names = ["fake", "real"]  # According to your label2id = {"fake": 0, "real": 1}
 
-
 def classify_audio(audio_file):
     """
     audio_file: path to the uploaded file (WAV, MP3, etc.)
-    Returns: "fake" or "real"
+    Returns: predicted label and confidence score
     """
 
     # 2) Load the audio file
-    # torchaudio returns (waveform, sample_rate)
     waveform, sr = torchaudio.load(audio_file)
 
     # If stereo, pick one channel or average
@@ -40,14 +39,13 @@ def classify_audio(audio_file):
         waveform = resampler(waveform)
         sr = 16000
 
-
     # 3) Preprocess with feature_extractor
     inputs = feature_extractor(
        waveform.numpy(),
        sampling_rate=sr,
        return_tensors="pt",
        truncation=True,
-       max_length=int(16000* 6.0),  # 6 second max
+       max_length=int(16000 * 6.0),  # 6 second max
     )
 
     # Move everything to device
@@ -55,20 +53,24 @@
 
     with torch.no_grad():
         logits = model(input_values).logits
-    pred_id = torch.argmax(logits, dim=-1).item()
 
-    # 4) Return label text
-    predicted_label = label_names[pred_id]
-    return predicted_label
+    # 4) Calculate probabilities using softmax
+    probabilities = F.softmax(logits, dim=-1)
+
+    # Get predicted label and confidence
+    confidence, pred_id = torch.max(probabilities, dim=-1)
+    predicted_label = label_names[pred_id.item()]
 
+    # 5) Return label and confidence percentage
+    return f"Prediction: {predicted_label}, Confidence: {confidence.item() * 100:.2f}%"
 
-# 5) Build Gradio interface
+# 6) Build Gradio interface
 demo = gr.Interface(
     fn=classify_audio,
-    inputs=gr.Audio( type="filepath"),
+    inputs=gr.Audio(type="filepath"),
     outputs="text",
     title="Wav2Vec2 Deepfake Detection",
-    description="Upload an audio sample to check if it is fake or real."
+    description="Upload an audio sample to check if it is fake or real, along with confidence."
 )
 
 if __name__ == "__main__":
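
For context on the hunks above: before the model sees any audio, classify_audio loads the file, downmixes stereo to mono, and resamples to 16 kHz. A rough standalone sketch of that path, assuming a hypothetical local file sample.wav; the exact downmix lines sit in parts of app.py the diff does not show, so this is an illustration, not the committed code:

import torchaudio
import torchaudio.transforms as T

waveform, sr = torchaudio.load("sample.wav")  # (channels, samples), sample rate
if waveform.shape[0] > 1:                     # stereo: average channels to mono
    waveform = waveform.mean(dim=0, keepdim=True)
if sr != 16000:                               # wav2vec2 models expect 16 kHz input
    waveform = T.Resample(orig_freq=sr, new_freq=16000)(waveform)
    sr = 16000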
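
The substance of the commit is the new softmax step: instead of returning a bare argmax label, classify_audio now reports the model's probability for its prediction. A minimal sketch of just that step, with made-up logits standing in for the wav2vec2 classifier's output (label_names matches the mapping in app.py):

import torch
import torch.nn.functional as F

label_names = ["fake", "real"]           # same label2id mapping as app.py
logits = torch.tensor([[0.3, 2.1]])      # hypothetical output, shape (batch, num_labels)

probabilities = F.softmax(logits, dim=-1)               # each row now sums to 1
confidence, pred_id = torch.max(probabilities, dim=-1)  # max probability and its index
print(f"Prediction: {label_names[pred_id.item()]}, "
      f"Confidence: {confidence.item() * 100:.2f}%")
# -> Prediction: real, Confidence: 85.81%

Since the two softmax outputs sum to 1, the reported confidence is always at least 50% for this binary head; it is the model's own probability estimate, not a calibrated accuracy guarantee.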