Spaces:
Sleeping
Sleeping
File size: 6,151 Bytes
3fc2ce9 3c32de9 ebe2b18 3c32de9 ebe2b18 3c32de9 5187bce 1a7861d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
from fastapi import FastAPI
# ASGI application object; routes are attached to it through its decorators.
app = FastAPI()


@app.get("/")
def greet_json():
    """Answer GET requests on the root path with a fixed greeting mapping."""
    greeting = {"Hello": "World!"}
    return greeting
#--------------------------------------------------------------------------------------------------------------------
import os
import numpy as np
import tensorflow as tf
import tensorflow
import librosa
import matplotlib.pyplot as plt
# import gradio as gr
import os
os.environ["TORCH_HOME"] = "/tmp/torch_cache"
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from transformers import pipeline
class UnifiedDeepfakeDetector:
    """Run several audio deepfake classifiers over a single audio file.

    Four models are held on the instance:

    * ``vgg_model`` - a VGG16 backbone with a small dense head, built by
      :meth:`build_vgg16_model` and fed mel-spectrogram images.
    * ``dense_model`` / ``cnn_model`` - Keras models loaded from ``.h5``
      files (relative paths), fed a time-averaged 40-coefficient MFCC vector.
    * ``melody_machine`` - a ``transformers`` pipeline created for
      ``MelodyMachine/Deepfake-audio-detection-V2``.
    """

    def __init__(self):
        """Build the VGG16 model and load the three other models."""
        self.input_shape = (224, 224, 3)
        self.vgg_model = self.build_vgg16_model()
        self.dense_model = tf.keras.models.load_model('deepfake_detection_model.h5')
        self.cnn_model = tf.keras.models.load_model('audio_deepfake_detection_model_cnn.h5')
        self.melody_machine = pipeline(model="MelodyMachine/Deepfake-audio-detection-V2")

    def build_vgg16_model(self):
        """Return a compiled binary classifier on top of a frozen VGG16 base.

        The convolutional base uses ImageNet weights with every layer frozen;
        a pooling + dense head with a single sigmoid unit is stacked on top.

        NOTE(review): the head layers are created fresh here and this method
        loads no trained weights for them, so unless weights are restored
        elsewhere the sigmoid output is untrained - confirm.
        """
        base_model = VGG16(weights='imagenet', include_top=False, input_shape=self.input_shape)
        for layer in base_model.layers:
            layer.trainable = False

        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = Dense(512, activation='relu')(x)
        x = Dropout(0.5)(x)
        x = Dense(256, activation='relu')(x)
        x = Dropout(0.3)(x)
        output = Dense(1, activation='sigmoid')(x)

        model = Model(inputs=base_model.input, outputs=output)
        model.compile(
            optimizer=Adam(learning_rate=0.0001),
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )
        return model

    @staticmethod
    def _normalize_db(spectrogram_db):
        """Min-max scale a dB spectrogram array into the range [0, 1].

        A constant array (e.g. what a silent clip produces) has zero span;
        dividing by it would fill the image with NaN, so it maps to all
        zeros instead.
        """
        lowest = spectrogram_db.min()
        span = spectrogram_db.max() - lowest
        if span == 0:
            return np.zeros_like(spectrogram_db, dtype=float)
        return (spectrogram_db - lowest) / span

    @staticmethod
    def _save_spectrogram_plot(spectrogram_db, plot_path='spectrogram_plot.png'):
        """Render the dB spectrogram to ``plot_path`` and return that path."""
        # ``librosa.display`` is a submodule; importing it explicitly avoids
        # relying on ``import librosa`` exposing it as an attribute.
        import librosa.display

        fig = plt.figure(figsize=(12, 6))
        try:
            librosa.display.specshow(spectrogram_db, y_axis='mel', x_axis='time', cmap='viridis')
            plt.colorbar(format='%+2.0f dB')
            plt.title('Mel Spectrogram Analysis')
            plt.savefig(plot_path, dpi=300, bbox_inches='tight')
        finally:
            # Close even when drawing/saving fails so figures do not pile up.
            plt.close(fig)
        return plot_path

    @staticmethod
    def _label_from_class_scores(scores):
        """Map a per-class score vector to a label; index 0 is read as FAKE."""
        return "FAKE" if np.argmax(scores) == 0 else "REAL"

    def audio_to_spectrogram(self, file_path, plot=False):
        """Turn an audio file into a VGG16 input image or a saved plot.

        Loads at most the first 5 seconds of ``file_path`` at 22050 Hz and
        computes a 224-band mel spectrogram (``fmax=8000``) in dB relative to
        its own maximum.

        Args:
            file_path: Audio file handed to ``librosa.load``.
            plot: When true, write the spectrogram to ``spectrogram_plot.png``
                and return that path instead of an image tensor.

        Returns:
            With ``plot=False``: the spectrogram min-max scaled, repeated over
            three channels, resized to 224x224, multiplied by 255 and passed
            through VGG16's ``preprocess_input``. With ``plot=True``: the PNG
            path. ``None`` if anything raises (the error is printed).
        """
        try:
            audio, sr = librosa.load(file_path, duration=5.0, sr=22050)
            spectrogram = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=224, fmax=8000)
            spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)

            if plot:
                return self._save_spectrogram_plot(spectrogram_db)

            spectrogram_norm = self._normalize_db(spectrogram_db)
            spectrogram_rgb = np.stack([spectrogram_norm] * 3, axis=-1)
            spectrogram_resized = tf.image.resize(spectrogram_rgb, (224, 224))
            return preprocess_input(spectrogram_resized * 255)
        except Exception as e:
            print(f"Spectrogram error: {e}")
            return None

    def analyze_audio_rf(self, audio_path, model_choice="all"):
        """Classify ``audio_path`` with the selected model(s).

        Args:
            audio_path: Audio file handed to ``librosa.load`` and to the
                MelodyMachine pipeline.
            model_choice: ``"VGG16"``, ``"Dense"``, ``"CNN"``,
                ``"MelodyMachine"`` or ``"all"``. Any other value runs no
                model and yields an empty list.

        Returns:
            A list of labels, one per model that ran, in the order VGG16,
            Dense, CNN, MelodyMachine. The first three produce ``"FAKE"`` or
            ``"REAL"``; the MelodyMachine entry is that model's top-scoring
            label upper-cased. The VGG16 entry is skipped when the
            spectrogram could not be produced.

            On any exception the error is printed and the tuple
            ``(None, None, None)`` is returned.
            NOTE(review): that failure shape does not match the success
            shape (a list); it is kept as-is because callers are not visible
            here - confirm what they expect before unifying it.
        """
        labels = []
        try:
            # Decoded up front so an unreadable file fails before any model runs.
            audio, sr = librosa.load(audio_path, res_type="kaiser_fast")

            if model_choice in ("VGG16", "all"):
                spec = self.audio_to_spectrogram(audio_path)
                if spec is not None:
                    score = self.vgg_model.predict(np.expand_dims(spec, axis=0))[0][0]
                    # Return value unused; the call is kept because it writes
                    # spectrogram_plot.png, which other code may read.
                    self.audio_to_spectrogram(audio_path, plot=True)
                    labels.append("FAKE" if score > 0.5 else "REAL")

            if model_choice in ("Dense", "CNN", "all"):
                # Both MFCC models consume the same time-averaged vector, so
                # compute it once and only reshape per model.
                mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
                mfcc_mean = np.mean(mfcc.T, axis=0)

                if model_choice in ("Dense", "all"):
                    scores = self.dense_model.predict(mfcc_mean.reshape(1, -1))
                    labels.append(self._label_from_class_scores(scores[0]))

                if model_choice in ("CNN", "all"):
                    scores = self.cnn_model.predict(mfcc_mean.reshape(1, 40, 1, 1))
                    labels.append(self._label_from_class_scores(scores[0]))

            if model_choice in ("MelodyMachine", "all"):
                candidates = self.melody_machine(audio_path)
                best = max(candidates, key=lambda c: c['score'])
                labels.append(best['label'].upper())

            return labels
        except Exception as e:
            print(f"Analysis error: {e}")
            return None, None, None