File size: 6,151 Bytes
3fc2ce9
3c32de9
ebe2b18
3c32de9
ebe2b18
 
3c32de9
5187bce
1a7861d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151

from fastapi import FastAPI

# ASGI application object; routes are registered on it via decorators below.
app = FastAPI()


@app.get("/")
def greet_json():
    """Handle GET requests on the root path with a fixed greeting payload."""
    greeting = {"Hello": "World!"}
    return greeting


#--------------------------------------------------------------------------------------------------------------------

import os
import numpy as np
import tensorflow as tf
import tensorflow
import librosa
import matplotlib.pyplot as plt
# import gradio as gr

import os
# Redirect the torch cache directory to a location under /tmp.
# NOTE(review): presumably needed because the default cache location is not
# writable in the deployment container — confirm.
os.environ["TORCH_HOME"] = "/tmp/torch_cache"


from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from transformers import pipeline

class UnifiedDeepfakeDetector:
    """Run several audio deepfake classifiers behind one interface.

    Four models are held by an instance:

    * ``vgg_model`` - a VGG16-based binary classifier built in-process that
      consumes mel-spectrogram images.
    * ``dense_model`` / ``cnn_model`` - Keras models loaded from ``.h5`` files
      that consume 40 time-averaged MFCC coefficients.
    * ``melody_machine`` - a ``transformers`` pipeline that is called with the
      audio file path directly.
    """

    # Parameters of the mel spectrogram fed to the VGG16 model; the plot uses
    # the same values so that its axes are labelled correctly.
    _SPECTROGRAM_SR = 22050
    _SPECTROGRAM_FMAX = 8000

    def __init__(self):
        """Build or load every model.

        The two ``.h5`` paths are relative, so they are resolved against the
        current working directory.
        """
        self.input_shape = (224, 224, 3)
        self.vgg_model = self.build_vgg16_model()
        self.dense_model = tf.keras.models.load_model('deepfake_detection_model.h5')
        self.cnn_model = tf.keras.models.load_model('audio_deepfake_detection_model_cnn.h5')
        self.melody_machine = pipeline(model="MelodyMachine/Deepfake-audio-detection-V2")

    def build_vgg16_model(self):
        """Return a compiled binary classifier on top of a frozen VGG16 base.

        The convolutional base uses ImageNet weights and is not trainable. A
        pooling layer, two dense/dropout stages and a single sigmoid unit are
        stacked on top of it.

        NOTE(review): the dense head created here is freshly initialised and
        no trained weights are loaded for it anywhere in this class, so the
        scores it produces look untrained — confirm whether weights should be
        loaded.
        """
        base_model = VGG16(weights='imagenet', include_top=False, input_shape=self.input_shape)
        for layer in base_model.layers:
            layer.trainable = False

        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = Dense(512, activation='relu')(x)
        x = Dropout(0.5)(x)
        x = Dense(256, activation='relu')(x)
        x = Dropout(0.3)(x)
        output = Dense(1, activation='sigmoid')(x)

        model = Model(inputs=base_model.input, outputs=output)
        model.compile(optimizer=Adam(learning_rate=0.0001),
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        return model

    @staticmethod
    def _normalize_spectrogram(spectrogram_db):
        """Min-max scale *spectrogram_db* into the range [0, 1].

        A constant input (for example the spectrogram of silent audio) has a
        zero value range; dividing by it would fill the result with NaNs, so
        an all-zero array of the same shape is returned instead.
        """
        lowest = spectrogram_db.min()
        value_range = spectrogram_db.max() - lowest
        if value_range == 0:
            return np.zeros_like(spectrogram_db, dtype=float)
        return (spectrogram_db - lowest) / value_range

    def _save_spectrogram_plot(self, spectrogram_db, sr, fmax, plot_path='spectrogram_plot.png'):
        """Render *spectrogram_db* to *plot_path* and return that path.

        ``sr`` and ``fmax`` must be the values the spectrogram was computed
        with; they are forwarded to ``specshow`` so the axes match the data.
        The output path is fixed by default, so each call overwrites the
        previous image.
        """
        # Imported explicitly (and under an alias, so ``librosa`` itself is not
        # rebound as a local name): ``import librosa`` alone does not expose
        # the ``display`` submodule on every librosa release.
        import librosa.display as librosa_display

        figure = plt.figure(figsize=(12, 6))
        try:
            librosa_display.specshow(spectrogram_db, sr=sr, fmax=fmax,
                                     y_axis='mel', x_axis='time', cmap='viridis')
            plt.colorbar(format='%+2.0f dB')
            plt.title('Mel Spectrogram Analysis')
            plt.savefig(plot_path, dpi=300, bbox_inches='tight')
        finally:
            # Always release the figure, even when rendering or saving fails.
            plt.close(figure)
        return plot_path

    def audio_to_spectrogram(self, file_path, plot=False):
        """Convert an audio file into a mel spectrogram.

        At most the first 5 seconds of *file_path* are loaded at 22050 Hz and
        turned into a 224-band mel spectrogram in decibels.

        Args:
            file_path: Path of the audio file to load.
            plot: When true, save the spectrogram as an image and return the
                image path instead of model input.

        Returns:
            With ``plot=False``: the spectrogram scaled to [0, 255], repeated
            over three channels, resized to the model's input size and passed
            through VGG16 ``preprocess_input``.
            With ``plot=True``: the path of the saved image.
            ``None`` if any step raises; the error is printed.
        """
        try:
            audio, sr = librosa.load(file_path, duration=5.0, sr=self._SPECTROGRAM_SR)
            spectrogram = librosa.feature.melspectrogram(
                y=audio, sr=sr, n_mels=224, fmax=self._SPECTROGRAM_FMAX)
            spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)

            if plot:
                return self._save_spectrogram_plot(spectrogram_db, sr, self._SPECTROGRAM_FMAX)

            spectrogram_norm = self._normalize_spectrogram(spectrogram_db)
            spectrogram_rgb = np.stack([spectrogram_norm] * 3, axis=-1)
            spectrogram_resized = tf.image.resize(spectrogram_rgb, self.input_shape[:2])
            return preprocess_input(spectrogram_resized * 255)

        except Exception as e:
            print(f"Spectrogram error: {e}")
            return None

    @staticmethod
    def _binary_result(score):
        """Describe a single sigmoid *score*; values above 0.5 mean FAKE."""
        is_fake = score > 0.5
        return {
            "prediction": "FAKE" if is_fake else "REAL",
            "confidence": score if is_fake else 1 - score,
            "raw_score": score,
        }

    @staticmethod
    def _class_result(scores):
        """Describe a vector of per-class *scores*; index 0 is the FAKE class."""
        return {
            "prediction": "FAKE" if np.argmax(scores) == 0 else "REAL",
            "confidence": float(np.max(scores)),
            "raw_scores": scores.tolist(),
        }

    def analyze_audio_rf(self, audio_path, model_choice="all"):
        """Classify an audio file with the selected model(s).

        Args:
            audio_path: Path of the audio file to analyse.
            model_choice: ``"VGG16"``, ``"Dense"``, ``"CNN"``,
                ``"MelodyMachine"`` or ``"all"``. Any other value selects no
                model and yields an empty list.

        Returns:
            A list with one label per model that ran, in the order VGG16,
            Dense, CNN, MelodyMachine. The first three produce ``"FAKE"`` or
            ``"REAL"``; the MelodyMachine entry is the upper-cased label of
            the pipeline's highest-scoring prediction. The VGG16 entry is
            omitted when the spectrogram could not be computed.

            If anything raises, the error is printed and the tuple
            ``(None, None, None)`` is returned.
            NOTE(review): that tuple is truthy and has a different type from
            the success value, so callers must compare against it explicitly;
            kept unchanged for backward compatibility.

        Side effects:
            When the VGG16 model runs, ``spectrogram_plot.png`` is (re)written
            in the current working directory.
        """
        try:
            audio, sr = librosa.load(audio_path, res_type="kaiser_fast")
            # Insertion order of this dict defines the order of the returned labels.
            results = {}

            # VGG16 Analysis
            if model_choice in ("VGG16", "all"):
                spec = self.audio_to_spectrogram(audio_path)
                if spec is not None:
                    score = float(self.vgg_model.predict(np.expand_dims(spec, axis=0))[0][0])
                    results["VGG16"] = self._binary_result(score)
                    # Called only for its side effect of writing the plot image;
                    # a failure here is reported by audio_to_spectrogram itself.
                    self.audio_to_spectrogram(audio_path, plot=True)

            # The Dense and CNN models share the same features: 40 MFCCs
            # averaged over time, computed once and reshaped per model.
            if model_choice in ("Dense", "CNN", "all"):
                mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
                mfcc_mean = np.mean(mfcc.T, axis=0)

                # Dense Model Analysis
                if model_choice in ("Dense", "all"):
                    pred = self.dense_model.predict(mfcc_mean.reshape(1, -1))
                    results["Dense"] = self._class_result(pred[0])

                # CNN Model Analysis
                if model_choice in ("CNN", "all"):
                    pred = self.cnn_model.predict(mfcc_mean.reshape(1, 40, 1, 1))
                    results["CNN"] = self._class_result(pred[0])

            # Melody Machine Analysis
            if model_choice in ("MelodyMachine", "all"):
                candidates = self.melody_machine(audio_path)
                best_pred = max(candidates, key=lambda x: x['score'])
                results["MelodyMachine"] = {
                    "prediction": best_pred['label'].upper(),
                    "confidence": float(best_pred['score']),
                    "all_predictions": candidates,
                }

            return [entry["prediction"] for entry in results.values()]

        except Exception as e:
            print(f"Analysis error: {e}")
            return None, None, None