File size: 2,501 Bytes
be0f58d
 
ca254db
be0f58d
 
7ec95bc
ca254db
4ce6045
be0f58d
 
 
 
e698f0f
be0f58d
 
 
 
e698f0f
4ce6045
 
e698f0f
be0f58d
e698f0f
be0f58d
 
4ce6045
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca254db
 
 
 
 
 
 
 
7ec95bc
be0f58d
 
0a56869
e698f0f
be0f58d
e698f0f
 
 
4ce6045
ca254db
be0f58d
e698f0f
ca254db
e698f0f
be0f58d
e698f0f
be0f58d
e698f0f
 
 
0a56869
e698f0f
0a56869
be0f58d
 
 
96e306d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import json
import numpy as np
from flask import Flask, jsonify, request
from transformers import pipeline
from pydub import AudioSegment
from scipy.io import wavfile
from io import BytesIO

# Flask application object; the routes below are registered on it via
# @app.route.
app = Flask(__name__)

# Module-level handle for the audio-classification pipeline. It starts as None
# and is assigned by download_models(); detect_deepfake() calls it per request.
audio_model = None

def download_models():
    """Build the audio-classification pipeline and store it in ``audio_model``.

    The pipeline object is assigned to the module-level ``audio_model``
    global; nothing is returned. Progress is reported on stdout before and
    after the pipeline is constructed.
    """
    global audio_model

    task_name = "audio-classification"
    model_id = "MelodyMachine/Deepfake-audio-detection-V2"

    print("Downloading models...")
    # Constructing the pipeline is what fetches/loads the model weights.
    audio_model = pipeline(task_name, model=model_id)
    print("Model downloaded and ready to use.")

# Load the model eagerly so it is ready before the first request is served.
# NOTE: this call sits at module top level (outside the __main__ guard), so it
# runs on every import of this module, not only when run as a script.
download_models()

def preprocess_audio(file, target_duration_ms=10000, sample_rate=16000):
    """Decode an audio file into a fixed-length, peak-normalized waveform.

    The audio is down-mixed to mono, resampled to ``sample_rate``, padded with
    trailing silence or truncated to exactly ``target_duration_ms``, and
    scaled so that its largest absolute sample equals 1.0.

    Args:
        file: Anything ``AudioSegment.from_file`` accepts (a path or a
            file-like object such as an uploaded file).
        target_duration_ms: Length of the returned clip in milliseconds.
            Defaults to 10 seconds.
        sample_rate: Frame rate in Hz to resample to. Defaults to 16000.

    Returns:
        A 1-D ``numpy.float32`` array with values in ``[-1, 1]``. Completely
        silent (or empty) input yields zeros rather than NaNs.

    Raises:
        Whatever ``AudioSegment.from_file`` raises when the input cannot be
        decoded.
    """
    audio = AudioSegment.from_file(file)

    # Down-mix to a single channel and resample to the target frame rate.
    audio = audio.set_channels(1).set_frame_rate(sample_rate)

    # Force the clip to exactly target_duration_ms (len() is in milliseconds).
    duration_ms = len(audio)
    if duration_ms < target_duration_ms:
        # Create the silence at the same frame rate as the clip; otherwise
        # pydub's default rate is used and the two get re-synced on append.
        padding = AudioSegment.silent(
            duration=target_duration_ms - duration_ms,
            frame_rate=sample_rate,
        )
        audio = audio + padding
    elif duration_ms > target_duration_ms:
        audio = audio[:target_duration_ms]

    # Integer PCM samples -> float32 so they can be scaled in place.
    audio_np = np.array(audio.get_array_of_samples()).astype(np.float32)

    # Peak-normalize to [-1, 1]. Skip the division when there is no signal
    # (all-zero or empty array): dividing by a zero peak would fill the
    # waveform with NaNs, and np.max raises on an empty array.
    peak = np.max(np.abs(audio_np)) if audio_np.size else 0.0
    if peak > 0:
        audio_np /= peak

    return audio_np

@app.route('/detect', methods=['POST'])
def detect_deepfake():
    """Handle ``POST /detect``: classify an uploaded audio file.

    The upload is read from the ``audio_file`` form field, preprocessed, and
    passed to ``audio_model``.

    Returns:
        A ``(response, status)`` pair:
        * 200 with ``{"message", "results"}``, where ``results`` maps each
          label reported by the model to its score;
        * 400 with ``{"error"}`` when no audio file is supplied;
        * 500 with ``{"error"}`` carrying the exception text when
          preprocessing or inference raises.
    """
    upload = request.files.get('audio_file')

    # Guard clause: nothing usable was uploaded under 'audio_file'.
    if not upload:
        return jsonify({"error": "Invalid input. Please provide an audio file."}), 400

    try:
        waveform = preprocess_audio(upload)
        predictions = audio_model(waveform)

        # Flatten the list of {'label', 'score'} items into label -> score.
        scores = {}
        for prediction in predictions:
            scores[prediction['label']] = prediction['score']

        payload = {"message": "Detection completed", "results": scores}
        return jsonify(payload), 200
    except Exception as exc:
        # Endpoint boundary: report any failure to the client as a 500.
        return jsonify({"error": str(exc)}), 500

# Script entry point: this branch only runs when the file is executed
# directly, not when the module is imported.
if __name__ == '__main__':
    # Start Flask's built-in server, listening on all network interfaces
    # (0.0.0.0) on port 7860.
    app.run(host='0.0.0.0', port=7860)