|
import os |
|
import argparse |
|
import json |
|
import pandas as pd |
|
from tqdm import tqdm |
|
from concurrent.futures import ProcessPoolExecutor, as_completed |
|
from app import DeepfakeDetector, convert_audio |
|
|
|
def process_single_file(file_path, detector):
    """Process a single audio file and return the detection result.

    Args:
        file_path: Path to the audio file to analyse.
        detector: Detector instance whose ``detect(path)`` method returns a
            result dict (presumably with a 'prediction' key — see callers).

    Returns:
        dict: The detector's result augmented with "file_path" and
        "file_name" keys, or a dict containing an "error" key describing
        the failure. Never raises — errors are reported in the result so a
        batch run is not aborted by one bad file.
    """
    try:
        # May return a new temporary file path, or the original path when
        # no conversion was necessary (inferred from the cleanup check below).
        processed_audio = convert_audio(file_path)

        result = detector.detect(processed_audio)

        result["file_path"] = file_path
        result["file_name"] = os.path.basename(file_path)

        # Best-effort cleanup of the temporary converted file. Only swallow
        # filesystem errors here — a bare `except:` would also hide
        # KeyboardInterrupt/SystemExit and real bugs.
        if processed_audio != file_path:
            try:
                os.remove(processed_audio)
            except OSError:
                pass

        return result
    except Exception as e:
        # Surface the failure in the result instead of propagating it.
        return {
            "file_path": file_path,
            "file_name": os.path.basename(file_path),
            "error": str(e)
        }
|
|
|
def process_directory(directory_path, output_format='json', max_workers=None, recursive=False):
    """Run deepfake detection over every audio file in a directory.

    Args:
        directory_path: Directory to scan for audio files.
        output_format: 'json' or 'csv'; format of the results file written
            into ``directory_path``.
        max_workers: Worker-process count for the pool (None = CPU count).
        recursive: When True, also scan subdirectories.

    Side effects: writes a results file into ``directory_path`` and prints
    progress plus a summary to stdout.
    """
    detector = DeepfakeDetector()

    supported = ('.wav', '.mp3', '.ogg', '.flac')

    # Gather candidate files, optionally walking subdirectories.
    if recursive:
        audio_files = [
            os.path.join(parent, name)
            for parent, _, names in os.walk(directory_path)
            for name in names
            if name.lower().endswith(supported)
        ]
    else:
        audio_files = [
            os.path.join(directory_path, name)
            for name in os.listdir(directory_path)
            if name.lower().endswith(supported)
        ]

    if not audio_files:
        print(f"No audio files found in {directory_path}")
        return

    print(f"Found {len(audio_files)} audio files to process")

    # Fan the work out across processes and collect results as they finish.
    # NOTE(review): the detector is pickled to each worker per task —
    # confirm DeepfakeDetector is picklable and cheap to serialize.
    results = []
    with ProcessPoolExecutor(max_workers=max_workers) as pool:
        pending = {pool.submit(process_single_file, path, detector): path
                   for path in audio_files}
        progress = tqdm(as_completed(pending), total=len(audio_files),
                        desc="Processing audio files")
        for done in progress:
            results.append(done.result())

    # Persist the results in the requested format.
    if output_format == 'json':
        output_file = os.path.join(directory_path, "deepfake_detection_results.json")
        with open(output_file, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to {output_file}")
    elif output_format == 'csv':
        output_file = os.path.join(directory_path, "deepfake_detection_results.csv")
        pd.DataFrame(results).to_csv(output_file, index=False)
        print(f"Results saved to {output_file}")

    # Aggregate summary. `results` is non-empty here (early return above),
    # so the percentage divisions are safe.
    total = len(results)
    real_count = sum(1 for r in results if 'prediction' in r and r['prediction'] == 'Real')
    fake_count = sum(1 for r in results if 'prediction' in r and r['prediction'] == 'Deepfake')
    error_count = sum(1 for r in results if 'error' in r)

    print("\nSummary:")
    print(f"Total files processed: {total}")
    print(f"Detected as real: {real_count} ({real_count/total*100:.1f}%)")
    print(f"Detected as deepfake: {fake_count} ({fake_count/total*100:.1f}%)")
    print(f"Errors during processing: {error_count} ({error_count/total*100:.1f}%)")
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse arguments and kick off the batch run.
    cli = argparse.ArgumentParser(description='Batch process audio files for deepfake detection')
    cli.add_argument('directory', help='Directory containing audio files to process')
    cli.add_argument('--format', choices=['json', 'csv'], default='json', help='Output format (default: json)')
    cli.add_argument('--workers', type=int, default=None, help='Number of worker processes (default: CPU count)')
    cli.add_argument('--recursive', action='store_true', help='Search for audio files recursively in subdirectories')

    options = cli.parse_args()
    process_directory(options.directory, options.format, options.workers, options.recursive)