deepFake_Voice_detection / batch_processor.py
Ashishdubey1974's picture
Deploy deepfake voice detection app
d8240b4
import os
import argparse
import json
import pandas as pd
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor, as_completed
from app import DeepfakeDetector, convert_audio
def process_single_file(file_path, detector):
"""Process a single audio file and return the detection result."""
try:
# Convert audio to the required format
processed_audio = convert_audio(file_path)
# Detect if it's a deepfake
result = detector.detect(processed_audio)
# Add the file path to the result
result["file_path"] = file_path
result["file_name"] = os.path.basename(file_path)
# Clean up temporary files if needed
if processed_audio != file_path:
try:
os.remove(processed_audio)
except:
pass
return result
except Exception as e:
return {
"file_path": file_path,
"file_name": os.path.basename(file_path),
"error": str(e)
}
def process_directory(directory_path, output_format='json', max_workers=None, recursive=False):
"""Process all audio files in a directory."""
# Initialize the detector
detector = DeepfakeDetector()
# Find all audio files
audio_extensions = ('.wav', '.mp3', '.ogg', '.flac')
audio_files = []
if recursive:
for root, _, files in os.walk(directory_path):
for file in files:
if file.lower().endswith(audio_extensions):
audio_files.append(os.path.join(root, file))
else:
audio_files = [os.path.join(directory_path, f) for f in os.listdir(directory_path)
if f.lower().endswith(audio_extensions)]
if not audio_files:
print(f"No audio files found in {directory_path}")
return
print(f"Found {len(audio_files)} audio files to process")
# Process files with a progress bar
results = []
# Use parallel processing for faster analysis
with ProcessPoolExecutor(max_workers=max_workers) as executor:
futures = {executor.submit(process_single_file, file, detector): file for file in audio_files}
for future in tqdm(as_completed(futures), total=len(audio_files), desc="Processing audio files"):
result = future.result()
results.append(result)
# Save results based on output format
if output_format == 'json':
output_file = os.path.join(directory_path, "deepfake_detection_results.json")
with open(output_file, 'w') as f:
json.dump(results, f, indent=2)
print(f"Results saved to {output_file}")
elif output_format == 'csv':
output_file = os.path.join(directory_path, "deepfake_detection_results.csv")
df = pd.DataFrame(results)
df.to_csv(output_file, index=False)
print(f"Results saved to {output_file}")
# Print summary
total = len(results)
real_count = sum(1 for r in results if 'prediction' in r and r['prediction'] == 'Real')
fake_count = sum(1 for r in results if 'prediction' in r and r['prediction'] == 'Deepfake')
error_count = sum(1 for r in results if 'error' in r)
print("\nSummary:")
print(f"Total files processed: {total}")
print(f"Detected as real: {real_count} ({real_count/total*100:.1f}%)")
print(f"Detected as deepfake: {fake_count} ({fake_count/total*100:.1f}%)")
print(f"Errors during processing: {error_count} ({error_count/total*100:.1f}%)")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Batch process audio files for deepfake detection')
parser.add_argument('directory', help='Directory containing audio files to process')
parser.add_argument('--format', choices=['json', 'csv'], default='json', help='Output format (default: json)')
parser.add_argument('--workers', type=int, default=None, help='Number of worker processes (default: CPU count)')
parser.add_argument('--recursive', action='store_true', help='Search for audio files recursively in subdirectories')
args = parser.parse_args()
process_directory(args.directory, args.format, args.workers, args.recursive)