Spaces:

Jonny001
/

Video-Face-Swap

Running

File size: 9,681 Bytes

#!/usr/bin/env python3

import os
import sys

# These environment variables must be in place *before* tensorflow, torch and
# onnxruntime are imported below: the native libraries consult them while they
# initialise, so assigning them after the imports comes too late to take effect.
# That is why this block deliberately sits between the imports.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # reduce TensorFlow log level
if any(arg.startswith('--execution-provider') for arg in sys.argv):
    # Restrict OpenMP to a single thread when an execution provider is requested.
    os.environ['OMP_NUM_THREADS'] = '1'

import platform
import signal
import shutil
import argparse
import warnings
from typing import List

import torch
import onnxruntime
import tensorflow

import roop.globals
import roop.metadata
import roop.ui as ui
from roop.predicter import predict_image, predict_video
from roop.processors.frame.core import get_frame_processors_modules
from roop.utilities import (
    has_image_extension, is_image, is_video, detect_fps, create_video, extract_frames,
    get_temp_frame_paths, restore_audio, create_temp, move_temp, clean_temp, normalize_output_path
)

# Silence noisy third-party warnings that are not actionable for users.
warnings.filterwarnings('ignore', category=FutureWarning, module='insightface')
warnings.filterwarnings('ignore', category=UserWarning, module='torchvision')


def parse_args() -> None:
    """Parse the command line and publish the result on ``roop.globals``.

    Side effects:
        * Installs a SIGINT handler that calls ``destroy()``.
        * Assigns every parsed option to the matching ``roop.globals`` attribute.

    ``--keep-audio`` is enabled by default; ``--no-keep-audio`` switches it off
    (with a plain ``store_true`` action and ``default=True`` the option could
    never be disabled).
    """
    signal.signal(signal.SIGINT, lambda signal_number, frame: destroy())

    parser = argparse.ArgumentParser(
        formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=100)
    )
    parser.add_argument('-s', '--source', help='Path to the source image', dest='source_path')
    parser.add_argument('-t', '--target', help='Path to the target image or video', dest='target_path')
    parser.add_argument('-o', '--output', help='Path to the output file or directory', dest='output_path')
    parser.add_argument(
        '--frame-processor',
        help='Frame processors (choices: face_swapper, face_enhancer, ...)',
        dest='frame_processor', default=['face_swapper'], nargs='+',
    )
    parser.add_argument('--keep-fps', help='Keep original FPS', dest='keep_fps', action='store_true', default=False)
    # BooleanOptionalAction (Python 3.9+) accepts both --keep-audio and --no-keep-audio.
    parser.add_argument(
        '--keep-audio', help='Keep original audio', dest='keep_audio',
        action=argparse.BooleanOptionalAction, default=True,
    )
    parser.add_argument('--keep-frames', help='Keep temporary frames', dest='keep_frames', action='store_true', default=False)
    parser.add_argument('--many-faces', help='Process every face', dest='many_faces', action='store_true', default=False)
    parser.add_argument(
        '--video-encoder', help='Output video encoder', dest='video_encoder',
        default='libx264', choices=['libx264', 'libx265', 'libvpx-vp9'],
    )
    parser.add_argument(
        '--video-quality', help='Output video quality', dest='video_quality',
        type=int, default=18, choices=range(52), metavar='[0-51]',
    )
    parser.add_argument(
        '--max-memory', help='Maximum amount of RAM in GB', dest='max_memory',
        type=int, default=suggest_max_memory(),
    )
    parser.add_argument(
        '--execution-provider', help='Available execution provider (choices: cpu, ...)',
        dest='execution_provider', default=['cpu'], choices=suggest_execution_providers(), nargs='+',
    )
    # NOTE(review): this default is computed before the providers chosen on the
    # command line are stored on roop.globals further down - confirm intended.
    parser.add_argument(
        '--execution-threads', help='Number of execution threads', dest='execution_threads',
        type=int, default=suggest_execution_threads(),
    )
    parser.add_argument('-v', '--version', action='version', version=f'{roop.metadata.name} {roop.metadata.version}')

    args = parser.parse_args()

    roop.globals.source_path = args.source_path
    roop.globals.target_path = args.target_path
    roop.globals.output_path = normalize_output_path(roop.globals.source_path, roop.globals.target_path, args.output_path)
    roop.globals.frame_processors = args.frame_processor
    # Store a real boolean rather than whichever path string happened to be set.
    roop.globals.headless = bool(args.source_path or args.target_path or args.output_path)
    roop.globals.keep_fps = args.keep_fps
    roop.globals.keep_audio = args.keep_audio
    roop.globals.keep_frames = args.keep_frames
    roop.globals.many_faces = args.many_faces
    roop.globals.video_encoder = args.video_encoder
    roop.globals.video_quality = args.video_quality
    roop.globals.max_memory = args.max_memory
    roop.globals.execution_providers = decode_execution_providers(args.execution_provider)
    roop.globals.execution_threads = args.execution_threads


def encode_execution_providers(execution_providers: List[str]) -> List[str]:
    """Return the short, lowercase form of each provider name.

    The text ``'ExecutionProvider'`` is removed from every entry and the
    remainder is lowercased, e.g. ``'CUDAExecutionProvider'`` -> ``'cuda'``.
    """
    short_names: List[str] = []
    for full_name in execution_providers:
        stripped = full_name.replace('ExecutionProvider', '')
        short_names.append(stripped.lower())
    return short_names


def decode_execution_providers(execution_providers: List[str]) -> List[str]:
    """Map short provider names back to ONNX Runtime provider identifiers.

    An available provider is kept when any requested name occurs as a
    substring of its encoded (short, lowercase) form. The result follows the
    order reported by ``onnxruntime.get_available_providers()``.
    """
    available = onnxruntime.get_available_providers()
    selected: List[str] = []
    for full_name, short_name in zip(available, encode_execution_providers(available)):
        if any(requested in short_name for requested in execution_providers):
            selected.append(full_name)
    return selected


def suggest_max_memory() -> int:
    """Return the default ``--max-memory`` value in GB: 10 on macOS, 14 elsewhere."""
    running_on_macos = platform.system().lower() == 'darwin'
    return 10 if running_on_macos else 14


def suggest_execution_providers() -> List[str]:
    """Return the encoded names of every provider ONNX Runtime reports as available."""
    available = onnxruntime.get_available_providers()
    return encode_execution_providers(available)


def suggest_execution_threads() -> int:
    """Return the default thread count for the providers on ``roop.globals``.

    Yields 1 when the DirectML or ROCm provider is listed in
    ``roop.globals.execution_providers`` and 8 otherwise.
    """
    single_thread_providers = ('DmlExecutionProvider', 'ROCMExecutionProvider')
    configured = roop.globals.execution_providers
    if any(name in configured for name in single_thread_providers):
        return 1
    return 8


def limit_resources() -> None:
    """Cap TensorFlow GPU memory and, where supported, the process memory.

    Every GPU that TensorFlow lists is given one virtual device configured
    with ``memory_limit=1024``. If ``roop.globals.max_memory`` is truthy it is
    treated as a number of GB and applied per platform:

    * Windows - ``SetProcessWorkingSetSize`` with the byte count as both the
      minimum and the maximum working-set size.
    * macOS   - nothing is applied (see the note in the body).
    * other   - ``RLIMIT_DATA`` soft and hard limits are set to the byte count.
    """
    # Prevent TensorFlow memory leak
    gpus = tensorflow.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tensorflow.config.experimental.set_virtual_device_configuration(gpu, [
            tensorflow.config.experimental.VirtualDeviceConfiguration(memory_limit=1024)
        ])

    # Limit memory usage
    if not roop.globals.max_memory:
        return

    system = platform.system().lower()
    if system == 'darwin':
        # NOTE(review): no limit is applied on macOS. This branch used to
        # compute `max_memory * 1024 ** 6` without ever passing the value to
        # setrlimit; the dead computation was dropped and the behaviour (no
        # limit) kept. Confirm whether macOS should receive an RLIMIT_DATA cap.
        return

    memory = roop.globals.max_memory * 1024 ** 3  # GB -> bytes
    if system == 'windows':
        import ctypes
        kernel32 = ctypes.windll.kernel32
        # -1 is passed as the process handle argument.
        kernel32.SetProcessWorkingSetSize(-1, ctypes.c_size_t(memory), ctypes.c_size_t(memory))
    else:
        import resource
        resource.setrlimit(resource.RLIMIT_DATA, (memory, memory))


def release_resources() -> None:
    """Empty the PyTorch CUDA cache when the CUDA execution provider is configured."""
    if 'CUDAExecutionProvider' not in roop.globals.execution_providers:
        return
    torch.cuda.empty_cache()


def pre_check() -> bool:
    """Verify the runtime prerequisites and report the first one that is missing.

    Returns:
        ``True`` when the interpreter is Python 3.9 or newer and an ``ffmpeg``
        executable is found on ``PATH``; ``False`` otherwise, after a status
        message has been emitted.
    """
    python_supported = sys.version_info >= (3, 9)
    if not python_supported:
        update_status('Python version is not supported - please upgrade to 3.9 or higher.')
        return False

    ffmpeg_location = shutil.which('ffmpeg')
    if not ffmpeg_location:
        update_status('ffmpeg is not installed.')
        return False

    return True


def update_status(message: str, scope: str = 'ROOP.CORE') -> None:
    """Print ``[scope] message`` and, unless running headless, forward it to the UI.

    Args:
        message: Text to report.
        scope: Tag shown in square brackets in front of the console line.
    """
    console_line = f'[{scope}] {message}'
    print(console_line)
    if roop.globals.headless:
        return
    ui.update_status(message)


def start() -> None:
    """Run the configured frame processors over the target image or video.

    Returns without processing anything as soon as one frame processor's
    ``pre_start()`` returns a falsy value. Targets with an image extension go
    through the single-image pipeline; every other target is handled as a video.
    """
    for frame_processor in get_frame_processors_modules(roop.globals.frame_processors):
        if not frame_processor.pre_start():
            return

    if has_image_extension(roop.globals.target_path):
        _process_image_target()
    else:
        _process_video_target()


def _process_image_target() -> None:
    """Copy the target image to the output path and run each processor on that copy."""
    # Abort when the predictor flags the target
    # (presumably a content check - confirm in roop.predicter).
    if predict_image(roop.globals.target_path):
        destroy()
    shutil.copy2(roop.globals.target_path, roop.globals.output_path)
    for frame_processor in get_frame_processors_modules(roop.globals.frame_processors):
        update_status('Progressing...', frame_processor.NAME)
        # The output file is both read and overwritten, so processors chain on one another.
        frame_processor.process_image(roop.globals.source_path, roop.globals.output_path, roop.globals.output_path)
        frame_processor.post_process()
        release_resources()
    # Validate the file that was produced; checking the target would only
    # re-validate the untouched input and could never reveal a failed run.
    if is_image(roop.globals.output_path):
        update_status('Processing to image succeeded!')
    else:
        update_status('Processing to image failed!')


def _process_video_target() -> None:
    """Extract frames, process them, rebuild the video and optionally restore audio."""
    # Abort when the predictor flags the target
    # (presumably a content check - confirm in roop.predicter).
    if predict_video(roop.globals.target_path):
        destroy()

    update_status('Creating temp resources...')
    create_temp(roop.globals.target_path)
    update_status('Extracting frames...')
    extract_frames(roop.globals.target_path)
    temp_frame_paths = get_temp_frame_paths(roop.globals.target_path)

    for frame_processor in get_frame_processors_modules(roop.globals.frame_processors):
        update_status('Progressing...', frame_processor.NAME)
        frame_processor.process_video(roop.globals.source_path, temp_frame_paths)
        frame_processor.post_process()
        release_resources()

    # Handle FPS
    if roop.globals.keep_fps:
        update_status('Detecting FPS...')
        fps = detect_fps(roop.globals.target_path)
        update_status(f'Creating video with {fps} FPS...')
        create_video(roop.globals.target_path, fps)
    else:
        update_status('Creating video with 30.0 FPS...')
        create_video(roop.globals.target_path)

    # Handle audio
    if roop.globals.keep_audio:
        update_status('Restoring audio...' if roop.globals.keep_fps else 'Restoring audio and creating final video...')
        restore_audio(roop.globals.target_path)

    move_temp(roop.globals.target_path)
    clean_temp()
    update_status('Processing succeeded!')
    release_resources()


def destroy() -> None:
    """Announce shutdown, remove temporary resources and terminate the program.

    Raises:
        SystemExit: always, after the cleanup has run.
    """
    update_status('Cleaning up and exiting...')
    clean_temp()
    raise SystemExit


if __name__ == '__main__':
    # Populate roop.globals from the command line before anything reads it.
    parse_args()
    environment_ready = pre_check()
    if environment_ready:
        limit_resources()
        start()