File size: 4,998 Bytes
fe62bf5
 
 
 
 
3bbedf7
fe62bf5
 
 
 
 
 
3bbedf7
fe62bf5
3bbedf7
23f5054
fe62bf5
 
 
 
 
 
 
 
3bbedf7
fe62bf5
3bbedf7
fe62bf5
 
 
 
 
23f5054
fe62bf5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23f5054
fe62bf5
 
23f5054
fe62bf5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3bbedf7
 
fe62bf5
 
 
 
 
3bbedf7
 
 
 
 
 
fe62bf5
23f5054
 
 
 
fe62bf5
 
23f5054
fe62bf5
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import argparse
import shutil
import sys
from dotenv import load_dotenv, find_dotenv
from concurrent.futures import ThreadPoolExecutor

# Importing modules from the utils package
from utils.resize_images import main as resize_images_main
from utils.removebg import iterate_over_directory as removebg_iterate
from utils.photoroom import iterate_over_directory as photoroom_iterate
from utils.bria_rmbg20 import iterate_over_directory as bria_iterate
from utils.clipdrop import iterate_over_directory as clipdrop_iterate
from utils.upload_to_dataset import upload_to_dataset
from utils.resize_processed_images import process_images as downsize_processed_images
from utils.add_checkered_background import process_directory as add_checkered_background_process

def check_env_variables():
    """Validate the runtime environment before the pipeline starts.

    Aborts the process (via sys.exit) when no .env file can be located,
    or when any of the required API keys is absent from the environment
    after loading it.
    """
    if not find_dotenv():
        sys.exit("Error: .env file not found.")

    load_dotenv()

    required = ['REMOVEBG_API_KEY', 'PHOTOROOM_API_KEY', 'BRIA_API_TOKEN', 'CLIPDROP_API_KEY']
    missing = []
    for key in required:
        if not os.getenv(key):
            missing.append(key)

    if missing:
        sys.exit(f"Error: Missing environment variables: {', '.join(missing)}")

def copy_images(source_dir, dest_dir):
    """Flatten an image tree into a single directory.

    Walks ``source_dir`` recursively and copies every image file into
    ``dest_dir``, prefixing each filename with its immediate parent folder
    name (``<folder>_<filename>``) so files from different category folders
    cannot collide. Existing destination files are left untouched.

    Args:
        source_dir: Root directory to scan for images.
        dest_dir: Flat output directory; created if it does not exist.
    """
    os.makedirs(dest_dir, exist_ok=True)
    valid_extensions = ('.png', '.jpg', '.jpeg', '.webp')

    # Walk through the source directory
    for root, _, files in os.walk(source_dir):
        for filename in files:
            if filename.lower().endswith(valid_extensions):
                source_file = os.path.join(root, filename)

                # Extract the folder name
                folder_name = os.path.basename(root)
                # Prefix the folder name so the flattened names stay unique.
                # (Bug fix: the filename itself was previously dropped, so
                # every image in a folder mapped to the same destination.)
                new_filename = f"{folder_name}_{filename}"
                dest_file = os.path.join(dest_dir, new_filename)

                # Copy only real files that are not already in the destination.
                if os.path.isfile(source_file) and not os.path.exists(dest_file):
                    shutil.copy2(source_file, dest_file)
                    print(f"Copied: {new_filename}")
                else:
                    print(f"Skipped: {filename} (already exists or not a file)")

def main():
    """Run the image-processing pipeline end to end.

    Steps: validate the environment, merge category folders into a flat
    working directory, resize, remove backgrounds via four external APIs in
    parallel, composite a checkered background, and optionally upload the
    result to the Hugging Face Hub.
    """
    check_env_variables()

    parser = argparse.ArgumentParser(description="Image Processing Pipeline")
    parser.add_argument("--input-dir", type=str, default="original-images", help="Input directory for images")
    parser.add_argument("--work-dir", type=str, default="workdir", help="Working directory for intermediate images")
    parser.add_argument("--dataset-name", type=str, help="Name of the dataset to upload to Hugging Face Hub")
    parser.add_argument("--push-dataset", action="store_true", help="Push the dataset to the Hugging Face Hub")

    args = parser.parse_args()

    # Define intermediate directories within the work directory
    input_resized_dir = os.path.join(args.work_dir, "resized")
    bg_removed_dir = os.path.join(args.work_dir, "background-removed")
    checkered_bg_dir = os.path.join(args.work_dir, "checkered-background")

    # Ensure all directories exist
    for directory in [input_resized_dir, bg_removed_dir, checkered_bg_dir]:
        os.makedirs(directory, exist_ok=True)

    # Step 1: Flatten the category folders into one merged directory.
    # (Comment/label fixed: this runs first and feeds the resize step,
    # it is not a final-output move.)
    print("Merging category folders...")
    original_images_dir = os.path.join(args.work_dir, "merged-categories")
    copy_images(args.input_dir, original_images_dir)

    # Step 2: Resize images
    print("Resizing images...")
    resize_images_main(input_directory=original_images_dir, output_directory=input_resized_dir)

    # Step 3: Remove backgrounds with each provider, in parallel.
    print("Removing backgrounds...")
    bg_removal_jobs = {
        "removebg": removebg_iterate,
        "photoroom": photoroom_iterate,
        "bria": bria_iterate,
        "clipdrop": clipdrop_iterate,
    }
    bg_removal_dirs = {name: os.path.join(bg_removed_dir, name) for name in bg_removal_jobs}

    for dir_path in bg_removal_dirs.values():
        os.makedirs(dir_path, exist_ok=True)

    # Use ThreadPoolExecutor to parallelize the API calls. Keep the futures
    # and call result() so a worker exception is raised here instead of
    # being silently discarded.
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = {
            name: executor.submit(fn, input_resized_dir, bg_removal_dirs[name])
            for name, fn in bg_removal_jobs.items()
        }
        for future in futures.values():
            future.result()

    # Step 4: Composite a checkered background behind the cut-outs.
    print("Adding checkered background...")
    add_checkered_background_process(bg_removed_dir, checkered_bg_dir)

    # Step 5: Optionally upload the paired images as a dataset.
    if args.dataset_name:
        upload_to_dataset(input_resized_dir, checkered_bg_dir, args.dataset_name, dry_run=not args.push_dataset)
    else:
        print("Please provide a dataset name using --dataset-name")

if __name__ == "__main__":
    main()