import argparse
import functools
import pathlib
from multiprocessing.pool import ThreadPool

from tqdm import tqdm


def do_to_files(process_file_fn):
    '''
    Helper for scripts that iterate over large sets of files. Defines command-line arguments
    for selecting a large set of files, then sets up a worker queue to apply process_file_fn
    to each file. You need to provide your own process_file_fn.

    process_file_fn expected signature:
        (path, output_path)
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('--path', required=True, help='Root directory to search for files.')
    parser.add_argument('--glob', required=True, help='Glob pattern matched recursively under --path.')
    parser.add_argument('--out', required=True, help='Output path passed to process_file_fn.')
    parser.add_argument('--resume', type=int, default=0, help='Index of the first file to process; earlier files are skipped.')
    parser.add_argument('--num_workers', type=int, default=0, help='Number of worker threads; 0 processes files serially.')
    args = parser.parse_args()

    # Recursively collect all files under --path matching --glob, then skip the first
    # --resume entries so an interrupted run can pick up where it left off.
    path = pathlib.Path(args.path)
    files = [str(f) for f in path.rglob(args.glob)]
    files = files[args.resume:]

    # Bind the output path so each worker call only needs the input file path.
    pfn = functools.partial(process_file_fn, output_path=args.out)
    if args.num_workers > 0:
        with ThreadPool(args.num_workers) as pool:
            # Drain the imap iterator through tqdm so progress updates as files complete.
            list(tqdm(pool.imap(pfn, files), total=len(files)))
    else:
        for f in tqdm(files):
            pfn(f)
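

# Example usage (a minimal sketch; `copy_file`, `copy_files.py`, and the module name
# `do_to_files` are illustrative assumptions, not part of this file):
#
#   import pathlib
#   import shutil
#
#   from do_to_files import do_to_files
#
#   def copy_file(path, output_path):
#       # Copy each matched file into the output directory, keeping its basename.
#       out_dir = pathlib.Path(output_path)
#       out_dir.mkdir(parents=True, exist_ok=True)
#       shutil.copy(path, out_dir / pathlib.Path(path).name)
#
#   if __name__ == '__main__':
#       do_to_files(copy_file)
#
# Invoked, for example, as:
#   python copy_files.py --path ./data --glob '*.wav' --out ./copies --num_workers 8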