Steven Tang
committed on
Commit
·
ac97109
1
Parent(s):
195bb0e
Add ignore errors
Browse files - run_batches.py +6 -2
run_batches.py
CHANGED
@@ -23,6 +23,7 @@ def main():
|
|
23 |
parser.add_argument("output_dir", type=str, help="directory to write processed files to")
|
24 |
parser.add_argument("--batch_size", type=int, default=8)
|
25 |
parser.add_argument("--max_workers", type=int, default=os.cpu_count() - 4)
|
|
|
26 |
args = parser.parse_args()
|
27 |
|
28 |
input_dir = Path(args.input_dir)
|
@@ -49,12 +50,13 @@ def main():
|
|
49 |
[input_dir] * len(input_files_batches),
|
50 |
[output_dir] * len(input_files_batches),
|
51 |
[args.mode] * len(input_files_batches),
|
|
|
52 |
max_workers=args.max_workers,
|
53 |
chunksize=1,
|
54 |
)
|
55 |
|
56 |
|
57 |
-
def run_batch(input_files_batch, input_dir, output_dir, mode):
|
58 |
with TemporaryDirectory() as temp_dir_SR, TemporaryDirectory() as temp_dir_GNR, TemporaryDirectory() as temp_dir_SA, TemporaryDirectory() as input_temp_dir, TemporaryDirectory() as output_temp_dir:
|
59 |
input_temp_dir = Path(input_temp_dir)
|
60 |
output_temp_dir = Path(output_temp_dir)
|
@@ -87,13 +89,15 @@ def run_batch(input_files_batch, input_dir, output_dir, mode):
|
|
87 |
if "returned non-zero exit status 137" in str(e):
|
88 |
logging.error("Process killed due to memory limit")
|
89 |
return
|
|
|
|
|
|
|
90 |
raise e
|
91 |
|
92 |
output_paths = list(output_temp_dir.rglob("*"))
|
93 |
for output_path in output_paths:
|
94 |
logging.info(f"cp {output_path} {output_dir}")
|
95 |
shutil.copy(output_path, output_dir)
|
96 |
-
output_file = output_path.name
|
97 |
|
98 |
|
99 |
if __name__ == "__main__":
|
|
|
23 |
parser.add_argument("output_dir", type=str, help="directory to write processed files to")
|
24 |
parser.add_argument("--batch_size", type=int, default=8)
|
25 |
parser.add_argument("--max_workers", type=int, default=os.cpu_count() - 4)
|
26 |
+
parser.add_argument("--ignore_errors", action="store_true", help="ignore errors")
|
27 |
args = parser.parse_args()
|
28 |
|
29 |
input_dir = Path(args.input_dir)
|
|
|
50 |
[input_dir] * len(input_files_batches),
|
51 |
[output_dir] * len(input_files_batches),
|
52 |
[args.mode] * len(input_files_batches),
|
53 |
+
[args.ignore_errors] * len(input_files_batches),
|
54 |
max_workers=args.max_workers,
|
55 |
chunksize=1,
|
56 |
)
|
57 |
|
58 |
|
59 |
+
def run_batch(input_files_batch, input_dir, output_dir, mode, ignore_errors):
|
60 |
with TemporaryDirectory() as temp_dir_SR, TemporaryDirectory() as temp_dir_GNR, TemporaryDirectory() as temp_dir_SA, TemporaryDirectory() as input_temp_dir, TemporaryDirectory() as output_temp_dir:
|
61 |
input_temp_dir = Path(input_temp_dir)
|
62 |
output_temp_dir = Path(output_temp_dir)
|
|
|
89 |
if "returned non-zero exit status 137" in str(e):
|
90 |
logging.error("Process killed due to memory limit")
|
91 |
return
|
92 |
+
elif ignore_errors:
|
93 |
+
logging.error("Ignoring error")
|
94 |
+
return
|
95 |
raise e
|
96 |
|
97 |
output_paths = list(output_temp_dir.rglob("*"))
|
98 |
for output_path in output_paths:
|
99 |
logging.info(f"cp {output_path} {output_dir}")
|
100 |
shutil.copy(output_path, output_dir)
|
|
|
101 |
|
102 |
|
103 |
if __name__ == "__main__":
|