deepdoc use GPU if possible (#4618)
Browse files

### What problem does this PR solve?

Make deepdoc use the GPU when one is available (falling back to CPU otherwise).

### Type of change

- [x] Refactoring
- deepdoc/vision/ocr.py +23 -7
- deepdoc/vision/recognizer.py +20 -3
deepdoc/vision/ocr.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
|
|
|
|
| 17 |
import copy
|
| 18 |
import time
|
| 19 |
import os
|
|
@@ -75,17 +76,32 @@ def load_model(model_dir, nm):
|
|
| 75 |
options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
|
| 76 |
options.intra_op_num_threads = 2
|
| 77 |
options.inter_op_num_threads = 2
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
sess = ort.InferenceSession(
|
| 80 |
model_file_path,
|
| 81 |
options=options,
|
| 82 |
-
providers=['CUDAExecutionProvider']
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
else:
|
| 84 |
sess = ort.InferenceSession(
|
| 85 |
model_file_path,
|
| 86 |
options=options,
|
| 87 |
providers=['CPUExecutionProvider'])
|
| 88 |
-
|
|
|
|
|
|
|
| 89 |
|
| 90 |
|
| 91 |
class TextRecognizer(object):
|
|
@@ -98,7 +114,7 @@ class TextRecognizer(object):
|
|
| 98 |
"use_space_char": True
|
| 99 |
}
|
| 100 |
self.postprocess_op = build_post_process(postprocess_params)
|
| 101 |
-
self.predictor, self.input_tensor = load_model(model_dir, 'rec')
|
| 102 |
|
| 103 |
def resize_norm_img(self, img, max_wh_ratio):
|
| 104 |
imgC, imgH, imgW = self.rec_image_shape
|
|
@@ -344,7 +360,7 @@ class TextRecognizer(object):
|
|
| 344 |
input_dict[self.input_tensor.name] = norm_img_batch
|
| 345 |
for i in range(100000):
|
| 346 |
try:
|
| 347 |
-
outputs = self.predictor.run(None, input_dict)
|
| 348 |
break
|
| 349 |
except Exception as e:
|
| 350 |
if i >= 3:
|
|
@@ -383,7 +399,7 @@ class TextDetector(object):
|
|
| 383 |
"unclip_ratio": 1.5, "use_dilation": False, "score_mode": "fast", "box_type": "quad"}
|
| 384 |
|
| 385 |
self.postprocess_op = build_post_process(postprocess_params)
|
| 386 |
-
self.predictor, self.input_tensor = load_model(model_dir, 'det')
|
| 387 |
|
| 388 |
img_h, img_w = self.input_tensor.shape[2:]
|
| 389 |
if isinstance(img_h, str) or isinstance(img_w, str):
|
|
@@ -456,7 +472,7 @@ class TextDetector(object):
|
|
| 456 |
input_dict[self.input_tensor.name] = img
|
| 457 |
for i in range(100000):
|
| 458 |
try:
|
| 459 |
-
outputs = self.predictor.run(None, input_dict)
|
| 460 |
break
|
| 461 |
except Exception as e:
|
| 462 |
if i >= 3:
|
|
|
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
|
| 17 |
+
import logging
|
| 18 |
import copy
|
| 19 |
import time
|
| 20 |
import os
|
|
|
|
| 76 |
options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
|
| 77 |
options.intra_op_num_threads = 2
|
| 78 |
options.inter_op_num_threads = 2
|
| 79 |
+
|
| 80 |
+
# https://github.com/microsoft/onnxruntime/issues/9509#issuecomment-951546580
|
| 81 |
+
# Shrink GPU memory after execution
|
| 82 |
+
run_options = ort.RunOptions()
|
| 83 |
+
if ort.get_device() == "GPU":
|
| 84 |
+
cuda_provider_options = {
|
| 85 |
+
"device_id": 0, # Use specific GPU
|
| 86 |
+
"gpu_mem_limit": 512 * 1024 * 1024, # Limit gpu memory
|
| 87 |
+
"arena_extend_strategy": "kNextPowerOfTwo", # gpu memory allocation strategy
|
| 88 |
+
}
|
| 89 |
sess = ort.InferenceSession(
|
| 90 |
model_file_path,
|
| 91 |
options=options,
|
| 92 |
+
providers=['CUDAExecutionProvider'],
|
| 93 |
+
provider_options=[cuda_provider_options]
|
| 94 |
+
)
|
| 95 |
+
run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
|
| 96 |
+
logging.info(f"TextRecognizer {nm} uses GPU")
|
| 97 |
else:
|
| 98 |
sess = ort.InferenceSession(
|
| 99 |
model_file_path,
|
| 100 |
options=options,
|
| 101 |
providers=['CPUExecutionProvider'])
|
| 102 |
+
run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
|
| 103 |
+
logging.info(f"TextRecognizer {nm} uses CPU")
|
| 104 |
+
return sess, sess.get_inputs()[0], run_options
|
| 105 |
|
| 106 |
|
| 107 |
class TextRecognizer(object):
|
|
|
|
| 114 |
"use_space_char": True
|
| 115 |
}
|
| 116 |
self.postprocess_op = build_post_process(postprocess_params)
|
| 117 |
+
self.predictor, self.input_tensor, self.run_options = load_model(model_dir, 'rec')
|
| 118 |
|
| 119 |
def resize_norm_img(self, img, max_wh_ratio):
|
| 120 |
imgC, imgH, imgW = self.rec_image_shape
|
|
|
|
| 360 |
input_dict[self.input_tensor.name] = norm_img_batch
|
| 361 |
for i in range(100000):
|
| 362 |
try:
|
| 363 |
+
outputs = self.predictor.run(None, input_dict, self.run_options)
|
| 364 |
break
|
| 365 |
except Exception as e:
|
| 366 |
if i >= 3:
|
|
|
|
| 399 |
"unclip_ratio": 1.5, "use_dilation": False, "score_mode": "fast", "box_type": "quad"}
|
| 400 |
|
| 401 |
self.postprocess_op = build_post_process(postprocess_params)
|
| 402 |
+
self.predictor, self.input_tensor, self.run_options = load_model(model_dir, 'det')
|
| 403 |
|
| 404 |
img_h, img_w = self.input_tensor.shape[2:]
|
| 405 |
if isinstance(img_h, str) or isinstance(img_w, str):
|
|
|
|
| 472 |
input_dict[self.input_tensor.name] = img
|
| 473 |
for i in range(100000):
|
| 474 |
try:
|
| 475 |
+
outputs = self.predictor.run(None, input_dict, self.run_options)
|
| 476 |
break
|
| 477 |
except Exception as e:
|
| 478 |
if i >= 3:
|
deepdoc/vision/recognizer.py
CHANGED
|
@@ -60,12 +60,29 @@ class Recognizer(object):
|
|
| 60 |
if not os.path.exists(model_file_path):
|
| 61 |
raise ValueError("not find model file path {}".format(
|
| 62 |
model_file_path))
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
options = ort.SessionOptions()
|
| 65 |
options.enable_cpu_mem_arena = False
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
else:
|
| 68 |
self.ort_sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider'])
|
|
|
|
|
|
|
| 69 |
self.input_names = [node.name for node in self.ort_sess.get_inputs()]
|
| 70 |
self.output_names = [node.name for node in self.ort_sess.get_outputs()]
|
| 71 |
self.input_shape = self.ort_sess.get_inputs()[0].shape[2:4]
|
|
@@ -454,7 +471,7 @@ class Recognizer(object):
|
|
| 454 |
inputs = self.preprocess(batch_image_list)
|
| 455 |
logging.debug("preprocess")
|
| 456 |
for ins in inputs:
|
| 457 |
-
bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names})[0], ins, thr)
|
| 458 |
res.append(bb)
|
| 459 |
|
| 460 |
#seeit.save_results(image_list, res, self.label_list, threshold=thr)
|
|
|
|
| 60 |
if not os.path.exists(model_file_path):
|
| 61 |
raise ValueError("not find model file path {}".format(
|
| 62 |
model_file_path))
|
| 63 |
+
# https://github.com/microsoft/onnxruntime/issues/9509#issuecomment-951546580
|
| 64 |
+
# Shrink GPU memory after execution
|
| 65 |
+
self.run_options = ort.RunOptions()
|
| 66 |
+
|
| 67 |
+
if ort.get_device() == "GPU":
|
| 68 |
options = ort.SessionOptions()
|
| 69 |
options.enable_cpu_mem_arena = False
|
| 70 |
+
cuda_provider_options = {
|
| 71 |
+
"device_id": 0, # Use specific GPU
|
| 72 |
+
"gpu_mem_limit": 512 * 1024 * 1024, # Limit gpu memory
|
| 73 |
+
"arena_extend_strategy": "kNextPowerOfTwo", # gpu memory allocation strategy
|
| 74 |
+
}
|
| 75 |
+
self.ort_sess = ort.InferenceSession(
|
| 76 |
+
model_file_path, options=options,
|
| 77 |
+
providers=['CUDAExecutionProvider'],
|
| 78 |
+
provider_options=[cuda_provider_options]
|
| 79 |
+
)
|
| 80 |
+
self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
|
| 81 |
+
logging.info(f"Recognizer {task_name} uses GPU")
|
| 82 |
else:
|
| 83 |
self.ort_sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider'])
|
| 84 |
+
self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
|
| 85 |
+
logging.info(f"Recognizer {task_name} uses CPU")
|
| 86 |
self.input_names = [node.name for node in self.ort_sess.get_inputs()]
|
| 87 |
self.output_names = [node.name for node in self.ort_sess.get_outputs()]
|
| 88 |
self.input_shape = self.ort_sess.get_inputs()[0].shape[2:4]
|
|
|
|
| 471 |
inputs = self.preprocess(batch_image_list)
|
| 472 |
logging.debug("preprocess")
|
| 473 |
for ins in inputs:
|
| 474 |
+
bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names}, self.run_options)[0], ins, thr)
|
| 475 |
res.append(bb)
|
| 476 |
|
| 477 |
#seeit.save_results(image_list, res, self.label_list, threshold=thr)
|