YinuoGuo27 committed on
Commit b022961 · verified · 1 Parent(s): 56dd1ad

Update difpoint/src/models/predictor.py

Files changed (1)
  1. difpoint/src/models/predictor.py +261 -261
difpoint/src/models/predictor.py CHANGED
@@ -1,261 +1,261 @@
-import pdb
-import threading
-import os
-import time
-
-import numpy as np
-import onnxruntime
-
-import torch
-from torch.cuda import nvtx
-from collections import OrderedDict
-import platform
-
-try:
-    import tensorrt as trt
-    import ctypes
-except ModuleNotFoundError:
-    print("No TensorRT Found")
-
-numpy_to_torch_dtype_dict = {
-    np.uint8: torch.uint8,
-    np.int8: torch.int8,
-    np.int16: torch.int16,
-    np.int32: torch.int32,
-    np.int64: torch.int64,
-    np.float16: torch.float16,
-    np.float32: torch.float32,
-    np.float64: torch.float64,
-    np.complex64: torch.complex64,
-    np.complex128: torch.complex128,
-}
-if np.version.full_version >= "1.24.0":
-    numpy_to_torch_dtype_dict[np.bool_] = torch.bool
-else:
-    numpy_to_torch_dtype_dict[np.bool] = torch.bool
-
-
-class TensorRTPredictor:
-    """
-    Implements inference for the EfficientDet TensorRT engine.
-    """
-
-    def __init__(self, **kwargs):
-        """
-        :param engine_path: The path to the serialized engine to load from disk.
-        """
-        if platform.system().lower() == 'linux':
-            ctypes.CDLL("./difpoint/checkpoints/liveportrait_onnx/libgrid_sample_3d_plugin.so", mode=ctypes.RTLD_GLOBAL)
-        else:
-            ctypes.CDLL("./difpoint/checkpoints/liveportrait_onnx/grid_sample_3d_plugin.dll", mode=ctypes.RTLD_GLOBAL)
-        # Load TRT engine
-        self.logger = trt.Logger(trt.Logger.VERBOSE)
-        trt.init_libnvinfer_plugins(self.logger, "")
-        engine_path = os.path.abspath(kwargs.get("model_path", None))
-        print('engine_path', engine_path)
-        self.debug = kwargs.get("debug", False)
-        assert engine_path, f"model:{engine_path} must exist!"
-        print(f"loading trt model:{engine_path}")
-        with open(engine_path, "rb") as f, trt.Runtime(self.logger) as runtime:
-            assert runtime
-            self.engine = runtime.deserialize_cuda_engine(f.read())
-        print('self.engine', self.engine)
-        assert self.engine
-        self.context = self.engine.create_execution_context()
-        assert self.context
-
-        # Setup I/O bindings
-        self.inputs = []
-        self.outputs = []
-        self.tensors = OrderedDict()
-
-        # TODO: support dynamic-shape inputs
-        for idx in range(self.engine.num_io_tensors):
-            name = self.engine[idx]
-            is_input = self.engine.get_tensor_mode(name).name == "INPUT"
-            shape = self.engine.get_tensor_shape(name)
-            dtype = trt.nptype(self.engine.get_tensor_dtype(name))
-
-            binding = {
-                "index": idx,
-                "name": name,
-                "dtype": dtype,
-                "shape": list(shape)
-            }
-            if is_input:
-                self.inputs.append(binding)
-            else:
-                self.outputs.append(binding)
-
-        assert len(self.inputs) > 0
-        assert len(self.outputs) > 0
-        self.allocate_max_buffers()
-
-    def allocate_max_buffers(self, device="cuda"):
-        nvtx.range_push("allocate_max_buffers")
-        # Currently only the batch dimension is handled dynamically
-        batch_size = 1
-        for idx in range(self.engine.num_io_tensors):
-            binding = self.engine[idx]
-            shape = self.engine.get_tensor_shape(binding)
-            is_input = self.engine.get_tensor_mode(binding).name == "INPUT"
-            if -1 in shape:
-                if is_input:
-                    shape = self.engine.get_tensor_profile_shape(binding, 0)[-1]
-                    batch_size = shape[0]
-                else:
-                    shape[0] = batch_size
-            dtype = trt.nptype(self.engine.get_tensor_dtype(binding))
-            tensor = torch.empty(
-                tuple(shape), dtype=numpy_to_torch_dtype_dict[dtype]
-            ).to(device=device)
-            self.tensors[binding] = tensor
-        nvtx.range_pop()
-
-    def input_spec(self):
-        """
-        Get the specs for the input tensor of the network. Useful to prepare memory allocations.
-        :return: Two items, the shape of the input tensor and its (numpy) datatype.
-        """
-        specs = []
-        for i, o in enumerate(self.inputs):
-            specs.append((o["name"], o['shape'], o['dtype']))
-            if self.debug:
-                print(f"trt input {i} -> {o['name']} -> {o['shape']}")
-        return specs
-
-    def output_spec(self):
-        """
-        Get the specs for the output tensors of the network. Useful to prepare memory allocations.
-        :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
-        """
-        specs = []
-        for i, o in enumerate(self.outputs):
-            specs.append((o["name"], o['shape'], o['dtype']))
-            if self.debug:
-                print(f"trt output {i} -> {o['name']} -> {o['shape']}")
-        return specs
-
-    def adjust_buffer(self, feed_dict):
-        nvtx.range_push("adjust_buffer")
-        for name, buf in feed_dict.items():
-            input_tensor = self.tensors[name]
-            current_shape = list(buf.shape)
-            slices = tuple(slice(0, dim) for dim in current_shape)
-            input_tensor[slices].copy_(buf)
-            self.context.set_input_shape(name, current_shape)
-        nvtx.range_pop()
-
-    def predict(self, feed_dict, stream):
-        """
-        Execute inference on a batch of images.
-        :param data: A list of inputs as numpy arrays.
-        :return A list of outputs as numpy arrays.
-        """
-        nvtx.range_push("set_tensors")
-        self.adjust_buffer(feed_dict)
-        for name, tensor in self.tensors.items():
-            self.context.set_tensor_address(name, tensor.data_ptr())
-        nvtx.range_pop()
-        nvtx.range_push("execute")
-        noerror = self.context.execute_async_v3(stream)
-        if not noerror:
-            raise ValueError("ERROR: inference failed.")
-        nvtx.range_pop()
-        return self.tensors
-
-    def __del__(self):
-        del self.engine
-        del self.context
-        del self.inputs
-        del self.outputs
-        del self.tensors
-
-
-class OnnxRuntimePredictor:
-    """
-    OnnxRuntime Prediction
-    """
-
-    def __init__(self, **kwargs):
-        model_path = kwargs.get("model_path", "")  # the model path identifies whether instances are the same
-        assert os.path.exists(model_path), "model path must exist!"
-        # print("loading ort model:{}".format(model_path))
-        self.debug = kwargs.get("debug", False)
-        providers = ['CUDAExecutionProvider', 'CoreMLExecutionProvider', 'CPUExecutionProvider']
-
-        print(f"OnnxRuntime use {providers}")
-        opts = onnxruntime.SessionOptions()
-        # opts.inter_op_num_threads = kwargs.get("num_threads", 4)
-        # opts.intra_op_num_threads = kwargs.get("num_threads", 4)
-        # opts.log_severity_level = 3
-        self.onnx_model = onnxruntime.InferenceSession(model_path, providers=providers, sess_options=opts)
-        self.inputs = self.onnx_model.get_inputs()
-        self.outputs = self.onnx_model.get_outputs()
-
-    def input_spec(self):
-        """
-        Get the specs for the input tensor of the network. Useful to prepare memory allocations.
-        :return: Two items, the shape of the input tensor and its (numpy) datatype.
-        """
-        specs = []
-        for i, o in enumerate(self.inputs):
-            specs.append((o.name, o.shape, o.type))
-            if self.debug:
-                print(f"ort {i} -> {o.name} -> {o.shape}")
-        return specs
-
-    def output_spec(self):
-        """
-        Get the specs for the output tensors of the network. Useful to prepare memory allocations.
-        :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
-        """
-        specs = []
-        for i, o in enumerate(self.outputs):
-            specs.append((o.name, o.shape, o.type))
-            if self.debug:
-                print(f"ort output {i} -> {o.name} -> {o.shape}")
-        return specs
-
-    def predict(self, *data):
-        input_feeds = {}
-        for i in range(len(data)):
-            if self.inputs[i].type == 'tensor(float16)':
-                input_feeds[self.inputs[i].name] = data[i].astype(np.float16)
-            else:
-                input_feeds[self.inputs[i].name] = data[i].astype(np.float32)
-        results = self.onnx_model.run(None, input_feeds)
-        return results
-
-    def __del__(self):
-        del self.onnx_model
-        self.onnx_model = None
-
-
-class OnnxRuntimePredictorSingleton(OnnxRuntimePredictor):
-    """
-    Singleton pattern to prevent the model from being loaded multiple times.
-    """
-    _instance_lock = threading.Lock()
-    _instance = {}
-
-    def __new__(cls, *args, **kwargs):
-        model_path = kwargs.get("model_path", "")  # the model path identifies whether instances are the same
-        assert os.path.exists(model_path), "model path must exist!"
-        # Singleton: avoid loading the same model repeatedly
-        with OnnxRuntimePredictorSingleton._instance_lock:
-            if model_path not in OnnxRuntimePredictorSingleton._instance or \
-                    OnnxRuntimePredictorSingleton._instance[model_path].onnx_model is None:
-                OnnxRuntimePredictorSingleton._instance[model_path] = OnnxRuntimePredictor(**kwargs)
-
-        return OnnxRuntimePredictorSingleton._instance[model_path]
-
-
-def get_predictor(**kwargs):
-    predict_type = kwargs.get("predict_type", "trt")
-    if predict_type == "ort":
-        return OnnxRuntimePredictorSingleton(**kwargs)
-    elif predict_type == "trt":
-        return TensorRTPredictor(**kwargs)
-    else:
-        raise NotImplementedError
 
+import pdb
+import threading
+import os
+import time
+
+import numpy as np
+import onnxruntime
+
+import torch
+from torch.cuda import nvtx
+from collections import OrderedDict
+import platform
+
+try:
+    import tensorrt as trt
+    import ctypes
+except ModuleNotFoundError:
+    print("No TensorRT Found")
+
+numpy_to_torch_dtype_dict = {
+    np.uint8: torch.uint8,
+    np.int8: torch.int8,
+    np.int16: torch.int16,
+    np.int32: torch.int32,
+    np.int64: torch.int64,
+    np.float16: torch.float16,
+    np.float32: torch.float32,
+    np.float64: torch.float64,
+    np.complex64: torch.complex64,
+    np.complex128: torch.complex128,
+}
+if np.version.full_version >= "1.24.0":
+    numpy_to_torch_dtype_dict[np.bool_] = torch.bool
+else:
+    numpy_to_torch_dtype_dict[np.bool] = torch.bool
+
+
+class TensorRTPredictor:
+    """
+    Implements inference for the EfficientDet TensorRT engine.
+    """
+
+    def __init__(self, **kwargs):
+        """
+        :param engine_path: The path to the serialized engine to load from disk.
+        """
+        if platform.system().lower() == 'linux':
+            ctypes.CDLL("./downloaded_repo/pretrained_weights/liveportrait_onnx/libgrid_sample_3d_plugin.so", mode=ctypes.RTLD_GLOBAL)
+        else:
+            ctypes.CDLL("./downloaded_repo/pretrained_weights/liveportrait_onnx/grid_sample_3d_plugin.dll", mode=ctypes.RTLD_GLOBAL)
+        # Load TRT engine
+        self.logger = trt.Logger(trt.Logger.VERBOSE)
+        trt.init_libnvinfer_plugins(self.logger, "")
+        engine_path = os.path.abspath(kwargs.get("model_path", None))
+        print('engine_path', engine_path)
+        self.debug = kwargs.get("debug", False)
+        assert engine_path, f"model:{engine_path} must exist!"
+        print(f"loading trt model:{engine_path}")
+        with open(engine_path, "rb") as f, trt.Runtime(self.logger) as runtime:
+            assert runtime
+            self.engine = runtime.deserialize_cuda_engine(f.read())
+        print('self.engine', self.engine)
+        assert self.engine
+        self.context = self.engine.create_execution_context()
+        assert self.context
+
+        # Setup I/O bindings
+        self.inputs = []
+        self.outputs = []
+        self.tensors = OrderedDict()
+
+        # TODO: support dynamic-shape inputs
+        for idx in range(self.engine.num_io_tensors):
+            name = self.engine[idx]
+            is_input = self.engine.get_tensor_mode(name).name == "INPUT"
+            shape = self.engine.get_tensor_shape(name)
+            dtype = trt.nptype(self.engine.get_tensor_dtype(name))
+
+            binding = {
+                "index": idx,
+                "name": name,
+                "dtype": dtype,
+                "shape": list(shape)
+            }
+            if is_input:
+                self.inputs.append(binding)
+            else:
+                self.outputs.append(binding)
+
+        assert len(self.inputs) > 0
+        assert len(self.outputs) > 0
+        self.allocate_max_buffers()
+
+    def allocate_max_buffers(self, device="cuda"):
+        nvtx.range_push("allocate_max_buffers")
+        # Currently only the batch dimension is handled dynamically
+        batch_size = 1
+        for idx in range(self.engine.num_io_tensors):
+            binding = self.engine[idx]
+            shape = self.engine.get_tensor_shape(binding)
+            is_input = self.engine.get_tensor_mode(binding).name == "INPUT"
+            if -1 in shape:
+                if is_input:
+                    shape = self.engine.get_tensor_profile_shape(binding, 0)[-1]
+                    batch_size = shape[0]
+                else:
+                    shape[0] = batch_size
+            dtype = trt.nptype(self.engine.get_tensor_dtype(binding))
+            tensor = torch.empty(
+                tuple(shape), dtype=numpy_to_torch_dtype_dict[dtype]
+            ).to(device=device)
+            self.tensors[binding] = tensor
+        nvtx.range_pop()
+
+    def input_spec(self):
+        """
+        Get the specs for the input tensor of the network. Useful to prepare memory allocations.
+        :return: Two items, the shape of the input tensor and its (numpy) datatype.
+        """
+        specs = []
+        for i, o in enumerate(self.inputs):
+            specs.append((o["name"], o['shape'], o['dtype']))
+            if self.debug:
+                print(f"trt input {i} -> {o['name']} -> {o['shape']}")
+        return specs
+
+    def output_spec(self):
+        """
+        Get the specs for the output tensors of the network. Useful to prepare memory allocations.
+        :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
+        """
+        specs = []
+        for i, o in enumerate(self.outputs):
+            specs.append((o["name"], o['shape'], o['dtype']))
+            if self.debug:
+                print(f"trt output {i} -> {o['name']} -> {o['shape']}")
+        return specs
+
+    def adjust_buffer(self, feed_dict):
+        nvtx.range_push("adjust_buffer")
+        for name, buf in feed_dict.items():
+            input_tensor = self.tensors[name]
+            current_shape = list(buf.shape)
+            slices = tuple(slice(0, dim) for dim in current_shape)
+            input_tensor[slices].copy_(buf)
+            self.context.set_input_shape(name, current_shape)
+        nvtx.range_pop()
+
+    def predict(self, feed_dict, stream):
+        """
+        Execute inference on a batch of images.
+        :param data: A list of inputs as numpy arrays.
+        :return A list of outputs as numpy arrays.
+        """
+        nvtx.range_push("set_tensors")
+        self.adjust_buffer(feed_dict)
+        for name, tensor in self.tensors.items():
+            self.context.set_tensor_address(name, tensor.data_ptr())
+        nvtx.range_pop()
+        nvtx.range_push("execute")
+        noerror = self.context.execute_async_v3(stream)
+        if not noerror:
+            raise ValueError("ERROR: inference failed.")
+        nvtx.range_pop()
+        return self.tensors
+
+    def __del__(self):
+        del self.engine
+        del self.context
+        del self.inputs
+        del self.outputs
+        del self.tensors
+
+
+class OnnxRuntimePredictor:
+    """
+    OnnxRuntime Prediction
+    """
+
+    def __init__(self, **kwargs):
+        model_path = kwargs.get("model_path", "")  # the model path identifies whether instances are the same
+        assert os.path.exists(model_path), "model path must exist!"
+        # print("loading ort model:{}".format(model_path))
+        self.debug = kwargs.get("debug", False)
+        providers = ['CUDAExecutionProvider', 'CoreMLExecutionProvider', 'CPUExecutionProvider']
+
+        print(f"OnnxRuntime use {providers}")
+        opts = onnxruntime.SessionOptions()
+        # opts.inter_op_num_threads = kwargs.get("num_threads", 4)
+        # opts.intra_op_num_threads = kwargs.get("num_threads", 4)
+        # opts.log_severity_level = 3
+        self.onnx_model = onnxruntime.InferenceSession(model_path, providers=providers, sess_options=opts)
+        self.inputs = self.onnx_model.get_inputs()
+        self.outputs = self.onnx_model.get_outputs()
+
+    def input_spec(self):
+        """
+        Get the specs for the input tensor of the network. Useful to prepare memory allocations.
+        :return: Two items, the shape of the input tensor and its (numpy) datatype.
+        """
+        specs = []
+        for i, o in enumerate(self.inputs):
+            specs.append((o.name, o.shape, o.type))
+            if self.debug:
+                print(f"ort {i} -> {o.name} -> {o.shape}")
+        return specs
+
+    def output_spec(self):
+        """
+        Get the specs for the output tensors of the network. Useful to prepare memory allocations.
+        :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
+        """
+        specs = []
+        for i, o in enumerate(self.outputs):
+            specs.append((o.name, o.shape, o.type))
+            if self.debug:
+                print(f"ort output {i} -> {o.name} -> {o.shape}")
+        return specs
+
+    def predict(self, *data):
+        input_feeds = {}
+        for i in range(len(data)):
+            if self.inputs[i].type == 'tensor(float16)':
+                input_feeds[self.inputs[i].name] = data[i].astype(np.float16)
+            else:
+                input_feeds[self.inputs[i].name] = data[i].astype(np.float32)
+        results = self.onnx_model.run(None, input_feeds)
+        return results
+
+    def __del__(self):
+        del self.onnx_model
+        self.onnx_model = None
+
+
+class OnnxRuntimePredictorSingleton(OnnxRuntimePredictor):
+    """
+    Singleton pattern to prevent the model from being loaded multiple times.
+    """
+    _instance_lock = threading.Lock()
+    _instance = {}
+
+    def __new__(cls, *args, **kwargs):
+        model_path = kwargs.get("model_path", "")  # the model path identifies whether instances are the same
+        assert os.path.exists(model_path), "model path must exist!"
+        # Singleton: avoid loading the same model repeatedly
+        with OnnxRuntimePredictorSingleton._instance_lock:
+            if model_path not in OnnxRuntimePredictorSingleton._instance or \
+                    OnnxRuntimePredictorSingleton._instance[model_path].onnx_model is None:
+                OnnxRuntimePredictorSingleton._instance[model_path] = OnnxRuntimePredictor(**kwargs)
+
+        return OnnxRuntimePredictorSingleton._instance[model_path]
+
+
+def get_predictor(**kwargs):
+    predict_type = kwargs.get("predict_type", "trt")
+    if predict_type == "ort":
+        return OnnxRuntimePredictorSingleton(**kwargs)
+    elif predict_type == "trt":
+        return TensorRTPredictor(**kwargs)
+    else:
+        raise NotImplementedError
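
For context, a minimal usage sketch (not part of the commit) of how this module is typically driven after the path change. The engine/ONNX file names, tensor names, and shapes below are illustrative assumptions, not values taken from the repository.

# Hypothetical usage sketch; file names, tensor names, and shapes are assumptions.
import numpy as np
import torch

from difpoint.src.models.predictor import get_predictor

# TensorRT backend: predict() expects a dict of CUDA torch tensors keyed by the
# engine's tensor names, plus a raw CUDA stream handle for execute_async_v3().
trt_predictor = get_predictor(
    predict_type="trt",
    model_path="./downloaded_repo/pretrained_weights/liveportrait_onnx/warping_spade.trt",  # assumed file
)
stream = torch.cuda.Stream()
feed = {"feature_3d": torch.randn(1, 32, 16, 64, 64, device="cuda")}  # assumed name/shape
outputs = trt_predictor.predict(feed, stream.cuda_stream)

# ONNX Runtime backend: predict() takes positional numpy arrays in input order and
# returns a list of numpy outputs; the singleton caches one session per model path.
ort_predictor = get_predictor(
    predict_type="ort",
    model_path="./downloaded_repo/pretrained_weights/liveportrait_onnx/motion_extractor.onnx",  # assumed file
)
results = ort_predictor.predict(np.zeros((1, 3, 256, 256), dtype=np.float32))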