YinuoGuo27 committed
Commit fe29479 · verified · 1 Parent(s): c55d2d9

Update difpoint/src/models/predictor.py

Files changed (1):
  difpoint/src/models/predictor.py (+266 -263)
difpoint/src/models/predictor.py CHANGED
@@ -1,263 +1,266 @@
-import pdb
-import threading
-import os
-import time
-
-import numpy as np
-import onnxruntime
-
-import torch
-from torch.cuda import nvtx
-from collections import OrderedDict
-import platform
-
-import spaces
-
-try:
-    import tensorrt as trt
-    import ctypes
-except ModuleNotFoundError:
-    print("No TensorRT Found")
-
-numpy_to_torch_dtype_dict = {
-    np.uint8: torch.uint8,
-    np.int8: torch.int8,
-    np.int16: torch.int16,
-    np.int32: torch.int32,
-    np.int64: torch.int64,
-    np.float16: torch.float16,
-    np.float32: torch.float32,
-    np.float64: torch.float64,
-    np.complex64: torch.complex64,
-    np.complex128: torch.complex128,
-}
-if np.version.full_version >= "1.24.0":
-    numpy_to_torch_dtype_dict[np.bool_] = torch.bool
-else:
-    numpy_to_torch_dtype_dict[np.bool] = torch.bool
-
-
-class TensorRTPredictor:
-    """
-    Implements inference for the EfficientDet TensorRT engine.
-    """
-    @spaces.GPU
-    def __init__(self, **kwargs):
-        """
-        :param engine_path: The path to the serialized engine to load from disk.
-        """
-        if platform.system().lower() == 'linux':
-            ctypes.CDLL("./difpoint/checkpoints/liveportrait_onnx/libgrid_sample_3d_plugin.so", mode=ctypes.RTLD_GLOBAL)
-        else:
-            ctypes.CDLL("./difpoint/checkpoints/liveportrait_onnx/grid_sample_3d_plugin.dll", mode=ctypes.RTLD_GLOBAL)
-        # Load TRT engine
-        self.logger = trt.Logger(trt.Logger.VERBOSE)
-        trt.init_libnvinfer_plugins(self.logger, "")
-        engine_path = os.path.abspath(kwargs.get("model_path", None))
-        print('engine_path', engine_path)
-        self.debug = kwargs.get("debug", False)
-        assert engine_path, f"model:{engine_path} must exist!"
-        print(f"loading trt model:{engine_path}")
-        with open(engine_path, "rb") as f, trt.Runtime(self.logger) as runtime:
-            assert runtime
-            self.engine = runtime.deserialize_cuda_engine(f.read())
-        print('self.engine', self.engine)
-        assert self.engine
-        self.context = self.engine.create_execution_context()
-        assert self.context
-
-        # Setup I/O bindings
-        self.inputs = []
-        self.outputs = []
-        self.tensors = OrderedDict()
-
-        # TODO: support dynamic input shapes
-        for idx in range(self.engine.num_io_tensors):
-            name = self.engine[idx]
-            is_input = self.engine.get_tensor_mode(name).name == "INPUT"
-            shape = self.engine.get_tensor_shape(name)
-            dtype = trt.nptype(self.engine.get_tensor_dtype(name))
-
-            binding = {
-                "index": idx,
-                "name": name,
-                "dtype": dtype,
-                "shape": list(shape)
-            }
-            if is_input:
-                self.inputs.append(binding)
-            else:
-                self.outputs.append(binding)
-
-        assert len(self.inputs) > 0
-        assert len(self.outputs) > 0
-        self.allocate_max_buffers()
-
-    def allocate_max_buffers(self, device="cuda"):
-        nvtx.range_push("allocate_max_buffers")
-        # Currently only the batch dimension is handled dynamically
-        batch_size = 1
-        for idx in range(self.engine.num_io_tensors):
-            binding = self.engine[idx]
-            shape = self.engine.get_tensor_shape(binding)
-            is_input = self.engine.get_tensor_mode(binding).name == "INPUT"
-            if -1 in shape:
-                if is_input:
-                    shape = self.engine.get_tensor_profile_shape(binding, 0)[-1]
-                    batch_size = shape[0]
-                else:
-                    shape[0] = batch_size
-            dtype = trt.nptype(self.engine.get_tensor_dtype(binding))
-            tensor = torch.empty(
-                tuple(shape), dtype=numpy_to_torch_dtype_dict[dtype]
-            ).to(device=device)
-            self.tensors[binding] = tensor
-        nvtx.range_pop()
-
-    def input_spec(self):
-        """
-        Get the specs for the input tensor of the network. Useful to prepare memory allocations.
-        :return: Two items, the shape of the input tensor and its (numpy) datatype.
-        """
-        specs = []
-        for i, o in enumerate(self.inputs):
-            specs.append((o["name"], o['shape'], o['dtype']))
-            if self.debug:
-                print(f"trt input {i} -> {o['name']} -> {o['shape']}")
-        return specs
-
-    def output_spec(self):
-        """
-        Get the specs for the output tensors of the network. Useful to prepare memory allocations.
-        :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
-        """
-        specs = []
-        for i, o in enumerate(self.outputs):
-            specs.append((o["name"], o['shape'], o['dtype']))
-            if self.debug:
-                print(f"trt output {i} -> {o['name']} -> {o['shape']}")
-        return specs
-
-    def adjust_buffer(self, feed_dict):
-        nvtx.range_push("adjust_buffer")
-        for name, buf in feed_dict.items():
-            input_tensor = self.tensors[name]
-            current_shape = list(buf.shape)
-            slices = tuple(slice(0, dim) for dim in current_shape)
-            input_tensor[slices].copy_(buf)
-            self.context.set_input_shape(name, current_shape)
-        nvtx.range_pop()
-
-    def predict(self, feed_dict, stream):
-        """
-        Execute inference on a batch of images.
-        :param data: A list of inputs as numpy arrays.
-        :return A list of outputs as numpy arrays.
-        """
-        nvtx.range_push("set_tensors")
-        self.adjust_buffer(feed_dict)
-        for name, tensor in self.tensors.items():
-            self.context.set_tensor_address(name, tensor.data_ptr())
-        nvtx.range_pop()
-        nvtx.range_push("execute")
-        noerror = self.context.execute_async_v3(stream)
-        if not noerror:
-            raise ValueError("ERROR: inference failed.")
-        nvtx.range_pop()
-        return self.tensors
-
-    def __del__(self):
-        del self.engine
-        del self.context
-        del self.inputs
-        del self.outputs
-        del self.tensors
-
-class OnnxRuntimePredictor:
-    """
-    OnnxRuntime Prediction
-    """
-
-    def __init__(self, **kwargs):
-        model_path = kwargs.get("model_path", "")  # the model path is used to tell instances apart
-        assert os.path.exists(model_path), "model path must exist!"
-        # print("loading ort model:{}".format(model_path))
-        self.debug = kwargs.get("debug", False)
-        providers = ['CUDAExecutionProvider', 'CoreMLExecutionProvider', 'CPUExecutionProvider']
-
-        print(f"OnnxRuntime use {providers}")
-        opts = onnxruntime.SessionOptions()
-        # opts.inter_op_num_threads = kwargs.get("num_threads", 4)
-        # opts.intra_op_num_threads = kwargs.get("num_threads", 4)
-        # opts.log_severity_level = 3
-
-        self.onnx_model = onnxruntime.InferenceSession(model_path, providers=providers, sess_options=opts)
-        self.inputs = self.onnx_model.get_inputs()
-        self.outputs = self.onnx_model.get_outputs()
-
-    def input_spec(self):
-        """
-        Get the specs for the input tensor of the network. Useful to prepare memory allocations.
-        :return: Two items, the shape of the input tensor and its (numpy) datatype.
-        """
-        specs = []
-        for i, o in enumerate(self.inputs):
-            specs.append((o.name, o.shape, o.type))
-            if self.debug:
-                print(f"ort {i} -> {o.name} -> {o.shape}")
-        return specs
-
-    def output_spec(self):
-        """
-        Get the specs for the output tensors of the network. Useful to prepare memory allocations.
-        :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
-        """
-        specs = []
-        for i, o in enumerate(self.outputs):
-            specs.append((o.name, o.shape, o.type))
-            if self.debug:
-                print(f"ort output {i} -> {o.name} -> {o.shape}")
-        return specs
-
-    def predict(self, *data):
-        input_feeds = {}
-        for i in range(len(data)):
-            if self.inputs[i].type == 'tensor(float16)':
-                input_feeds[self.inputs[i].name] = data[i].astype(np.float16)
-            else:
-                input_feeds[self.inputs[i].name] = data[i].astype(np.float32)
-        results = self.onnx_model.run(None, input_feeds)
-        return results
-
-    def __del__(self):
-        del self.onnx_model
-        self.onnx_model = None
-
-
-class OnnxRuntimePredictorSingleton(OnnxRuntimePredictor):
-    """
-    Singleton: prevents the same model from being loaded more than once.
-    """
-    _instance_lock = threading.Lock()
-    _instance = {}
-
-    def __new__(cls, *args, **kwargs):
-        model_path = kwargs.get("model_path", "")  # the model path is used to tell instances apart
-        assert os.path.exists(model_path), "model path must exist!"
-        # Singleton: avoid loading the same model repeatedly
-        with OnnxRuntimePredictorSingleton._instance_lock:
-            if model_path not in OnnxRuntimePredictorSingleton._instance or \
-                    OnnxRuntimePredictorSingleton._instance[model_path].onnx_model is None:
-                OnnxRuntimePredictorSingleton._instance[model_path] = OnnxRuntimePredictor(**kwargs)
-
-        return OnnxRuntimePredictorSingleton._instance[model_path]
-
-
-def get_predictor(**kwargs):
-    predict_type = kwargs.get("predict_type", "trt")
-    if predict_type == "ort":
-        return OnnxRuntimePredictorSingleton(**kwargs)
-    elif predict_type == "trt":
-        return TensorRTPredictor(**kwargs)
-    else:
-        raise NotImplementedError
 
 
 
 
+import pdb
+import threading
+import os
+import time
+
+import numpy as np
+import onnxruntime
+
+import torch
+from torch.cuda import nvtx
+from collections import OrderedDict
+import platform
+
+import spaces
+
+try:
+    import tensorrt as trt
+    import ctypes
+except ModuleNotFoundError:
+    print("No TensorRT Found")
+
+numpy_to_torch_dtype_dict = {
+    np.uint8: torch.uint8,
+    np.int8: torch.int8,
+    np.int16: torch.int16,
+    np.int32: torch.int32,
+    np.int64: torch.int64,
+    np.float16: torch.float16,
+    np.float32: torch.float32,
+    np.float64: torch.float64,
+    np.complex64: torch.complex64,
+    np.complex128: torch.complex128,
+}
+if np.version.full_version >= "1.24.0":
+    numpy_to_torch_dtype_dict[np.bool_] = torch.bool
+else:
+    numpy_to_torch_dtype_dict[np.bool] = torch.bool
+
+
+class TensorRTPredictor:
+    """
+    Implements inference for the EfficientDet TensorRT engine.
+    """
+    @spaces.GPU
+    def __init__(self, **kwargs):
+        """
+        :param engine_path: The path to the serialized engine to load from disk.
+        """
+        if platform.system().lower() == 'linux':
+            ctypes.CDLL("./difpoint/checkpoints/liveportrait_onnx/libgrid_sample_3d_plugin.so", mode=ctypes.RTLD_GLOBAL)
+        else:
+            ctypes.CDLL("./difpoint/checkpoints/liveportrait_onnx/grid_sample_3d_plugin.dll", mode=ctypes.RTLD_GLOBAL)
+        # Load TRT engine
+        self.logger = trt.Logger(trt.Logger.VERBOSE)
+        trt.init_libnvinfer_plugins(self.logger, "")
+        engine_path = os.path.abspath(kwargs.get("model_path", None))
+        print('engine_path', engine_path)
+        self.debug = kwargs.get("debug", False)
+        assert engine_path, f"model:{engine_path} must exist!"
+        print(f"loading trt model:{engine_path}")
+        with open(engine_path, "rb") as f, trt.Runtime(self.logger) as runtime:
+            assert runtime
+            self.engine = runtime.deserialize_cuda_engine(f.read())
+        print('self.engine', self.engine)
+        assert self.engine
+        self.context = self.engine.create_execution_context()
+        assert self.context
+
+        # Setup I/O bindings
+        self.inputs = []
+        self.outputs = []
+        self.tensors = OrderedDict()
+
+        # TODO: support dynamic input shapes
+        for idx in range(self.engine.num_io_tensors):
+            name = self.engine[idx]
+            is_input = self.engine.get_tensor_mode(name).name == "INPUT"
+            shape = self.engine.get_tensor_shape(name)
+            dtype = trt.nptype(self.engine.get_tensor_dtype(name))
+
+            binding = {
+                "index": idx,
+                "name": name,
+                "dtype": dtype,
+                "shape": list(shape)
+            }
+            if is_input:
+                self.inputs.append(binding)
+            else:
+                self.outputs.append(binding)
+
+        assert len(self.inputs) > 0
+        assert len(self.outputs) > 0
+        self.allocate_max_buffers()
+
+    def allocate_max_buffers(self, device="cuda"):
+        nvtx.range_push("allocate_max_buffers")
+        # Currently only the batch dimension is handled dynamically
+        batch_size = 1
+        for idx in range(self.engine.num_io_tensors):
+            binding = self.engine[idx]
+            shape = self.engine.get_tensor_shape(binding)
+            is_input = self.engine.get_tensor_mode(binding).name == "INPUT"
+            if -1 in shape:
+                if is_input:
+                    shape = self.engine.get_tensor_profile_shape(binding, 0)[-1]
+                    batch_size = shape[0]
+                else:
+                    shape[0] = batch_size
+            dtype = trt.nptype(self.engine.get_tensor_dtype(binding))
+            tensor = torch.empty(
+                tuple(shape), dtype=numpy_to_torch_dtype_dict[dtype]
+            ).to(device=device)
+            self.tensors[binding] = tensor
+        nvtx.range_pop()
+
+    def input_spec(self):
+        """
+        Get the specs for the input tensor of the network. Useful to prepare memory allocations.
+        :return: Two items, the shape of the input tensor and its (numpy) datatype.
+        """
+        specs = []
+        for i, o in enumerate(self.inputs):
+            specs.append((o["name"], o['shape'], o['dtype']))
+            if self.debug:
+                print(f"trt input {i} -> {o['name']} -> {o['shape']}")
+        return specs
+
+    def output_spec(self):
+        """
+        Get the specs for the output tensors of the network. Useful to prepare memory allocations.
+        :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
+        """
+        specs = []
+        for i, o in enumerate(self.outputs):
+            specs.append((o["name"], o['shape'], o['dtype']))
+            if self.debug:
+                print(f"trt output {i} -> {o['name']} -> {o['shape']}")
+        return specs
+
+    def adjust_buffer(self, feed_dict):
+        nvtx.range_push("adjust_buffer")
+        for name, buf in feed_dict.items():
+            input_tensor = self.tensors[name]
+            current_shape = list(buf.shape)
+            slices = tuple(slice(0, dim) for dim in current_shape)
+            input_tensor[slices].copy_(buf)
+            self.context.set_input_shape(name, current_shape)
+        nvtx.range_pop()
+
+    def predict(self, feed_dict, stream):
+        """
+        Execute inference on a batch of images.
+        :param data: A list of inputs as numpy arrays.
+        :return A list of outputs as numpy arrays.
+        """
+        nvtx.range_push("set_tensors")
+        self.adjust_buffer(feed_dict)
+        for name, tensor in self.tensors.items():
+            self.context.set_tensor_address(name, tensor.data_ptr())
+        nvtx.range_pop()
+        nvtx.range_push("execute")
+        noerror = self.context.execute_async_v3(stream)
+        if not noerror:
+            raise ValueError("ERROR: inference failed.")
+        nvtx.range_pop()
+        return self.tensors
+
+    def __del__(self):
+        del self.engine
+        del self.context
+        del self.inputs
+        del self.outputs
+        del self.tensors
+
+class OnnxRuntimePredictor:
+    """
+    OnnxRuntime Prediction
+    """
+
+    def __init__(self, **kwargs):
+        model_path = kwargs.get("model_path", "")  # the model path is used to tell instances apart
+        assert os.path.exists(model_path), "model path must exist!"
+        # print("loading ort model:{}".format(model_path))
+        self.debug = kwargs.get("debug", False)
+        providers = ['CUDAExecutionProvider', 'CoreMLExecutionProvider', 'CPUExecutionProvider']
+
+        print(f"OnnxRuntime use {providers}")
+        opts = onnxruntime.SessionOptions()
+        # opts.inter_op_num_threads = kwargs.get("num_threads", 4)
+        # opts.intra_op_num_threads = kwargs.get("num_threads", 4)
+        # opts.log_severity_level = 3
+
+        self.onnx_model = onnxruntime.InferenceSession(model_path, providers=providers, sess_options=opts)
+        self.inputs = self.onnx_model.get_inputs()
+        self.outputs = self.onnx_model.get_outputs()
+
+    def input_spec(self):
+        """
+        Get the specs for the input tensor of the network. Useful to prepare memory allocations.
+        :return: Two items, the shape of the input tensor and its (numpy) datatype.
+        """
+        specs = []
+        for i, o in enumerate(self.inputs):
+            specs.append((o.name, o.shape, o.type))
+            if self.debug:
+                print(f"ort {i} -> {o.name} -> {o.shape}")
+        return specs
+
+    def output_spec(self):
+        """
+        Get the specs for the output tensors of the network. Useful to prepare memory allocations.
+        :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
+        """
+        specs = []
+        for i, o in enumerate(self.outputs):
+            specs.append((o.name, o.shape, o.type))
+            if self.debug:
+                print(f"ort output {i} -> {o.name} -> {o.shape}")
+        return specs
+
+    def predict(self, *data):
+        input_feeds = {}
+        for i in range(len(data)):
+            if self.inputs[i].type == 'tensor(float16)':
+                input_feeds[self.inputs[i].name] = data[i].astype(np.float16)
+            else:
+                try:
+                    input_feeds[self.inputs[i].name] = data[i].astype(np.float32)
+                except:
+                    input_feeds[self.inputs[i].name] = data[i].cpu().numpy().astype(np.float32)
+        results = self.onnx_model.run(None, input_feeds)
+        return results
+
+    def __del__(self):
+        del self.onnx_model
+        self.onnx_model = None
+
+
+class OnnxRuntimePredictorSingleton(OnnxRuntimePredictor):
+    """
+    Singleton: prevents the same model from being loaded more than once.
+    """
+    _instance_lock = threading.Lock()
+    _instance = {}
+
+    def __new__(cls, *args, **kwargs):
+        model_path = kwargs.get("model_path", "")  # the model path is used to tell instances apart
+        assert os.path.exists(model_path), "model path must exist!"
+        # Singleton: avoid loading the same model repeatedly
+        with OnnxRuntimePredictorSingleton._instance_lock:
+            if model_path not in OnnxRuntimePredictorSingleton._instance or \
+                    OnnxRuntimePredictorSingleton._instance[model_path].onnx_model is None:
+                OnnxRuntimePredictorSingleton._instance[model_path] = OnnxRuntimePredictor(**kwargs)
+
+        return OnnxRuntimePredictorSingleton._instance[model_path]
+
+
+def get_predictor(**kwargs):
+    predict_type = kwargs.get("predict_type", "trt")
+    if predict_type == "ort":
+        return OnnxRuntimePredictorSingleton(**kwargs)
+    elif predict_type == "trt":
+        return TensorRTPredictor(**kwargs)
+    else:
+        raise NotImplementedError
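
The only functional change in this commit is inside OnnxRuntimePredictor.predict: the float32 branch is now wrapped in a try/except so that inputs without an astype method (torch tensors) are moved to the CPU and converted to NumPy before being fed to ONNX Runtime. Below is a minimal sketch of that fallback, using a hypothetical to_float32_feed helper that is not part of the file; it catches AttributeError explicitly, whereas the commit itself uses a bare except.

import numpy as np
import torch

def to_float32_feed(x):
    # NumPy arrays convert directly via astype; torch tensors raise
    # AttributeError here and take the cpu().numpy() path instead,
    # mirroring the try/except added in predict().
    try:
        return x.astype(np.float32)
    except AttributeError:
        return x.cpu().numpy().astype(np.float32)

print(to_float32_feed(np.ones((2, 3))).dtype)   # float32
print(to_float32_feed(torch.ones(2, 3)).dtype)  # float32

Either way the session still receives NumPy arrays, so onnx_model.run and the rest of the ORT path are unchanged.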