YinuoGuo27 committed on
Commit c55d2d9 · verified · 1 Parent(s): 267de73

Upload predictor.py

Files changed (1)
  1. difpoint/src/models/predictor.py +263 -275
difpoint/src/models/predictor.py CHANGED
@@ -1,275 +1,263 @@
- import pdb
- import os
- import time
-
- import numpy as np
- import onnxruntime
-
- import torch
- from torch.cuda import nvtx
- from collections import OrderedDict
- import platform
-
- try:
-     import tensorrt as trt
-     import ctypes
- except ModuleNotFoundError:
-     print("No TensorRT Found")
-
- numpy_to_torch_dtype_dict = {
-     np.uint8: torch.uint8,
-     np.int8: torch.int8,
-     np.int16: torch.int16,
-     np.int32: torch.int32,
-     np.int64: torch.int64,
-     np.float16: torch.float16,
-     np.float32: torch.float32,
-     np.float64: torch.float64,
-     np.complex64: torch.complex64,
-     np.complex128: torch.complex128,
- }
- if np.version.full_version >= "1.24.0":
-     numpy_to_torch_dtype_dict[np.bool_] = torch.bool
- else:
-     numpy_to_torch_dtype_dict[np.bool] = torch.bool
-
-
- class TensorRTPredictor:
-     """
-     Implements inference for the EfficientDet TensorRT engine.
-     """
-     def __init__(self, **kwargs):
-         """
-         :param engine_path: The path to the serialized engine to load from disk.
-         """
-         if platform.system().lower() == 'linux':
-             ctypes.CDLL("./difpoint/checkpoints/liveportrait_onnx/libgrid_sample_3d_plugin.so", mode=ctypes.RTLD_GLOBAL)
-         else:
-             ctypes.CDLL("./difpoint/checkpoints/liveportrait_onnx/grid_sample_3d_plugin.dll", mode=ctypes.RTLD_GLOBAL)
-         # Load TRT engine
-         self.logger = trt.Logger(trt.Logger.VERBOSE)
-         trt.init_libnvinfer_plugins(self.logger, "")
-         engine_path = os.path.abspath(kwargs.get("model_path", None))
-         print('engine_path', engine_path)
-         self.debug = kwargs.get("debug", False)
-         assert engine_path, f"model:{engine_path} must exist!"
-         print(f"loading trt model:{engine_path}")
-         with open(engine_path, "rb") as f, trt.Runtime(self.logger) as runtime:
-             assert runtime
-             self.engine = runtime.deserialize_cuda_engine(f.read())
-         print('self.engine', self.engine)
-         assert self.engine
-         self.context = self.engine.create_execution_context()
-         assert self.context
-
-         # Setup I/O bindings
-         self.inputs = []
-         self.outputs = []
-         self.tensors = OrderedDict()
-
-         # TODO: support dynamic-shape inputs
-         for idx in range(self.engine.num_io_tensors):
-             name = self.engine[idx]
-             is_input = self.engine.get_tensor_mode(name).name == "INPUT"
-             shape = self.engine.get_tensor_shape(name)
-             dtype = trt.nptype(self.engine.get_tensor_dtype(name))
-
-             binding = {
-                 "index": idx,
-                 "name": name,
-                 "dtype": dtype,
-                 "shape": list(shape)
-             }
-             if is_input:
-                 self.inputs.append(binding)
-             else:
-                 self.outputs.append(binding)
-
-         assert len(self.inputs) > 0
-         assert len(self.outputs) > 0
-         self.allocate_max_buffers()
-
-     def allocate_max_buffers(self, device="cuda"):
-         nvtx.range_push("allocate_max_buffers")
-         # for now, only the batch dimension is handled dynamically
-         batch_size = 1
-         for idx in range(self.engine.num_io_tensors):
-             binding = self.engine[idx]
-             shape = self.engine.get_tensor_shape(binding)
-             is_input = self.engine.get_tensor_mode(binding).name == "INPUT"
-             if -1 in shape:
-                 if is_input:
-                     shape = self.engine.get_tensor_profile_shape(binding, 0)[-1]
-                     batch_size = shape[0]
-                 else:
-                     shape[0] = batch_size
-             dtype = trt.nptype(self.engine.get_tensor_dtype(binding))
-             tensor = torch.empty(
-                 tuple(shape), dtype=numpy_to_torch_dtype_dict[dtype]
-             ).to(device=device)
-             self.tensors[binding] = tensor
-         nvtx.range_pop()
-
-     def input_spec(self):
-         """
-         Get the specs for the input tensor of the network. Useful to prepare memory allocations.
-         :return: Two items, the shape of the input tensor and its (numpy) datatype.
-         """
-         specs = []
-         for i, o in enumerate(self.inputs):
-             specs.append((o["name"], o['shape'], o['dtype']))
-             if self.debug:
-                 print(f"trt input {i} -> {o['name']} -> {o['shape']}")
-         return specs
-
-     def output_spec(self):
-         """
-         Get the specs for the output tensors of the network. Useful to prepare memory allocations.
-         :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
-         """
-         specs = []
-         for i, o in enumerate(self.outputs):
-             specs.append((o["name"], o['shape'], o['dtype']))
-             if self.debug:
-                 print(f"trt output {i} -> {o['name']} -> {o['shape']}")
-         return specs
-
-     def adjust_buffer(self, feed_dict):
-         nvtx.range_push("adjust_buffer")
-         for name, buf in feed_dict.items():
-             input_tensor = self.tensors[name]
-             current_shape = list(buf.shape)
-             slices = tuple(slice(0, dim) for dim in current_shape)
-             input_tensor[slices].copy_(buf)
-             self.context.set_input_shape(name, current_shape)
-         nvtx.range_pop()
-
-     def predict(self, feed_dict, stream):
-         """
-         Execute inference on a batch of images.
-         :param data: A list of inputs as numpy arrays.
-         :return A list of outputs as numpy arrays.
-         """
-         nvtx.range_push("set_tensors")
-         self.adjust_buffer(feed_dict)
-         for name, tensor in self.tensors.items():
-             self.context.set_tensor_address(name, tensor.data_ptr())
-         nvtx.range_pop()
-         nvtx.range_push("execute")
-         noerror = self.context.execute_async_v3(stream)
-         if not noerror:
-             raise ValueError("ERROR: inference failed.")
-         nvtx.range_pop()
-         return self.tensors
-
-     def __del__(self):
-         del self.engine
-         del self.context
-         del self.inputs
-         del self.outputs
-         del self.tensors
-
- class OnnxRuntimePredictor:
-     """
-     OnnxRuntime Prediction
-     """
-
-     def __init__(self, **kwargs):
-         model_path = kwargs.get("model_path", "")  # the model path identifies whether two instances are the same
-         assert os.path.exists(model_path), "model path must exist!"
-         # print("loading ort model:{}".format(model_path))
-         self.debug = kwargs.get("debug", False)
-         providers = ['CUDAExecutionProvider', 'CoreMLExecutionProvider', 'CPUExecutionProvider']
-
-         print(f"OnnxRuntime use {providers}")
-         opts = onnxruntime.SessionOptions()
-         # opts.inter_op_num_threads = kwargs.get("num_threads", 4)
-         # opts.intra_op_num_threads = kwargs.get("num_threads", 4)
-         # opts.log_severity_level = 3
-         #self.onnx_model = onnxruntime.InferenceSession(model_path, providers=providers, sess_options=opts)
-         #self.inputs = self.onnx_model.get_inputs()
-         #self.outputs = self.onnx_model.get_outputs()
-         self.onnx_model = None
-         self.inputs = []
-         self.outputs = []
-
-     def _load_model(self):
-         """Lazy initialization of the ONNX model (only when needed)."""
-         if self.onnx_model is None:
-             providers = ['CUDAExecutionProvider', 'CoreMLExecutionProvider', 'CPUExecutionProvider']
-             print(f"OnnxRuntime use {providers}")
-             opts = onnxruntime.SessionOptions()
-             self.onnx_model = onnxruntime.InferenceSession(self.model_path, providers=providers, sess_options=opts)
-             self.inputs = self.onnx_model.get_inputs()
-             self.outputs = self.onnx_model.get_outputs()
-
-     def input_spec(self):
-         """
-         Get the specs for the input tensor of the network. Useful to prepare memory allocations.
-         :return: Two items, the shape of the input tensor and its (numpy) datatype.
-         """
-         specs = []
-         for i, o in enumerate(self.inputs):
-             specs.append((o.name, o.shape, o.type))
-             if self.debug:
-                 print(f"ort {i} -> {o.name} -> {o.shape}")
-         return specs
-
-     def output_spec(self):
-         """
-         Get the specs for the output tensors of the network. Useful to prepare memory allocations.
-         :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
-         """
-         specs = []
-         for i, o in enumerate(self.outputs):
-             specs.append((o.name, o.shape, o.type))
-             if self.debug:
-                 print(f"ort output {i} -> {o.name} -> {o.shape}")
-         return specs
-
-     def predict(self, *data):
-         self._load_model()
-         input_feeds = {}
-         for i in range(len(data)):
-             if self.inputs[i].type == 'tensor(float16)':
-                 input_feeds[self.inputs[i].name] = data[i].astype(np.float16)
-             else:
-                 try:
-                     input_feeds[self.inputs[i].name] = data[i].astype(np.float32)
-                 except:
-                     input_feeds[self.inputs[i].name] = data[i].cpu().numpy().astype(np.float32)
-         results = self.onnx_model.run(None, input_feeds)
-         return results
-
-     def __del__(self):
-         del self.onnx_model
-         self.onnx_model = None
-
-
- class OnnxRuntimePredictorSingleton(OnnxRuntimePredictor):
-     """
-     Singleton pattern to prevent the model from being loaded more than once
-     """
-     #_instance_lock = threading.Lock()
-     #_instance = {}
-
-     def __new__(cls, *args, **kwargs):
-         model_path = kwargs.get("model_path", "")  # the model path identifies whether two instances are the same
-         assert os.path.exists(model_path), "model path must exist!"
-         # singleton pattern: avoid loading the model repeatedly
-         #with OnnxRuntimePredictorSingleton._instance_lock:
-         #if model_path not in OnnxRuntimePredictorSingleton._instance or \
-         #OnnxRuntimePredictorSingleton._instance[model_path].onnx_model is None:
-         #OnnxRuntimePredictorSingleton._instance[model_path] = OnnxRuntimePredictor(**kwargs)
-
-         #return OnnxRuntimePredictorSingleton._instance[model_path]
-         return OnnxRuntimePredictor(**kwargs)
-
- def get_predictor(**kwargs):
-     predict_type = kwargs.get("predict_type", "trt")
-     if predict_type == "ort":
-         return OnnxRuntimePredictorSingleton(**kwargs)
-     elif predict_type == "trt":
-         return TensorRTPredictor(**kwargs)
-     else:
-         raise NotImplementedError
 
+ import pdb
+ import threading
+ import os
+ import time
+
+ import numpy as np
+ import onnxruntime
+
+ import torch
+ from torch.cuda import nvtx
+ from collections import OrderedDict
+ import platform
+
+ import spaces
+
+ try:
+     import tensorrt as trt
+     import ctypes
+ except ModuleNotFoundError:
+     print("No TensorRT Found")
+
+ numpy_to_torch_dtype_dict = {
+     np.uint8: torch.uint8,
+     np.int8: torch.int8,
+     np.int16: torch.int16,
+     np.int32: torch.int32,
+     np.int64: torch.int64,
+     np.float16: torch.float16,
+     np.float32: torch.float32,
+     np.float64: torch.float64,
+     np.complex64: torch.complex64,
+     np.complex128: torch.complex128,
+ }
+ if np.version.full_version >= "1.24.0":
+     numpy_to_torch_dtype_dict[np.bool_] = torch.bool
+ else:
+     numpy_to_torch_dtype_dict[np.bool] = torch.bool
+
+
+ class TensorRTPredictor:
+     """
+     Implements inference for the EfficientDet TensorRT engine.
+     """
+     @spaces.GPU
+     def __init__(self, **kwargs):
+         """
+         :param engine_path: The path to the serialized engine to load from disk.
+         """
+         if platform.system().lower() == 'linux':
+             ctypes.CDLL("./difpoint/checkpoints/liveportrait_onnx/libgrid_sample_3d_plugin.so", mode=ctypes.RTLD_GLOBAL)
+         else:
+             ctypes.CDLL("./difpoint/checkpoints/liveportrait_onnx/grid_sample_3d_plugin.dll", mode=ctypes.RTLD_GLOBAL)
+         # Load TRT engine
+         self.logger = trt.Logger(trt.Logger.VERBOSE)
+         trt.init_libnvinfer_plugins(self.logger, "")
+         engine_path = os.path.abspath(kwargs.get("model_path", None))
+         print('engine_path', engine_path)
+         self.debug = kwargs.get("debug", False)
+         assert engine_path, f"model:{engine_path} must exist!"
+         print(f"loading trt model:{engine_path}")
+         with open(engine_path, "rb") as f, trt.Runtime(self.logger) as runtime:
+             assert runtime
+             self.engine = runtime.deserialize_cuda_engine(f.read())
+         print('self.engine', self.engine)
+         assert self.engine
+         self.context = self.engine.create_execution_context()
+         assert self.context
+
+         # Setup I/O bindings
+         self.inputs = []
+         self.outputs = []
+         self.tensors = OrderedDict()
+
+         # TODO: support dynamic-shape inputs
+         for idx in range(self.engine.num_io_tensors):
+             name = self.engine[idx]
+             is_input = self.engine.get_tensor_mode(name).name == "INPUT"
+             shape = self.engine.get_tensor_shape(name)
+             dtype = trt.nptype(self.engine.get_tensor_dtype(name))
+
+             binding = {
+                 "index": idx,
+                 "name": name,
+                 "dtype": dtype,
+                 "shape": list(shape)
+             }
+             if is_input:
+                 self.inputs.append(binding)
+             else:
+                 self.outputs.append(binding)
+
+         assert len(self.inputs) > 0
+         assert len(self.outputs) > 0
+         self.allocate_max_buffers()
+
+     def allocate_max_buffers(self, device="cuda"):
+         nvtx.range_push("allocate_max_buffers")
+         # for now, only the batch dimension is handled dynamically
+         batch_size = 1
+         for idx in range(self.engine.num_io_tensors):
+             binding = self.engine[idx]
+             shape = self.engine.get_tensor_shape(binding)
+             is_input = self.engine.get_tensor_mode(binding).name == "INPUT"
+             if -1 in shape:
+                 if is_input:
+                     shape = self.engine.get_tensor_profile_shape(binding, 0)[-1]
+                     batch_size = shape[0]
+                 else:
+                     shape[0] = batch_size
+             dtype = trt.nptype(self.engine.get_tensor_dtype(binding))
+             tensor = torch.empty(
+                 tuple(shape), dtype=numpy_to_torch_dtype_dict[dtype]
+             ).to(device=device)
+             self.tensors[binding] = tensor
+         nvtx.range_pop()
+
+     def input_spec(self):
+         """
+         Get the specs for the input tensor of the network. Useful to prepare memory allocations.
+         :return: Two items, the shape of the input tensor and its (numpy) datatype.
+         """
+         specs = []
+         for i, o in enumerate(self.inputs):
+             specs.append((o["name"], o['shape'], o['dtype']))
+             if self.debug:
+                 print(f"trt input {i} -> {o['name']} -> {o['shape']}")
+         return specs
+
+     def output_spec(self):
+         """
+         Get the specs for the output tensors of the network. Useful to prepare memory allocations.
+         :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
+         """
+         specs = []
+         for i, o in enumerate(self.outputs):
+             specs.append((o["name"], o['shape'], o['dtype']))
+             if self.debug:
+                 print(f"trt output {i} -> {o['name']} -> {o['shape']}")
+         return specs
+
+     def adjust_buffer(self, feed_dict):
+         nvtx.range_push("adjust_buffer")
+         for name, buf in feed_dict.items():
+             input_tensor = self.tensors[name]
+             current_shape = list(buf.shape)
+             slices = tuple(slice(0, dim) for dim in current_shape)
+             input_tensor[slices].copy_(buf)
+             self.context.set_input_shape(name, current_shape)
+         nvtx.range_pop()
+
+     def predict(self, feed_dict, stream):
+         """
+         Execute inference on a batch of images.
+         :param data: A list of inputs as numpy arrays.
+         :return A list of outputs as numpy arrays.
+         """
+         nvtx.range_push("set_tensors")
+         self.adjust_buffer(feed_dict)
+         for name, tensor in self.tensors.items():
+             self.context.set_tensor_address(name, tensor.data_ptr())
+         nvtx.range_pop()
+         nvtx.range_push("execute")
+         noerror = self.context.execute_async_v3(stream)
+         if not noerror:
+             raise ValueError("ERROR: inference failed.")
+         nvtx.range_pop()
+         return self.tensors
+
+     def __del__(self):
+         del self.engine
+         del self.context
+         del self.inputs
+         del self.outputs
+         del self.tensors
+
+ class OnnxRuntimePredictor:
+     """
+     OnnxRuntime Prediction
+     """
+
+     def __init__(self, **kwargs):
+         model_path = kwargs.get("model_path", "")  # the model path identifies whether two instances are the same
+         assert os.path.exists(model_path), "model path must exist!"
+         # print("loading ort model:{}".format(model_path))
+         self.debug = kwargs.get("debug", False)
+         providers = ['CUDAExecutionProvider', 'CoreMLExecutionProvider', 'CPUExecutionProvider']
+
+         print(f"OnnxRuntime use {providers}")
+         opts = onnxruntime.SessionOptions()
+         # opts.inter_op_num_threads = kwargs.get("num_threads", 4)
+         # opts.intra_op_num_threads = kwargs.get("num_threads", 4)
+         # opts.log_severity_level = 3
+
+         self.onnx_model = onnxruntime.InferenceSession(model_path, providers=providers, sess_options=opts)
+         self.inputs = self.onnx_model.get_inputs()
+         self.outputs = self.onnx_model.get_outputs()
+
+     def input_spec(self):
+         """
+         Get the specs for the input tensor of the network. Useful to prepare memory allocations.
+         :return: Two items, the shape of the input tensor and its (numpy) datatype.
+         """
+         specs = []
+         for i, o in enumerate(self.inputs):
+             specs.append((o.name, o.shape, o.type))
+             if self.debug:
+                 print(f"ort {i} -> {o.name} -> {o.shape}")
+         return specs
+
+     def output_spec(self):
+         """
+         Get the specs for the output tensors of the network. Useful to prepare memory allocations.
+         :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
+         """
+         specs = []
+         for i, o in enumerate(self.outputs):
+             specs.append((o.name, o.shape, o.type))
+             if self.debug:
+                 print(f"ort output {i} -> {o.name} -> {o.shape}")
+         return specs
+
+     def predict(self, *data):
+         input_feeds = {}
+         for i in range(len(data)):
+             if self.inputs[i].type == 'tensor(float16)':
+                 input_feeds[self.inputs[i].name] = data[i].astype(np.float16)
+             else:
+                 input_feeds[self.inputs[i].name] = data[i].astype(np.float32)
+         results = self.onnx_model.run(None, input_feeds)
+         return results
+
+     def __del__(self):
+         del self.onnx_model
+         self.onnx_model = None
+
+
+ class OnnxRuntimePredictorSingleton(OnnxRuntimePredictor):
+     """
+     Singleton pattern to prevent the model from being loaded more than once
+     """
+     _instance_lock = threading.Lock()
+     _instance = {}
+
+     def __new__(cls, *args, **kwargs):
+         model_path = kwargs.get("model_path", "")  # the model path identifies whether two instances are the same
+         assert os.path.exists(model_path), "model path must exist!"
+         # singleton pattern: avoid loading the model repeatedly
+         with OnnxRuntimePredictorSingleton._instance_lock:
+             if model_path not in OnnxRuntimePredictorSingleton._instance or \
+                     OnnxRuntimePredictorSingleton._instance[model_path].onnx_model is None:
+                 OnnxRuntimePredictorSingleton._instance[model_path] = OnnxRuntimePredictor(**kwargs)
+
+         return OnnxRuntimePredictorSingleton._instance[model_path]
+
+
+ def get_predictor(**kwargs):
+     predict_type = kwargs.get("predict_type", "trt")
+     if predict_type == "ort":
+         return OnnxRuntimePredictorSingleton(**kwargs)
+     elif predict_type == "trt":
+         return TensorRTPredictor(**kwargs)
+     else:
+         raise NotImplementedError
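
For context, here is a minimal, hypothetical usage sketch of the `get_predictor` entry point as it stands after this commit, exercising the ONNX Runtime path. The checkpoint filename and the zero-filled dummy inputs are illustrative assumptions, not part of the commit.

```python
import numpy as np

from difpoint.src.models.predictor import get_predictor

# "ort" routes through OnnxRuntimePredictorSingleton, which caches one
# OnnxRuntimePredictor per model_path. The .onnx filename here is assumed.
predictor = get_predictor(
    predict_type="ort",
    model_path="./difpoint/checkpoints/liveportrait_onnx/warping_spade.onnx",
    debug=True,
)

# input_spec()/output_spec() report (name, shape, type) for each model tensor.
input_specs = predictor.input_spec()
print(input_specs)
print(predictor.output_spec())

# predict() takes one positional numpy array per model input and casts each to
# float16/float32 based on its declared ONNX type; symbolic dims are filled with 1 here.
dummy_inputs = [
    np.zeros([d if isinstance(d, int) else 1 for d in shape], dtype=np.float32)
    for _, shape, _ in input_specs
]
outputs = predictor.predict(*dummy_inputs)

# The TensorRT path (predict_type="trt") instead expects a serialized engine file as
# model_path and is driven with predictor.predict(feed_dict, stream) on a CUDA stream.
```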