iswaalex commited on
Commit
0db70ac
·
verified ·
1 Parent(s): 2474210

Upload 8 files

Browse files
test_cpp/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ build
2
+ configure.txt
3
+ passContext.txt
test_cpp/CMakeLists.txt ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# 3.13 required for target_link_directories (was 3.5).
cmake_minimum_required(VERSION 3.13)

project(app VERSION 1.0.0 LANGUAGES C CXX)

# Locate the RyzenAI install tree (read from the environment at configure time)
# and the ONNX Runtime bundled with it.
set(RYZEN_AI_INSTALLATION_PATH "$ENV{RYZEN_AI_INSTALLATION_PATH}")
set(ONNXRUNTIME_ROOTDIR "${RYZEN_AI_INSTALLATION_PATH}/onnxruntime")
set(PYTHONHOME "$ENV{CONDA_PREFIX}")

message(STATUS "RYZEN_AI_INSTALLATION_PATH = ${RYZEN_AI_INSTALLATION_PATH}")
message(STATUS "ONNXRUNTIME_ROOTDIR = ${ONNXRUNTIME_ROOTDIR}")
message(STATUS "PYTHONHOME = ${PYTHONHOME}")

set(CMAKE_CONFIGURATION_TYPES Release)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_INSTALL_PREFIX .)
set(CMAKE_PREFIX_PATH .)
set(BUILD_SHARED_LIBS OFF)

add_executable(${PROJECT_NAME} src/main.cpp src/npu_util.cpp)

# Compiler options, target-scoped (previously two directory-wide
# add_compile_options calls; /Zc:__cplusplus was listed twice — deduplicated).
target_compile_options(${PROJECT_NAME} PRIVATE
    /std:c++latest
    /Zc:__cplusplus
    /W3
    /WX
    /wd4100
    /wd4996
    /Qspectre
    /ZH:SHA_256
    /guard:cf
    /Zi
)
target_link_options(${PROJECT_NAME} PRIVATE
    /CETCOMPAT
    /DEBUG
)

target_include_directories(${PROJECT_NAME} PRIVATE
    "${ONNXRUNTIME_ROOTDIR}/include/onnxruntime/core/session"
    src
)
target_link_directories(${PROJECT_NAME} PRIVATE
    "${ONNXRUNTIME_ROOTDIR}/lib"
    "${CMAKE_INSTALL_PREFIX}/lib"
)
target_link_libraries(${PROJECT_NAME} PRIVATE onnxruntime)

# -- Copy the RyzenAI runtime DLLs in folder containing the executable

set(dll_list "")
foreach(dll_name
    dyn_dispatch_core.dll
    onnxruntime.dll
    onnxruntime_providers_shared.dll
    onnxruntime_providers_vitisai.dll
    onnxruntime_vitisai_ep.dll
    transaction.dll
    xclbin.dll)
    list(APPEND dll_list "${RYZEN_AI_INSTALLATION_PATH}/deployment/voe/${dll_name}")
endforeach()

foreach(DLL_FILE IN LISTS dll_list)
    add_custom_command(
        TARGET ${PROJECT_NAME} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy_if_different "${DLL_FILE}" "$<TARGET_FILE_DIR:${PROJECT_NAME}>"
        VERBATIM
    )
endforeach()

# Test model used by the quicktest.
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy_if_different
        "${RYZEN_AI_INSTALLATION_PATH}/quicktest/test_model.onnx" "$<TARGET_FILE_DIR:${PROJECT_NAME}>"
    VERBATIM
)

# NPU overlay binaries; create the destination folders first so the copy
# cannot fail on a clean build tree.
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E make_directory "$<TARGET_FILE_DIR:${PROJECT_NAME}>/xclbins/phoenix"
    COMMAND ${CMAKE_COMMAND} -E copy_if_different
        "${RYZEN_AI_INSTALLATION_PATH}/voe-4.0-win_amd64/xclbins/phoenix/1x4.xclbin"
        "$<TARGET_FILE_DIR:${PROJECT_NAME}>/xclbins/phoenix/1x4.xclbin"
    VERBATIM
)
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E make_directory "$<TARGET_FILE_DIR:${PROJECT_NAME}>/xclbins/strix"
    COMMAND ${CMAKE_COMMAND} -E copy_if_different
        "${RYZEN_AI_INSTALLATION_PATH}/voe-4.0-win_amd64/xclbins/strix/AMD_AIE2P_Nx4_Overlay.xclbin"
        "$<TARGET_FILE_DIR:${PROJECT_NAME}>/xclbins/strix/AMD_AIE2P_Nx4_Overlay.xclbin"
    VERBATIM
)
test_cpp/README.md ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Simple ONNX RT C++ Example for the NPU
2
+
3
+ A simple C++ program which runs an ONNX model on the CPU and then on the NPU (if the system configuration allows it).
4
+
5
+ ## Requirements
6
+
7
+ To build the example, the following software must be installed:
8
+ - Ryzen AI 1.4
9
+ - CMake
10
+ - Visual Studio 2022
11
+
12
+ ## Instructions
13
+
14
+ - Build the example
15
+
16
+ ```
17
+ conda activate <RAI env>
18
+ compile.bat
19
+ ```
20
+
21
+ - Run the example
22
+
23
+ ```
24
+ run.bat
25
+ ```
26
+
27
+ Note: it is not necessary to run the example from within the RyzenAI conda environment.
28
+
29
+ ## Notes
30
+
31
+ - The CMakeLists.txt copies all the required files (DLLs, XCLBINs) from the RyzenAI installation tree into the folder where the executable is created. With this approach, there is no runtime dependency on the RyzenAI installation tree. Once built, the example can be run on machines without RyzenAI installed.
32
+
33
+ - This example shows that it is possible to run without any dependency on Python.
34
+
35
+ - The example uses the test_model.onnx model from the quicktest folder.
36
+
37
+ - Like the Python quicktest, this example runs the model with random data and doesn't test the output results.
38
+
39
+ - The application uses the VAI-EP session options to specify the XCLBIN, and it explicitly unsets the XLNX_VART_FIRMWARE and XLNX_TARGET_NAME env vars.
40
+
41
+ - The example uses the helper functions in `npu_util.cpp` to check whether VAI-EP 1.4 can be used on the deployment machine, and to determine the type of NPU present (PHX/HPT or STX/KRK).
42
+
43
+ - The test program only supports models with exactly 1 input node and 1 output node. But the code can easily be extended to support an arbitrary number of nodes if need be.
44
+
45
+
46
+ ## Sample Output
47
+
48
+ Sample output of a successful run:
49
+
50
+ ```
51
+ >run.bat
52
+
53
+ -------------------------------------------------------
54
+ Running quicktest on CPU
55
+ -------------------------------------------------------
56
+ Creating ORT env
57
+ Initializing session options
58
+ Creating ONNX Session
59
+ ONNX model : c:\temp\quicktest_cpp\build\Release\test_model.onnx
60
+ input -1x3x32x32
61
+ output -1x10
62
+ Dynamic batch size detected. Setting batch size to 1.
63
+ Running the model
64
+ -------------------------------------------------------
65
+ Test PASSED!
66
+ -------------------------------------------------------
67
+
68
+ -------------------------------------------------------
69
+ Performing compatibility check for VitisAI EP 1.4
70
+ -------------------------------------------------------
71
+ - NPU Device ID : 0x1502
72
+ - NPU Device Name : NPU Compute Accelerator Device
73
+ - NPU Driver Version: 32.0.203.252
74
+ Environment compatible for VitisAI EP
75
+
76
+ -------------------------------------------------------
77
+ Running quicktest on NPU
78
+ -------------------------------------------------------
79
+ Creating ORT env
80
+ Initializing session options
81
+ Configuring VAI EP
82
+ Creating ONNX Session
83
+ WARNING: Logging before InitGoogleLogging() is written to STDERR
84
+ I20250221 12:48:40.532923 76424 vitisai_compile_model.cpp:1046] Vitis AI EP Load ONNX Model Success
85
+ I20250221 12:48:40.532923 76424 vitisai_compile_model.cpp:1047] Graph Input Node Name/Shape (1)
86
+ I20250221 12:48:40.532923 76424 vitisai_compile_model.cpp:1051] input : [-1x3x32x32]
87
+ I20250221 12:48:40.532923 76424 vitisai_compile_model.cpp:1057] Graph Output Node Name/Shape (1)
88
+ I20250221 12:48:40.532923 76424 vitisai_compile_model.cpp:1061] output : [-1x10]
89
+ [Vitis AI EP] No. of Operators : CPU 2 NPU 18e
90
+ [Vitis AI EP] No. of Subgraphs : CPU 1 NPU 1 Actually running on NPU 1
91
+ ONNX model : C:\temp\quicktest_cpp\build\Release\test_model.onnx
92
+ input -1x3x32x32
93
+ output -1x10
94
+ Dynamic batch size detected. Setting batch size to 1.
95
+ Running the model
96
+ -------------------------------------------------------
97
+ Test PASSED!
98
+ -------------------------------------------------------
99
+ ```
test_cpp/compile.bat ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
@echo off

REM Build the quicktest example with the Visual Studio 2022 generator.
REM Requires the RyzenAI conda env (CONDA_PREFIX) and RYZEN_AI_INSTALLATION_PATH.
REM BUG FIX: the previous "& goto :error" paths exited with errorlevel 0 (success)
REM because echo resets the error code; each failure now exits non-zero, and a
REM failed configure no longer falls through into the build step.

if "%CONDA_PREFIX%" == "" (
    echo CONDA_PREFIX not set. This script must be executed from within the RyzenAI conda environment.
    exit /b 1
)
if "%RYZEN_AI_INSTALLATION_PATH%" == "" (
    echo RYZEN_AI_INSTALLATION_PATH not set. This script requires the RYZEN_AI_INSTALLATION_PATH env var to be set to the RyzenAI installation folder.
    exit /b 1
)

REM Configure step; abort on failure instead of attempting to build anyway.
cmake -DCMAKE_CONFIGURATION_TYPES=Release -A x64 -T host=x64 -B build -S . -G "Visual Studio 17 2022"
if errorlevel 1 exit /b %errorlevel%

REM Build step.
cmake --build .\build --config Release --target ALL_BUILD
if errorlevel 1 exit /b %errorlevel%

echo.
exit /b 0
test_cpp/run.bat ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
@echo off

REM Run the quicktest executable (built by compile.bat) and propagate its
REM exit code to the caller; the previous version discarded it.
build\Release\app.exe
set "APP_RC=%errorlevel%"

echo.
exit /b %APP_RC%
test_cpp/src/main.cpp ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************************
2
+ MIT License
3
+
4
+ Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
7
+
8
+ The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software.
9
+
10
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
11
+ ************************************************************************************/
12
+ #include <assert.h>
13
+ #include <onnxruntime_cxx_api.h>
14
+
15
+ #include <iostream>
16
+ #include <sstream>
17
+ #include <vector>
18
+ #include <codecvt>
19
+ #include <filesystem>
20
+
21
+ #include "npu_util.h"
22
+
23
+
24
// Returns the product of all dimensions in `v` (total element count).
// An empty shape yields 1, the multiplicative identity.
// NOTE(review): the count is truncated to int — fine for the small test
// model used here, but would overflow for very large tensors.
static int get_num_elements(const std::vector<int64_t>& v) {
    int count = 1;
    for (size_t idx = 0; idx < v.size(); ++idx)
        count *= static_cast<int>(v[idx]);
    return count;
}
30
+
31
// Streams a vector as "[a, b, c]" (comma-separated, bracketed).
template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
{
    os << "[";
    const char* separator = "";
    for (const auto& item : v)
    {
        os << separator << item;
        separator = ", "; // no separator before the first element
    }
    os << "]";
    return os;
}
46
+
47
// Pretty-prints a shape dimension vector as "d0xd1x...dn".
// BUG FIX: the original indexed v[v.size() - 1] and looped to v.size() - 1
// with an unsigned counter, which is undefined behavior / a huge loop when
// v is empty; an empty shape now safely yields "".
static std::string print_shape(const std::vector<int64_t>& v) {
    std::stringstream ss("");
    for (size_t i = 0; i < v.size(); i++) {
        if (i != 0)
            ss << "x"; // separator between dimensions
        ss << v[i];
    }
    return ss.str();
}
55
+
56
+ static std::string print_tensor(Ort::Value& tensor) {
57
+ auto shape = tensor.GetTensorTypeAndShapeInfo().GetShape();
58
+ auto nelem = get_num_elements(shape);
59
+ auto tensor_ptr = tensor.GetTensorMutableData<float>();
60
+
61
+ std::stringstream ss("");
62
+ for (auto i = 0; i < nelem; i++)
63
+ ss << tensor_ptr[i] << " ";
64
+ return ss.str();
65
+ }
66
+
67
+ template <typename T>
68
+ Ort::Value vec_to_tensor(std::vector<T>& data, const std::vector<std::int64_t>& shape) {
69
+ Ort::MemoryInfo mem_info =
70
+ Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
71
+ auto tensor = Ort::Value::CreateTensor<T>(mem_info, data.data(), data.size(), shape.data(), shape.size());
72
+ return tensor;
73
+ }
74
+
75
+ std::string get_program_dir()
76
+ {
77
+ char* exe_path; _get_pgmptr(&exe_path); // full path and name of the executable
78
+ return std::filesystem::path(exe_path).parent_path().string(); // directory in which the executable is located
79
+ }
80
+
81
+
82
+ int runtest(std::string& model_name, std::unordered_map<std::string, std::string>& vai_ep_options)
83
+ {
84
+ int64_t batch_size = 1;
85
+
86
+ printf("Creating ORT env\n");
87
+ Ort::Env env(ORT_LOGGING_LEVEL_ERROR, "quicktest");
88
+
89
+ printf("Initializing session options\n");
90
+ auto session_options = Ort::SessionOptions();
91
+
92
+ if (vai_ep_options.empty()==false) // If VAI EP options are provided, initialize the VitisAI EP
93
+ {
94
+ printf("Configuring VAI EP\n");
95
+ try {
96
+ session_options.AppendExecutionProvider_VitisAI(vai_ep_options);
97
+ }
98
+ catch (const std::exception& e) {
99
+ std::cerr << "Exception occurred in appending execution provider: " << e.what() << std::endl;
100
+ }
101
+ }
102
+
103
+ printf("Creating ONNX Session\n");
104
+ auto session = Ort::Session(env, std::basic_string<ORTCHAR_T>(model_name.begin(), model_name.end()).c_str(), session_options);
105
+
106
+ // Get names and shapes of model inputs and outputs
107
+ Ort::AllocatorWithDefaultOptions allocator;
108
+ auto input_count = session.GetInputCount();
109
+ auto input_names = std::vector<std::string>();
110
+ auto input_names_char = std::vector<const char*>();
111
+ auto input_shapes = std::vector<std::vector<int64_t>>();
112
+ auto output_count = session.GetOutputCount();
113
+ auto output_names = std::vector<std::string>();
114
+ auto output_names_char = std::vector<const char*>();
115
+ auto output_shapes = std::vector<std::vector<int64_t>>();
116
+ for (size_t i = 0; i < input_count; i++)
117
+ {
118
+ auto shape = session.GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();
119
+ std::string name = session.GetInputNameAllocated(i, allocator).get();
120
+ input_names.emplace_back(name);
121
+ input_names_char.emplace_back(input_names.at(i).c_str());
122
+ input_shapes.emplace_back(shape);
123
+ }
124
+ for (size_t i = 0; i < output_count; i++)
125
+ {
126
+ auto shape = session.GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();
127
+ std::string name = session.GetOutputNameAllocated(i, allocator).get();
128
+ output_names.emplace_back(name);
129
+ output_names_char.emplace_back(output_names.at(i).c_str());
130
+ output_shapes.emplace_back(shape);
131
+ }
132
+
133
+ // Display model info
134
+ std::cout << "ONNX model : " << model_name << std::endl;
135
+ for (size_t i = 0; i < input_count; i++)
136
+ std::cout << " " << input_names.at(i) << " " << print_shape(input_shapes.at(i)) << std::endl;
137
+ for (size_t i = 0; i < output_count; i++)
138
+ std::cout << " " << output_names.at(i) << " " << print_shape(output_shapes.at(i)) << std::endl;
139
+
140
+ // The code which follows expects the model to have 1 input node and 1 output node.
141
+ if (output_count != 1 && input_count != 1) {
142
+ std::cout << "This version of the program only supports models with 1 input node and 1 output node. Exiting." << std::endl;
143
+ exit(-1);
144
+ }
145
+
146
+ // If input shape has dynamic batch size, set it to a fixed value
147
+ auto input_shape = input_shapes[0];
148
+ if (input_shape[0] < 0) {
149
+ std::cout << "Dynamic batch size detected. Setting batch size to " << batch_size << "." << std::endl;
150
+ input_shape[0] = batch_size;
151
+ }
152
+
153
+ printf("Running the model\n");
154
+ for (int i = 0; i < 1; i++)
155
+ {
156
+ // Initialize input data with random numbers in the range [0, 255]
157
+ std::vector<float> input_tensor_values(get_num_elements(input_shape));
158
+ std::generate(input_tensor_values.begin(), input_tensor_values.end(), [&] { return (float)(rand() % 255); });
159
+
160
+ // Initialize input tensor with input data
161
+ std::vector<Ort::Value> input_tensors;
162
+ input_tensors.emplace_back(vec_to_tensor<float>(input_tensor_values, input_shape));
163
+
164
+ // Pass input tensors through model
165
+ try {
166
+ auto output_tensors = session.Run(
167
+ Ort::RunOptions(),
168
+ input_names_char.data(), input_tensors.data(), input_names_char.size(),
169
+ output_names_char.data(), output_names_char.size()
170
+ );
171
+ // std::cout << i << " : " << print_tensor(output_tensors[0]) << std::endl;
172
+ }
173
+ catch (const Ort::Exception& exception) {
174
+ std::cout << "ERROR running model inference: " << exception.what() << std::endl;
175
+ exit(-1);
176
+ }
177
+ }
178
+ printf("-------------------------------------------------------\n");
179
+ printf("Test PASSED!\n");
180
+ printf("-------------------------------------------------------\n");
181
+ printf("\n");
182
+
183
+ return 0;
184
+ }
185
+
186
+
187
+ int run_on_cpu(std::string& model_name, std::string& exe_dir)
188
+ {
189
+ // Leave VitisAI EP options empty to run on CPU
190
+ std::unordered_map<std::string, std::string> vai_ep_options;
191
+
192
+ // Full path to the ONNX model
193
+ std::string model_path = exe_dir + "\\" + model_name;
194
+
195
+ // Run test
196
+ printf("-------------------------------------------------------\n");
197
+ printf("Running quicktest on CPU \n");
198
+ printf("-------------------------------------------------------\n");
199
+ return runtest(model_path, vai_ep_options);
200
+ }
201
+
202
// Runs the quicktest model on the NPU via the VitisAI EP.
// First verifies driver/EP compatibility, then selects the cache folder and
// XCLBIN matching the detected NPU type, and finally runs the model.
// Returns runtest()'s result, or -1 when the NPU cannot be used.
int run_on_npu(std::string& model_name, std::string& exe_dir)
{
    printf("-------------------------------------------------------\n");
    printf("Performing compatibility check for VitisAI EP 1.4 \n");
    printf("-------------------------------------------------------\n");
    auto npu_info = npu_util::checkCompatibility_RAI_1_4();

    std::cout << " - NPU Device ID : 0x" << std::hex << npu_info.device_id << std::dec << std::endl;
    std::cout << " - NPU Device Name : " << npu_info.device_name << std::endl;
    std::cout << " - NPU Driver Version: " << npu_info.driver_version_string << std::endl;
    // Any non-OK status aborts the NPU run (expected on machines without a
    // supported NPU, so the caller treats -1 as "skipped", not fatal).
    switch (npu_info.check) {
        case npu_util::Status::OK:
            std::cout << "Environment compatible for VitisAI EP" << std::endl;
            break;
        case npu_util::Status::NPU_UNRECOGNIZED:
            std::cout << "NPU type not recognized." << std::endl;
            std::cout << "Skipping run with VitisAI EP." << std::endl;
            return -1;
            break; // unreachable after return
        case npu_util::Status::DRIVER_TOO_OLD:
            std::cout << "Installed drivers are too old." << std::endl;
            std::cout << "Skipping run with VitisAI EP." << std::endl;
            return -1;
            break; // unreachable after return
        case npu_util::Status::EP_TOO_OLD:
            std::cout << "VitisAI EP is too old." << std::endl;
            std::cout << "Skipping run with VitisAI EP." << std::endl;
            return -1;
            break; // unreachable after return
        default:
            std::cout << "Unknown state." << std::endl;
            std::cout << "Skipping run with VitisAI EP." << std::endl;
            return -1;
            break; // unreachable after return
    }
    std::cout << std::endl;

    // Set VitisAI EP options: per-device model cache and XCLBIN overlay
    // (the XCLBINs are copied under the exe dir by CMakeLists.txt).
    std::unordered_map<std::string, std::string> vai_ep_options;
    switch(npu_info.device_id) {
        case 0x1502: // PHX/HPT NPU
            vai_ep_options["cacheDir"] = exe_dir + "\\modelcache";
            vai_ep_options["cacheKey"] = "testmodel_phx";
            vai_ep_options["xclbin"] = exe_dir + "\\xclbins\\phoenix\\1x4.xclbin";;
            break;
        case 0x17F0: // STX/KRK NPU
            vai_ep_options["cacheDir"] = exe_dir + "\\modelcache";
            vai_ep_options["cacheKey"] = "testmodel_stx";
            vai_ep_options["xclbin"] = exe_dir + "\\xclbins\\strix\\AMD_AIE2P_Nx4_Overlay.xclbin";
            break;
        default:
            std::cout << "Unsupported NPU device ID." << std::endl;
            return -1;
            break; // unreachable after return
    }

    // Set environment variables
    _putenv("XLNX_VART_FIRMWARE="); // Unset XLNX_VART_FIRMWARE (use VAI-EP option to set XCLBIN)
    _putenv("XLNX_TARGET_NAME="); // Unset XLNX_TARGET_NAME (rely on default value: AMD_AIE2P_Nx4_Overlay)

    // Full path to the ONNX model
    std::string model_path = exe_dir + "\\" + model_name;

    // Run test
    printf("-------------------------------------------------------\n");
    printf("Running quicktest on NPU \n");
    printf("-------------------------------------------------------\n");
    return runtest(model_path, vai_ep_options);
}
271
+
272
+ int main(int argc, char* argv[])
273
+ {
274
+ std::string exe_dir = get_program_dir();
275
+ std::string model_name ="test_model.onnx";
276
+
277
+ run_on_cpu(model_name, exe_dir);
278
+ run_on_npu(model_name, exe_dir);
279
+
280
+ return 0;
281
+ }
test_cpp/src/npu_util.cpp ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************************
2
+ MIT License
3
+
4
+ Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this
7
+ software and associated documentation files (the "Software"), to deal in the Software
8
+ without restriction, including without limitation the rights to use, copy, modify,
9
+ merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
10
+ persons to whom the Software is furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice (including the next paragraph) shall
13
+ be included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
16
+ INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
17
+ PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
18
+ FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
+ OTHER DEALINGS IN THE SOFTWARE.
21
+ ************************************************************************************/
22
+
23
+ // compile using: /std:c++latest
24
+
25
+ #pragma comment(lib, "setupapi.lib")
26
+
27
+ #include <chrono>
28
+ #include <vector>
29
+ #include <mutex>
30
+
31
+ #include <Windows.h>
32
+ #include <setupapi.h>
33
+ #include <devguid.h>
34
+
35
+ #include "npu_util.h"
36
+
37
+
38
+ namespace npu_util {
39
+
40
+ std::string DriverHexToString(DWORDLONG ver) {
41
+ std::stringstream string_stream;
42
+ string_stream << ((ver >> 48) & 0xffff) << "." << ((ver >> 32) & 0xffff) << "." << ((ver >> 16) & 0xffff) << "." << ((ver >> 0) & 0xffff);
43
+ return string_stream.str();
44
+ }
45
+
46
+ DWORDLONG DriverNumberToHex(DWORDLONG a, DWORDLONG b, DWORDLONG c, DWORDLONG d) {
47
+ DWORDLONG ver = ((a & 0xffff) << 48) | ((b & 0xffff) << 32) | ((c & 0xffff) << 16) | ((d & 0xffff) << 0) ;
48
+ return ver;
49
+ }
50
+
51
// Extract NPU information via the Windows SetupAPI:
// scans present devices in the ComputeAccelerator and System device classes
// for an AMD NPU PCI hardware ID, and on a match records the device id,
// the registry device description, and the installed driver version.
// Fields stay at their "not found" defaults (-1 / "") when no NPU matches.
NPUInfo extractNPUInfo()
{
    // Make extractNPUInfo thread-safe
    static std::mutex function_mutex;
    std::lock_guard<std::mutex> guard(function_mutex);

    NPUInfo npu_info;
    npu_info.device_id = -1;           // -1 = no supported NPU found (sentinel checked below)
    npu_info.device_name = "";
    npu_info.driver_version_number = -1;
    npu_info.driver_version_string = "";
    npu_info.check = Status::UNKNOWN;

    // Known AMD NPU PCI hardware-ID prefixes and the device id we report for them.
    static const std::vector<std::pair<std::string, int>> PCI_IDS = {
        { "PCI\\VEN_1022&DEV_1502", 0x1502 }, // AIE2
        { "PCI\\VEN_1022&DEV_17F0", 0x17F0 } // AIE2P
    };

    // The NPU may be registered under either device class depending on driver version.
    static const std::vector<const GUID*> DEV_CLASSES = {
        &GUID_DEVCLASS_COMPUTEACCELERATOR,
        &GUID_DEVCLASS_SYSTEM
    };

    for (const auto& devClass : DEV_CLASSES) {
        HDEVINFO deviceInfoSet = SetupDiGetClassDevs(devClass, nullptr, nullptr, DIGCF_PRESENT);
        if (deviceInfoSet == INVALID_HANDLE_VALUE) {
            continue;
        }

        SP_DEVINFO_DATA deviceInfoData = { 0 };
        deviceInfoData.cbSize = sizeof(deviceInfoData);

        DWORD index = 0;
        // Enumerate devices until a supported NPU is found or the list is exhausted.
        while (npu_info.device_id == -1 && SetupDiEnumDeviceInfo(deviceInfoSet, index, &deviceInfoData)) {
            DWORD requiredSize = 0;

            // First call only queries the buffer size needed for the hardware ID.
            SetupDiGetDeviceRegistryPropertyA(deviceInfoSet, &deviceInfoData, SPDRP_HARDWAREID, nullptr, nullptr, 0, &requiredSize);

            std::vector<BYTE> buffer(requiredSize);

            if (SetupDiGetDeviceRegistryPropertyA(deviceInfoSet, &deviceInfoData, SPDRP_HARDWAREID, nullptr, buffer.data(), requiredSize, nullptr)) {
                // REG_MULTI_SZ; only the first string is examined here.
                std::string hardwareId(reinterpret_cast<const char*>(buffer.data()));

                for (const auto& entry : PCI_IDS) {
                    if (hardwareId.find(entry.first) != std::string::npos) {
                        npu_info.device_id = entry.second;
                        requiredSize = 0;
                        // Fetch the human-readable device description (size query + read).
                        SetupDiGetDeviceRegistryPropertyA(deviceInfoSet, &deviceInfoData, SPDRP_DEVICEDESC, nullptr, nullptr, 0, &requiredSize);

                        buffer.resize(requiredSize);
                        if (SetupDiGetDeviceRegistryPropertyA(deviceInfoSet, &deviceInfoData, SPDRP_DEVICEDESC, nullptr, buffer.data(), requiredSize, nullptr)) {
                            std::string dev_desc(reinterpret_cast<const char*>(buffer.data()));
                            npu_info.device_name = dev_desc;
                        }
                        // Restrict the driver list to the currently installed driver,
                        // then read its version from the first enumerated entry.
                        SP_DEVINSTALL_PARAMS DeviceInstallParams;
                        ZeroMemory(&DeviceInstallParams, sizeof(DeviceInstallParams));
                        DeviceInstallParams.cbSize = sizeof(SP_DEVINSTALL_PARAMS);
                        DeviceInstallParams.FlagsEx |= (DI_FLAGSEX_INSTALLEDDRIVER | DI_FLAGSEX_ALLOWEXCLUDEDDRVS);
                        if (SetupDiSetDeviceInstallParams(deviceInfoSet, &deviceInfoData, &DeviceInstallParams)) {
                            if (SetupDiBuildDriverInfoList(deviceInfoSet, &deviceInfoData, SPDIT_COMPATDRIVER)) {
                                SP_DRVINFO_DATA DriverInfoData;
                                DriverInfoData.cbSize = sizeof(SP_DRVINFO_DATA);
                                if (SetupDiEnumDriverInfo(deviceInfoSet, &deviceInfoData, SPDIT_COMPATDRIVER, 0, &DriverInfoData)) {
                                    npu_info.driver_version_number = DriverInfoData.DriverVersion;
                                    npu_info.driver_version_string = DriverHexToString(DriverInfoData.DriverVersion).c_str();
                                }
                            }
                            SetupDiDestroyDriverInfoList(deviceInfoSet, &deviceInfoData, SPDIT_COMPATDRIVER);
                            break;
                        }
                    }
                }
            }

            ++index;
        }

        SetupDiDestroyDeviceInfoList(deviceInfoSet);

        // Stop scanning further device classes once an NPU has been identified.
        if (npu_info.device_id != -1) {
            break;
        }
    }
    return npu_info;
}
137
+
138
// Shared compatibility check used by the versioned checkCompatibility_RAI_*
// wrappers:
//  - a supported NPU must be present,
//  - the installed driver must be >= min_driver_version (packed a.b.c.d),
//  - today's date must not be past max_date (the EP/driver support window).
// Returns the discovered NPUInfo with .check set to the first failing
// condition, or Status::OK when all checks pass.
NPUInfo checkCompatibility(DWORDLONG min_driver_version, std::chrono::year_month_day max_date)
{
    NPUInfo info = extractNPUInfo();

    // Check if supported NPU is present
    if (info.device_id==-1) {
        info.check = Status::NPU_UNRECOGNIZED;
        return info;
    }

    // Check if minimum version of driver is installed
    if (info.driver_version_number<min_driver_version) {
        info.check = Status::DRIVER_TOO_OLD;
        return info;
    }

    // Check for 3 yr EP/driver compatibility window
    std::chrono::year_month_day current_date{std::chrono::floor<std::chrono::days>(std::chrono::system_clock::now())};;
    if (current_date>max_date) {
        info.check = Status::EP_TOO_OLD;
        return info;
    }

    info.check = Status::OK;
    return info;
}
164
+
165
// Compatibility gate for VitisAI EP shipped with RyzenAI 1.2.
NPUInfo checkCompatibility_RAI_1_2()
{
    // Min driver: 32.0.201.204
    // Max date : 2027-07-30 (3 yrs after the release date of RyzenAI 1.2)
    return checkCompatibility(DriverNumberToHex(32,0,201,204), { std::chrono::July / 30 / 2027 });
}

// Compatibility gate for VitisAI EP shipped with RyzenAI 1.3.
NPUInfo checkCompatibility_RAI_1_3()
{
    // Min driver: 32.0.203.237
    // Max date : 2027-11-26 (3 yrs after the release date of RyzenAI 1.3)
    return checkCompatibility(DriverNumberToHex(32,0,203,237), { std::chrono::November / 26 / 2027 });
}

// Compatibility gate for VitisAI EP shipped with RyzenAI 1.3.1.
NPUInfo checkCompatibility_RAI_1_3_1()
{
    // Min driver: 32.0.203.242
    // Max date : 2028-01-15 (3 yrs after the release date of RyzenAI 1.3)
    // NOTE(review): the original comment said 2028-01-17 while the code uses
    // January 15 — confirm which cutoff date is intended.
    return checkCompatibility(DriverNumberToHex(32,0,203,242), { std::chrono::January / 15 / 2028 });
}

// Compatibility gate for VitisAI EP shipped with RyzenAI 1.4.
NPUInfo checkCompatibility_RAI_1_4()
{
    // Min driver: 32.0.203.257 (May change before the release)
    // Max date : 2028-03-25 (Will change before the release)
    return checkCompatibility(DriverNumberToHex(32,0,203,257), { std::chrono::March / 25 / 2028 });
}
192
+
193
+ } // npu_util
test_cpp/src/npu_util.h ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************************
2
+ MIT License
3
+
4
+ Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this
7
+ software and associated documentation files (the "Software"), to deal in the Software
8
+ without restriction, including without limitation the rights to use, copy, modify,
9
+ merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
10
+ persons to whom the Software is furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice (including the next paragraph) shall
13
+ be included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
16
+ INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
17
+ PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
18
+ FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
+ OTHER DEALINGS IN THE SOFTWARE.
21
+ ************************************************************************************/
22
+
23
+ #pragma once
24
+
25
+ #include <string>
26
+
27
+ #include <Windows.h>
28
+
29
+
30
// Helpers for detecting the AMD NPU and checking whether the installed
// driver is compatible with a given VitisAI EP release.
namespace npu_util {

    // Outcome of a compatibility check (see checkCompatibility_RAI_* below).
    enum Status {
        OK = 0,           // environment compatible with the VitisAI EP
        UNKNOWN,          // check not performed / could not complete
        NPU_UNRECOGNIZED, // no supported NPU device found
        DRIVER_TOO_OLD,   // NPU driver older than the required minimum
        EP_TOO_OLD        // past the EP/driver compatibility window
    };

    // Information about the NPU discovered on this system.
    struct NPUInfo {
        int device_id;                     // PCI device id (0x1502 AIE2, 0x17F0 AIE2P); -1 if none found
        std::string device_name;           // device description from the device registry property
        DWORDLONG driver_version_number;   // driver version packed as four 16-bit fields
        std::string driver_version_string; // same version formatted as "a.b.c.d"
        Status check;                      // result of the compatibility check
    };

    // Checks whether the system configuration is compatible for VitisAI EP 1.2
    NPUInfo checkCompatibility_RAI_1_2();

    // Checks whether the system configuration is compatible for VitisAI EP 1.3
    NPUInfo checkCompatibility_RAI_1_3();

    // Checks whether the system configuration is compatible for VitisAI EP 1.3.1
    NPUInfo checkCompatibility_RAI_1_3_1();

    // Checks whether the system configuration is compatible for VitisAI EP 1.4
    NPUInfo checkCompatibility_RAI_1_4();

    } // npu_util
61
+