qc903113684 committed
Commit 806da53 · verified · 1 Parent(s): e2f17fd

Upload 32 files

Files changed (33)
  1. .gitattributes +12 -0
  2. model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/README.md +58 -0
  3. model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/cpp/CMakeLists.txt +30 -0
  4. model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/cpp/main.cpp +370 -0
  5. model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/cpp/test.png +3 -0
  6. model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/models/cutoff_yolov6m_w8a8.qnn223.ctx.bin.aidem +3 -0
  7. model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/python/run_test.py +147 -0
  8. model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/python/test.png +3 -0
  9. model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/python/utils.py +160 -0
  10. model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/README.md +58 -0
  11. model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/cpp/CMakeLists.txt +30 -0
  12. model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/cpp/main.cpp +370 -0
  13. model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/cpp/test.png +3 -0
  14. model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/models/cutoff_yolov6m_fp16.qnn223.ctx.bin.aidem +3 -0
  15. model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/python/run_test.py +147 -0
  16. model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/python/test.png +3 -0
  17. model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/python/utils.py +160 -0
  18. model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/README.md +58 -0
  19. model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/cpp/CMakeLists.txt +30 -0
  20. model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/cpp/main.cpp +370 -0
  21. model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/cpp/test.png +3 -0
  22. model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/models/cutoff_yolov6m_w8a8.qnn223.ctx.bin.aidem +3 -0
  23. model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/python/run_test.py +147 -0
  24. model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/python/test.png +3 -0
  25. model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/python/utils.py +160 -0
  26. model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/README.md +58 -0
  27. model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/cpp/CMakeLists.txt +30 -0
  28. model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/cpp/main.cpp +370 -0
  29. model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/cpp/test.png +3 -0
  30. model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/models/cutoff_yolov6m_w8a16.qnn223.ctx.bin.aidem +3 -0
  31. model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/python/run_test.py +147 -0
  32. model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/python/test.png +3 -0
  33. model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/python/utils.py +160 -0
.gitattributes CHANGED
@@ -33,3 +33,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/cpp/test.png filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/models/cutoff_yolov6m_w8a8.qnn223.ctx.bin.aidem filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/python/test.png filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/cpp/test.png filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/models/cutoff_yolov6m_fp16.qnn223.ctx.bin.aidem filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/python/test.png filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/cpp/test.png filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/models/cutoff_yolov6m_w8a8.qnn223.ctx.bin.aidem filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/python/test.png filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/cpp/test.png filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/models/cutoff_yolov6m_w8a16.qnn223.ctx.bin.aidem filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/python/test.png filter=lfs diff=lfs merge=lfs -text
model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/README.md ADDED
@@ -0,0 +1,58 @@
+ ## Model Information
+ ### Source model
+
+ - Input shape: 1x3x640x640
+ - Number of parameters: 33.24M
+ - Model size: 133.20MB
+ - Output shape: 1x8400x85
+
+ Source model repository: [yolov6](https://github.com/meituan/YOLOv6/tree/main)
+
+ ### Converted model
+
+ - Precision: INT8
+ - Backend: QNN2.23
+ - Target Device: FV01 QCS6490
+
+ ## Inference with AidLite SDK
+
+ ### SDK installation
+ Model Farm uses the AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).
+
+ - Install AidLite SDK
+
+ ```bash
+ # Install the appropriate version of the AidLite SDK
+ sudo aid-pkg update
+ sudo aid-pkg install aidlite-sdk
+ # Install the QNN variant that matches the backend listed above
+ sudo aid-pkg install aidlite-{QNN VERSION}
+ # e.g. for QNN 2.23: sudo aid-pkg install aidlite-qnn223
+ ```
+
+ - Verify AidLite SDK
+
+ ```bash
+ # AidLite SDK C++ library check
+ python3 -c "import aidlite; print(aidlite.get_library_version())"
+
+ # AidLite SDK Python library check
+ python3 -c "import aidlite; print(aidlite.get_py_library_version())"
+ ```
+
+ ### Run demo
+
+ #### python
+ ```bash
+ cd yolov6m/model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite
+ python3 python/run_test.py --target_model ./models/cutoff_yolov6m_w8a8.qnn223.ctx.bin.aidem --imgs ./python/test.png --invoke_nums 10
+ ```
+
+ #### cpp
+ ```bash
+ cd yolov6m/model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/cpp
+ mkdir build && cd build
+ cmake .. && make
+ ./run_test --target_model ../../models/cutoff_yolov6m_w8a8.qnn223.ctx.bin.aidem --imgs ../test.png --invoke_nums 10
+ ```
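For reference, the 1x8400x85 output shape listed in the README above is not emitted as a single tensor: the converted model produces an 8400x80 class-score tensor and an 8400x4 box tensor, and the demo scripts stitch them together with a constant objectness column before NMS (this is what `run_test.py` does). A minimal NumPy sketch of that assembly, with placeholder arrays standing in for the two interpreter outputs:

```python
import numpy as np

# placeholders for the two raw model outputs read back from the interpreter
qnn_trans = np.zeros((1, 8400, 80), dtype=np.float32)  # per-class scores
qnn_mul = np.zeros((1, 8400, 4), dtype=np.float32)     # boxes as (cx, cy, w, h)

qnn_conf = np.ones((1, 8400, 1), dtype=np.float32)     # constant objectness column
qnn_predict = np.concatenate((qnn_mul, qnn_conf, qnn_trans), axis=2)
print(qnn_predict.shape)  # (1, 8400, 85): [cx, cy, w, h, conf, 80 class scores]
```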
model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,30 @@
+ cmake_minimum_required (VERSION 3.5)
+ project("run_test")
+
+ find_package(OpenCV REQUIRED)
+
+ message(STATUS "OpenCV library status:")
+ message(STATUS ">version: ${OpenCV_VERSION}")
+ message(STATUS "Include: ${OpenCV_INCLUDE_DIRS}")
+
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
+
+ include_directories(
+ /usr/local/include
+ /usr/include/opencv4
+ )
+
+ link_directories(
+ /usr/local/lib/
+ )
+
+ file(GLOB SRC_LISTS
+ ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
+ )
+
+ add_executable(run_test ${SRC_LISTS})
+
+ target_link_libraries(run_test
+ aidlite
+ ${OpenCV_LIBS}
+ )
model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/cpp/main.cpp ADDED
@@ -0,0 +1,370 @@
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <opencv2/opencv.hpp>
6
+ #include <aidlux/aidlite/aidlite.hpp>
7
+ #include <vector>
8
+ #include <numeric>
9
+
10
+ const float INPUT_WIDTH = 640.0;
11
+ const float INPUT_HEIGHT = 640.0;
12
+ const float SCORE_THRESHOLD = 0.25;
13
+ const float NMS_THRESHOLD = 0.45;
14
+ const float CONFIDENCE_THRESHOLD = 0.25;
15
+ const uint32_t size = 640;
16
+ const uint32_t out_size = 8400;
17
+
18
+ const int FONT_FACE = cv::FONT_HERSHEY_SIMPLEX;
19
+ cv::Scalar WHITE = cv::Scalar(255,255,255);
20
+
21
+ const float FONT_SCALE = 1;
22
+ const int THICKNESS = 2;
23
+ using namespace Aidlux::Aidlite;
24
+
25
+ struct Args {
26
+ std::string target_model = "../../models/cutoff_yolov6m_w8a8.qnn223.ctx.bin.aidem";
27
+ std::string imgs = "../test.png";
28
+ int invoke_nums = 10;
29
+ std::string model_type = "QNN";
30
+ };
31
+
32
+ Args parse_args(int argc, char* argv[]) {
33
+ Args args;
34
+ for (int i = 1; i < argc; ++i) {
35
+ std::string arg = argv[i];
36
+ if (arg == "--target_model" && i + 1 < argc) {
37
+ args.target_model = argv[++i];
38
+ } else if (arg == "--imgs" && i + 1 < argc) {
39
+ args.imgs = argv[++i];
40
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
41
+ args.invoke_nums = std::stoi(argv[++i]);
42
+ } else if (arg == "--model_type" && i + 1 < argc) {
43
+ args.model_type = argv[++i];
44
+ }
45
+ }
46
+ return args;
47
+ }
48
+
49
+ std::string to_lower(const std::string& str) {
50
+ std::string lower_str = str;
51
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
52
+ return std::tolower(c);
53
+ });
54
+ return lower_str;
55
+ }
56
+
57
+
58
+ void concatenate(float* qnn_trans_data, float* qnn_mul_data, int batch, int num_elements, int trans_dim, int mul_dim, std::vector<float>& output) {
59
+ int out_dim = trans_dim + mul_dim + 1;
60
+ output.resize(batch * num_elements * out_dim);
61
+ for (int i = 0; i < batch * num_elements; ++i) {
62
+ std::memcpy(&output[i * out_dim], &qnn_mul_data[i * mul_dim], mul_dim * sizeof(float));
63
+ float max_val = *std::max_element(&qnn_trans_data[i * trans_dim], &qnn_trans_data[i * trans_dim + trans_dim]);
64
+ output[i * out_dim + 4] = max_val;
65
+ std::memcpy(&output[i * out_dim + 5], &qnn_trans_data[i * trans_dim], trans_dim * sizeof(float));
66
+ }
67
+ }
68
+
69
+ cv::Mat letterbox(cv::Mat im, cv::Size new_shape = cv::Size(640, 640),
70
+ cv::Scalar color = cv::Scalar(114, 114, 114),
71
+ bool auto_pad = true, bool scaleup = true, int stride = 32) {
72
+ // current shape [height, width]
73
+ cv::Size shape = im.size();
74
+ int height = shape.height;
75
+ int width = shape.width;
76
+
77
+ if (new_shape.width == 0) {
78
+ new_shape = cv::Size(new_shape.height, new_shape.height);
79
+ }
80
+
81
+ // Scale ratio (new / old)
82
+ float r = std::min((float)new_shape.height / height, (float)new_shape.width / width);
83
+ if (!scaleup) {
84
+ // only scale down, do not scale up (for better val mAP)
85
+ r = std::min(r, 1.0f);
86
+ }
87
+
88
+ // Compute padding
89
+ cv::Size new_unpad(round(width * r), round(height * r));
90
+ int dw = new_shape.width - new_unpad.width;
91
+ int dh = new_shape.height - new_unpad.height;
92
+
93
+ // minimum rectangle
94
+ if (auto_pad) {
95
+ dw = dw % stride;
96
+ dh = dh % stride;
97
+ }
98
+
99
+ dw /= 2; // divide padding into 2 sides
100
+ dh /= 2;
101
+
102
+ // resize
103
+ if (cv::Size(width, height) != new_unpad) {
104
+ cv::resize(im, im, new_unpad, 0, 0, cv::INTER_LINEAR);
105
+ }
106
+
107
+ int top = round(dh - 0.1);
108
+ int bottom = round(dh + 0.1);
109
+ int left = round(dw - 0.1);
110
+ int right = round(dw + 0.1);
111
+
112
+ cv::copyMakeBorder(im, im, top, bottom, left, right, cv::BORDER_CONSTANT, color);
113
+ return im;
114
+ }
115
+ cv::Scalar generate_colors(int i, bool bgr = false) {
116
+ static const std::vector<std::string> hex_colors = {
117
+ "FF3838", "FF9D97", "FF701F", "FFB21D", "CFD231", "48F90A",
118
+ "92CC17", "3DDB86", "1A9334", "00D4BB", "2C99A8", "00C2FF",
119
+ "344593", "6473FF", "0018EC", "8438FF", "520085", "CB38FF",
120
+ "FF95C8", "FF37C7"
121
+ };
122
+
123
+ int num = hex_colors.size();
124
+ std::string hex = hex_colors[i % num];
125
+
126
+ int r = std::stoi(hex.substr(0, 2), nullptr, 16);
127
+ int g = std::stoi(hex.substr(2, 2), nullptr, 16);
128
+ int b = std::stoi(hex.substr(4, 2), nullptr, 16);
129
+
130
+ if (bgr)
131
+ return cv::Scalar(b, g, r);
132
+ else
133
+ return cv::Scalar(r, g, b);
134
+ }
135
+
136
+ void draw_label(cv::Mat& input_image, std::string label, int left, int top, cv::Scalar color)
137
+ {
138
+ int baseLine;
139
+ cv::Size label_size = cv::getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS, &baseLine);
140
+ int y = top - label_size.height - baseLine;
141
+ if (y < 0) {
142
+ y = top + label_size.height + baseLine;
143
+ }
144
+ cv::Point tlc(left, y);
145
+ cv::Point brc(left + label_size.width, y + label_size.height + baseLine);
146
+ rectangle(input_image, tlc, brc, color, cv::FILLED);
147
+ putText(input_image, label, cv::Point(left, y + label_size.height), FONT_FACE, FONT_SCALE, WHITE, THICKNESS);
148
+ }
149
+
150
+
151
+ cv::Mat post_process(cv::Mat &input_image, std::vector<float> &outputs, const std::vector<std::string> &class_name)
152
+ {
153
+ // Initialize vectors to hold respective outputs while unwrapping detections.
154
+ std::vector<int> class_ids;
155
+ std::vector<float> confidences;
156
+ std::vector<cv::Rect> boxes;
157
+
158
+ // Resizing factor.
159
+ float r = std::min(INPUT_WIDTH / (float)input_image.cols, INPUT_HEIGHT / (float)input_image.rows);
160
+ int new_unpad_w = round(input_image.cols * r);
161
+ int new_unpad_h = round(input_image.rows * r);
162
+ int dw = (int)INPUT_WIDTH - new_unpad_w;
163
+ int dh = (int)INPUT_HEIGHT - new_unpad_h;
164
+ dw /= 24;
165
+ dh /= 24;
166
+
167
+ // Iterate through outputs for each box prediction
168
+ for (int i = 0; i < outputs.size(); i+=85)
169
+ {
170
+ float confidence = outputs[i+4];
171
+ if (confidence >= CONFIDENCE_THRESHOLD)
172
+ {
173
+ // Create a 1x80 Mat and store class scores of 80 classes.
174
+ cv::Mat scores(1, class_name.size(), CV_32FC1, outputs.data() + i + 5);
175
+ cv::Point class_id;
176
+ double max_class_score;
177
+
178
+ // For multi-label, check each class score
179
+ for (int c = 0; c < class_name.size(); c++) {
180
+ float class_score = scores.at<float>(0, c);
181
+
182
+ // If class score is above threshold, consider this class for the box
183
+ if (class_score > SCORE_THRESHOLD) {
184
+ // Store class ID and confidence in the pre-defined respective vectors.
185
+ confidences.push_back(confidence * class_score); // Multiply with confidence
186
+ class_ids.push_back(c); // class index
187
+ // Center and box dimension.
188
+ float cx = outputs[i];
189
+ float cy = outputs[i+1];
190
+ float w = outputs[i+2];
191
+ float h = outputs[i+3];
192
+
193
+ float x0 = (cx - 0.5f * w - dw) / r;
194
+ float y0 = (cy - 0.5f * h - dh) / r;
195
+ float x1 = (cx + 0.5f * w - dw) / r;
196
+ float y1 = (cy + 0.5f * h - dh) / r;
197
+
198
+ int left = int(x0);
199
+ int top = int(y0);
200
+ int width = int(x1 - x0);
201
+ int height = int(y1 - y0);
202
+
203
+ // Store good detections in the boxes vector.
204
+ boxes.push_back(cv::Rect(left, top, width, height));
205
+ }
206
+ }
207
+ }
208
+ }
209
+
210
+ // Perform Non Maximum Suppression and draw predictions.
211
+ std::vector<int> indices;
212
+ cv::dnn::NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD, indices);
213
+ printf("Detected {%ld} targets.\n", indices.size());
214
+
215
+ // Loop over NMS results and draw bounding boxes
216
+ for (int i = 0; i < indices.size(); i++)
217
+ {
218
+ int idx = indices[i];
219
+ cv::Rect box = boxes[idx];
220
+
221
+ int left = box.x;
222
+ int top = box.y;
223
+ int width = box.width;
224
+ int height = box.height;
225
+ cv::Scalar color = generate_colors(class_ids[idx]);
226
+ // Draw bounding box.
227
+ rectangle(input_image, cv::Point(left, top), cv::Point(left + width, top + height), color, 3*THICKNESS);
228
+
229
+ // Get the label for the class name and its confidence.
230
+ std::string label = cv::format("%.2f", confidences[idx]);
231
+ label = class_name[class_ids[idx]] + ":" + label;
232
+ // Draw class labels.
233
+ draw_label(input_image, label, left, top, color);
234
+ }
235
+ printf("Processing finished.\n");
236
+ return input_image;
237
+ }
238
+
239
+
240
+ int invoke(const Args& args) {
241
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
242
+ << "Image Path: " << args.imgs << "\n"
243
+ << "Inference Nums: " << args.invoke_nums << "\n"
244
+ << "Model Type: " << args.model_type << "\n";
245
+ Model* model = Model::create_instance(args.target_model);
246
+ if(model == nullptr){
247
+ printf("Create model failed !\n");
248
+ return EXIT_FAILURE;
249
+ }
250
+ Config* config = Config::create_instance();
251
+ if(config == nullptr){
252
+ printf("Create config failed !\n");
253
+ return EXIT_FAILURE;
254
+ }
255
+ config->implement_type = ImplementType::TYPE_LOCAL;
256
+ std::string model_type_lower = to_lower(args.model_type);
257
+ if (model_type_lower == "qnn"){
258
+ config->framework_type = FrameworkType::TYPE_QNN223;
259
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
260
+ config->framework_type = FrameworkType::TYPE_SNPE2;
261
+ }
262
+ config->accelerate_type = AccelerateType::TYPE_DSP;
263
+ config->is_quantify_model = 1;
264
+
265
+ std::vector<std::vector<uint32_t>> input_shapes = {{1, size, size, 3}};
266
+ std::vector<std::vector<uint32_t>> output_shapes = {{1, out_size, 80}, {1, out_size, 4}};
267
+ model->set_model_properties(input_shapes, DataType::TYPE_FLOAT32, output_shapes, DataType::TYPE_FLOAT32);
268
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
269
+ if(fast_interpreter == nullptr){
270
+ printf("build_interpretper_from_model_and_config failed !\n");
271
+ return EXIT_FAILURE;
272
+ }
273
+ int result = fast_interpreter->init();
274
+ if(result != EXIT_SUCCESS){
275
+ printf("interpreter->init() failed !\n");
276
+ return EXIT_FAILURE;
277
+ }
278
+ // load model
279
+ fast_interpreter->load_model();
280
+ if(result != EXIT_SUCCESS){
281
+ printf("interpreter->load_model() failed !\n");
282
+ return EXIT_FAILURE;
283
+ }
284
+ printf("detect model load success!\n");
285
+ cv::Size img_size(size, size);
286
+
287
+ cv::Mat img_src = cv::imread(args.imgs);
288
+ printf("img_src cols: %d, img_src rows: %d\n", img_src.cols, img_src.rows);
289
+ cv::Mat img_ori = img_src.clone();
290
+ cv::cvtColor(img_ori, img_ori, cv::COLOR_BGR2RGB);
291
+ cv::Mat resized_img = letterbox(img_ori, img_size);
292
+ cv::Mat input_img = cv::Mat::zeros(img_size, CV_32FC3);
293
+ resized_img.convertTo(resized_img, CV_32FC3, 1.0 / 255.0);
294
+ resized_img.copyTo(input_img(cv::Rect(0, 0, resized_img.cols, resized_img.rows)));
295
+ float *qnn_trans_data = nullptr;
296
+ float *qnn_mul_data = nullptr;
297
+
298
+ std::vector<float> invoke_time;
299
+ for (int i = 0; i < args.invoke_nums; ++i) {
300
+ result = fast_interpreter->set_input_tensor(0, input_img.data);
301
+ if(result != EXIT_SUCCESS){
302
+ printf("interpreter->set_input_tensor() failed !\n");
303
+ return EXIT_FAILURE;
304
+ }
305
+ // 开始计时
306
+ auto t1 = std::chrono::high_resolution_clock::now();
307
+ result = fast_interpreter->invoke();
308
+ auto t2 = std::chrono::high_resolution_clock::now();
309
+ std::chrono::duration<double> cost_time = t2 - t1;
310
+ invoke_time.push_back(cost_time.count() * 1000);
311
+ if(result != EXIT_SUCCESS){
312
+ printf("interpreter->invoke() failed !\n");
313
+ return EXIT_FAILURE;
314
+ }
315
+ uint32_t out_data_1 = 0;
316
+ result = fast_interpreter->get_output_tensor(0, (void**)&qnn_trans_data, &out_data_1);
317
+ if(result != EXIT_SUCCESS){
318
+ printf("interpreter->get_output_tensor() 1 failed !\n");
319
+ return EXIT_FAILURE;
320
+ }
321
+ uint32_t out_data_2 = 0;
322
+ result = fast_interpreter->get_output_tensor(1, (void**)&qnn_mul_data, &out_data_2);
323
+ if(result != EXIT_SUCCESS){
324
+ printf("interpreter->get_output_tensor() 2 failed !\n");
325
+ return EXIT_FAILURE;
326
+ }
327
+ }
328
+
329
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
330
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
331
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
332
+ float var_invoketime = 0.0f;
333
+ for (auto time : invoke_time) {
334
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
335
+ }
336
+ var_invoketime /= args.invoke_nums;
337
+ printf("=======================================\n");
338
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
339
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
340
+ printf("=======================================\n");
341
+
342
+ std::vector<std::string> class_list = {
343
+ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
344
+ "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter",
345
+ "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear",
346
+ "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase",
347
+ "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
348
+ "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
349
+ "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
350
+ "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",
351
+ "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet",
352
+ "TV", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
353
+ "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
354
+ "scissors", "teddy bear", "hair drier", "toothbrush"
355
+ };
356
+
357
+ // post process
358
+ std::vector<float> qnn_concat;
359
+ concatenate(qnn_trans_data, qnn_mul_data, 1, out_size, 80, 4, qnn_concat);
360
+ cv::Mat img = post_process(img_src, qnn_concat, class_list);
361
+ cv::imwrite("./results.png", img);
362
+ fast_interpreter->destory();
363
+ return 0;
364
+ }
365
+
366
+
367
+ int main(int argc, char* argv[]) {
368
+ Args args = parse_args(argc, argv);
369
+ return invoke(args);
370
+ }
model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/cpp/test.png ADDED

Git LFS Details

  • SHA256: 7c95b20b95830c48c48c1ea3a004b3cc5a392672a3ea130a4eb41db5664ebfe5
  • Pointer size: 131 Bytes
  • Size of remote file: 970 kB
model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/models/cutoff_yolov6m_w8a8.qnn223.ctx.bin.aidem ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2b5f70de77d86203135bf45778bb7448565566682bf659a418155d6148b19a13
+ size 38488656
model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/python/run_test.py ADDED
@@ -0,0 +1,147 @@
1
+ import os, torch, cv2
2
+ import numpy as np
3
+ import time
4
+ import aidlite
5
+ import argparse
6
+ from utils import letterbox,plot_box_and_label,rescale,generate_colors,non_max_suppression
7
+ import torch
8
+
9
+
10
+ def process_image(path, img_size):
11
+ img_src = cv2.imread(path)
12
+ img_src = cv2.cvtColor(img_src,cv2.COLOR_BGR2RGB)
13
+ image = letterbox(img_src, img_size)[0]
14
+ new_h,new_w,_=image.shape
15
+ input_img = np.zeros((img_size[0], img_size[1], 3), np.uint8)
16
+ input_img[0:new_h, 0:new_w] = image
17
+ input_img = input_img.astype(np.float32)
18
+ input_img /= 255 # 0 - 255 to 0.0 - 1.0
19
+ input_img = np.expand_dims(input_img,0)
20
+ return image,input_img, img_src
21
+
22
+ def main(args):
23
+ print("Start main ... ...")
24
+ # aidlite.set_log_level(aidlite.LogLevel.INFO)
25
+ # aidlite.log_to_stderr()
26
+ # print(f"Aidlite library version : {aidlite.get_library_version()}")
27
+ # print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
28
+
29
+ size=640
30
+ out_size=8400
31
+ config = aidlite.Config.create_instance()
32
+ if config is None:
33
+ print("Create config failed !")
34
+ return False
35
+ config.implement_type = aidlite.ImplementType.TYPE_LOCAL
36
+ if args.model_type.lower()=="qnn":
37
+ config.framework_type = aidlite.FrameworkType.TYPE_QNN223
38
+ elif args.model_type.lower()=="snpe2" or args.model_type.lower()=="snpe":
39
+ config.framework_type = aidlite.FrameworkType.TYPE_SNPE2
40
+
41
+ config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
42
+ config.is_quantify_model = 1
43
+
44
+
45
+ model = aidlite.Model.create_instance(args.target_model)
46
+ if model is None:
47
+ print("Create model failed !")
48
+ return False
49
+ input_shapes = [[1, size, size, 3]]
50
+ output_shapes = [[1, out_size,80],[1, out_size,4]]
51
+ model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
52
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
53
+
54
+ interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
55
+ if interpreter is None:
56
+ print("build_interpretper_from_model_and_config failed !")
57
+ return None
58
+ result = interpreter.init()
59
+ if result != 0:
60
+ print(f"interpreter init failed !")
61
+ return False
62
+ result = interpreter.load_model()
63
+ if result != 0:
64
+ print("interpreter load model failed !")
65
+ return False
66
+ print("detect model load success!")
67
+
68
+ # image process
69
+ img_size=[size,size]
70
+ resize_img,input_img, img_src = process_image(args.imgs, img_size)
71
+
72
+ # qnn run
73
+ invoke_time=[]
74
+ for i in range(args.invoke_nums):
75
+ result = interpreter.set_input_tensor(0, input_img.data)
76
+ if result != 0:
77
+ print("interpreter set_input_tensor() failed")
78
+
79
+ t1=time.time()
80
+ result = interpreter.invoke()
81
+ cost_time = (time.time()-t1)*1000
82
+ invoke_time.append(cost_time)
83
+
84
+ if result != 0:
85
+ print("interpreter set_input_tensor() failed")
86
+
87
+ qnn_trans = interpreter.get_output_tensor(0).reshape(1,out_size,80)
88
+ qnn_mul = interpreter.get_output_tensor(1).reshape(1,out_size,4)
89
+
90
+ result = interpreter.destory()
91
+
92
+ ## time statistics
93
+ max_invoke_time = max(invoke_time)
94
+ min_invoke_time = min(invoke_time)
95
+ mean_invoke_time = sum(invoke_time)/args.invoke_nums
96
+ var_invoketime=np.var(invoke_time)
97
+ print("=======================================")
98
+ print(f"QNN inference {args.invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
99
+ print("=======================================")
100
+
101
+ # post-processing
102
+ conf_thres =0.25 #@param {type:"number"}
103
+ iou_thres =0.45 #@param {type:"number"}
104
+ max_det= 1000#@param {type:"integer"}
105
+ agnostic_nms= False #@param {type:"boolean"}
106
+ classes =None
107
+ hide_labels = False #@param {type:"boolean"}
108
+ hide_conf= False #@param {type:"boolean"}
109
+
110
+ qnn_conf = np.ones((1,out_size,1))
111
+ qnn_predict=np.concatenate((qnn_mul,qnn_conf,qnn_trans), axis=2)
112
+ pred_results =torch.from_numpy(qnn_predict.copy())
113
+ det = non_max_suppression(pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0]
114
+
115
+ class_names=[ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
116
+ 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
117
+ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
118
+ 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
119
+ 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
120
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
121
+ 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
122
+ 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
123
+ 'hair drier', 'toothbrush' ]
124
+
125
+ img_ori = img_src.copy()
126
+ print(f"Detected {len(det)} targets.")
127
+ if len(det):
128
+ det[:, :4] = rescale(resize_img.shape[:2], det[:, :4], img_src.shape).round()
129
+ for *xyxy, conf, cls in reversed(det):
130
+ class_num = int(cls)
131
+ label = None if hide_labels else (class_names[class_num] if hide_conf else f'{class_names[class_num]} {conf:.2f}')
132
+ plot_box_and_label(img_ori, max(round(sum(img_ori.shape) / 2 * 0.003), 2), xyxy, label, color=generate_colors(class_num, True))
133
+
134
+ cv2.imwrite("./python/results.png",cv2.cvtColor(img_ori,cv2.COLOR_RGB2BGR))
135
+
136
+ def parser_args():
137
+ parser = argparse.ArgumentParser(description="Run model benchmarks")
138
+ parser.add_argument('--target_model',type=str,default='./models/cutoff_yolov6m_w8a8.qnn223.ctx.bin.aidem',help="inference model path")
139
+ parser.add_argument('--imgs',type=str,default='./python/test.png',help="Predict images path")
140
+ parser.add_argument('--invoke_nums',type=int,default=10,help="Inference nums")
141
+ parser.add_argument('--model_type',type=str,default='QNN',help="run backend")
142
+ args = parser.parse_args()
143
+ return args
144
+
145
+ if __name__ == "__main__":
146
+ args = parser_args()
147
+ main(args)
model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/python/test.png ADDED

Git LFS Details

  • SHA256: 7c95b20b95830c48c48c1ea3a004b3cc5a392672a3ea130a4eb41db5664ebfe5
  • Pointer size: 131 Bytes
  • Size of remote file: 970 kB
model_farm_yolov6m_qcs6490_qnn2.23_int8_aidlite/python/utils.py ADDED
@@ -0,0 +1,160 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ import torchvision
5
+
6
+ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
7
+ '''Resize and pad image while meeting stride-multiple constraints.'''
8
+ shape = im.shape[:2] # current shape [height, width]
9
+ if isinstance(new_shape, int):
10
+ new_shape = (new_shape, new_shape)
11
+ elif isinstance(new_shape, list) and len(new_shape) == 1:
12
+ new_shape = (new_shape[0], new_shape[0])
13
+
14
+ # Scale ratio (new / old)
15
+ r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
16
+ if not scaleup: # only scale down, do not scale up (for better val mAP)
17
+ r = min(r, 1.0)
18
+
19
+ # Compute padding
20
+ new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
21
+ dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
22
+
23
+ if auto: # minimum rectangle
24
+ dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
25
+
26
+ dw /= 2 # divide padding into 2 sides
27
+ dh /= 2
28
+
29
+ if shape[::-1] != new_unpad: # resize
30
+ im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
31
+ top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
32
+ left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
33
+ im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
34
+
35
+ return im, r, (left, top)
36
+
37
+
38
+
39
+
40
+ def xywh2xyxy(x):
41
+ '''Convert boxes with shape [n, 4] from [x, y, w, h] to [x1, y1, x2, y2] where x1y1 is top-left, x2y2=bottom-right.'''
42
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
43
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
44
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
45
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
46
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
47
+ return y
48
+
49
+ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300):
50
+ """Runs Non-Maximum Suppression (NMS) on inference results.
51
+ This code is borrowed from: https://github.com/ultralytics/yolov5/blob/47233e1698b89fc437a4fb9463c815e9171be955/utils/general.py#L775
52
+ Args:
53
+ prediction: (tensor), with shape [N, 5 + num_classes], N is the number of bboxes.
54
+ conf_thres: (float) confidence threshold.
55
+ iou_thres: (float) iou threshold.
56
+ classes: (None or list[int]), if a list is provided, nms only keep the classes you provide.
57
+ agnostic: (bool), when it is set to True, we do class-independent nms, otherwise, different class would do nms respectively.
58
+ multi_label: (bool), when it is set to True, one box can have multiple labels; otherwise, one box only has one label.
59
+ max_det:(int), max number of output bboxes.
60
+
61
+ Returns:
62
+ list of detections, each item is one tensor with shape (num_boxes, 6), 6 is for [xyxy, conf, cls].
63
+ """
64
+
65
+ num_classes = prediction.shape[2] - 5 # number of classes
66
+ pred_candidates = torch.logical_and(prediction[..., 4] > conf_thres, torch.max(prediction[..., 5:], axis=-1)[0] > conf_thres) # candidates
67
+ # Check the parameters.
68
+ assert 0 <= conf_thres <= 1, f'conf_thresh must be in 0.0 to 1.0, however {conf_thres} is provided.'
69
+ assert 0 <= iou_thres <= 1, f'iou_thres must be in 0.0 to 1.0, however {iou_thres} is provided.'
70
+
71
+ # Function settings.
72
+ max_wh = 4096 # maximum box width and height
73
+ max_nms = 30000 # maximum number of boxes put into torchvision.ops.nms()
74
+ time_limit = 10.0 # quit the function when NMS cost time exceeds the limit.
75
+ multi_label &= num_classes > 1 # multiple labels per box
76
+
77
+ output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
78
+ for img_idx, x in enumerate(prediction): # image index, image inference
79
+ x = x[pred_candidates[img_idx]] # confidence
80
+
81
+ # If no box remains, skip the next process.
82
+ if not x.shape[0]:
83
+ continue
84
+
85
+ # confidence multiply the objectness
86
+ x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
87
+
88
+ # (center x, center y, width, height) to (x1, y1, x2, y2)
89
+ box = xywh2xyxy(x[:, :4])
90
+
91
+ # Detections matrix's shape is (n,6), each row represents (xyxy, conf, cls)
92
+ if multi_label:
93
+ box_idx, class_idx = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
94
+ x = torch.cat((box[box_idx], x[box_idx, class_idx + 5, None], class_idx[:, None].float()), 1)
95
+ else: # Only keep the class with highest scores.
96
+ conf, class_idx = x[:, 5:].max(1, keepdim=True)
97
+ x = torch.cat((box, conf, class_idx.float()), 1)[conf.view(-1) > conf_thres]
98
+
99
+ # Filter by class, only keep boxes whose category is in classes.
100
+ if classes is not None:
101
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
102
+
103
+ # Check shape
104
+ num_box = x.shape[0] # number of boxes
105
+ if not num_box: # no boxes kept.
106
+ continue
107
+ elif num_box > max_nms: # exceeds the max number of boxes.
108
+ x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
109
+
110
+ # Batched NMS
111
+ class_offset = x[:, 5:6] * (0 if agnostic else max_wh) # classes
112
+ boxes, scores = x[:, :4] + class_offset, x[:, 4] # boxes (offset by class), scores
113
+ keep_box_idx = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
114
+ if keep_box_idx.shape[0] > max_det: # limit detections
115
+ keep_box_idx = keep_box_idx[:max_det]
116
+
117
+ output[img_idx] = x[keep_box_idx]
118
+
119
+ return output
120
+
121
+ def plot_box_and_label(image, lw, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255), font=cv2.FONT_HERSHEY_COMPLEX):
122
+ # Add one xyxy box to image with label
123
+ p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
124
+ cv2.rectangle(image, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
125
+ if label:
126
+ tf = max(lw - 1, 1) # font thickness
127
+ w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0] # text width, height
128
+ outside = p1[1] - h - 3 >= 0 # label fits outside box
129
+ p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
130
+ cv2.rectangle(image, p1, p2, color, -1, cv2.LINE_AA) # filled
131
+ cv2.putText(image, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), font, lw / 3, txt_color,
132
+ thickness=tf, lineType=cv2.LINE_AA)
133
+
134
+ def generate_colors(i, bgr=False):
135
+ hex = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
136
+ '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
137
+ palette = []
138
+ for iter in hex:
139
+ h = '#' + iter
140
+ palette.append(tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)))
141
+ num = len(palette)
142
+ color = palette[int(i) % num]
143
+ return (color[2], color[1], color[0]) if bgr else color
144
+
145
+ def rescale(ori_shape, boxes, target_shape):
146
+ '''Rescale the output to the original image shape'''
147
+ ratio = min(ori_shape[0] / target_shape[0], ori_shape[1] / target_shape[1])
148
+ padding = (ori_shape[1] - target_shape[1] * ratio) / 2, (ori_shape[0] - target_shape[0] * ratio) / 2
149
+
150
+ boxes[:, [0, 2]] -= padding[0]
151
+ boxes[:, [1, 3]] -= padding[1]
152
+ boxes[:, :4] /= ratio
153
+
154
+ boxes[:, 0].clamp_(0, target_shape[1]) # x1
155
+ boxes[:, 1].clamp_(0, target_shape[0]) # y1
156
+ boxes[:, 2].clamp_(0, target_shape[1]) # x2
157
+ boxes[:, 3].clamp_(0, target_shape[0]) # y2
158
+
159
+ return boxes
160
+
model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/README.md ADDED
@@ -0,0 +1,58 @@
+ ## Model Information
+ ### Source model
+
+ - Input shape: 1x3x640x640
+ - Number of parameters: 33.24M
+ - Model size: 133.20MB
+ - Output shape: 1x8400x85
+
+ Source model repository: [yolov6](https://github.com/meituan/YOLOv6/tree/main)
+
+ ### Converted model
+
+ - Precision: FP16
+ - Backend: QNN2.23
+ - Target Device: SNM972 QCS8550
+
+ ## Inference with AidLite SDK
+
+ ### SDK installation
+ Model Farm uses the AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).
+
+ - Install AidLite SDK
+
+ ```bash
+ # Install the appropriate version of the AidLite SDK
+ sudo aid-pkg update
+ sudo aid-pkg install aidlite-sdk
+ # Install the QNN variant that matches the backend listed above
+ sudo aid-pkg install aidlite-{QNN VERSION}
+ # e.g. for QNN 2.23: sudo aid-pkg install aidlite-qnn223
+ ```
+
+ - Verify AidLite SDK
+
+ ```bash
+ # AidLite SDK C++ library check
+ python3 -c "import aidlite; print(aidlite.get_library_version())"
+
+ # AidLite SDK Python library check
+ python3 -c "import aidlite; print(aidlite.get_py_library_version())"
+ ```
+
+ ### Run demo
+
+ #### python
+ ```bash
+ cd yolov6m/model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite
+ python3 python/run_test.py --target_model ./models/cutoff_yolov6m_fp16.qnn223.ctx.bin.aidem --imgs ./python/test.png --invoke_nums 10
+ ```
+
+ #### cpp
+ ```bash
+ cd yolov6m/model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/cpp
+ mkdir build && cd build
+ cmake .. && make
+ ./run_test --target_model ../../models/cutoff_yolov6m_fp16.qnn223.ctx.bin.aidem --imgs ../test.png --invoke_nums 10
+ ```
model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,30 @@
+ cmake_minimum_required (VERSION 3.5)
+ project("run_test")
+
+ find_package(OpenCV REQUIRED)
+
+ message(STATUS "OpenCV library status:")
+ message(STATUS ">version: ${OpenCV_VERSION}")
+ message(STATUS "Include: ${OpenCV_INCLUDE_DIRS}")
+
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
+
+ include_directories(
+ /usr/local/include
+ /usr/include/opencv4
+ )
+
+ link_directories(
+ /usr/local/lib/
+ )
+
+ file(GLOB SRC_LISTS
+ ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
+ )
+
+ add_executable(run_test ${SRC_LISTS})
+
+ target_link_libraries(run_test
+ aidlite
+ ${OpenCV_LIBS}
+ )
model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/cpp/main.cpp ADDED
@@ -0,0 +1,370 @@
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <opencv2/opencv.hpp>
6
+ #include <aidlux/aidlite/aidlite.hpp>
7
+ #include <vector>
8
+ #include <numeric>
9
+
10
+ const float INPUT_WIDTH = 640.0;
11
+ const float INPUT_HEIGHT = 640.0;
12
+ const float SCORE_THRESHOLD = 0.25;
13
+ const float NMS_THRESHOLD = 0.45;
14
+ const float CONFIDENCE_THRESHOLD = 0.25;
15
+ const uint32_t size = 640;
16
+ const uint32_t out_size = 8400;
17
+
18
+ const int FONT_FACE = cv::FONT_HERSHEY_SIMPLEX;
19
+ cv::Scalar WHITE = cv::Scalar(255,255,255);
20
+
21
+ const float FONT_SCALE = 1;
22
+ const int THICKNESS = 2;
23
+ using namespace Aidlux::Aidlite;
24
+
25
+ struct Args {
26
+ std::string target_model = "../../models/cutoff_yolov6m_fp16.qnn223.ctx.bin.aidem";
27
+ std::string imgs = "../test.png";
28
+ int invoke_nums = 10;
29
+ std::string model_type = "QNN";
30
+ };
31
+
32
+ Args parse_args(int argc, char* argv[]) {
33
+ Args args;
34
+ for (int i = 1; i < argc; ++i) {
35
+ std::string arg = argv[i];
36
+ if (arg == "--target_model" && i + 1 < argc) {
37
+ args.target_model = argv[++i];
38
+ } else if (arg == "--imgs" && i + 1 < argc) {
39
+ args.imgs = argv[++i];
40
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
41
+ args.invoke_nums = std::stoi(argv[++i]);
42
+ } else if (arg == "--model_type" && i + 1 < argc) {
43
+ args.model_type = argv[++i];
44
+ }
45
+ }
46
+ return args;
47
+ }
48
+
49
+ std::string to_lower(const std::string& str) {
50
+ std::string lower_str = str;
51
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
52
+ return std::tolower(c);
53
+ });
54
+ return lower_str;
55
+ }
56
+
57
+
58
+ void concatenate(float* qnn_trans_data, float* qnn_mul_data, int batch, int num_elements, int trans_dim, int mul_dim, std::vector<float>& output) {
59
+ int out_dim = trans_dim + mul_dim + 1;
60
+ output.resize(batch * num_elements * out_dim);
61
+ for (int i = 0; i < batch * num_elements; ++i) {
62
+ std::memcpy(&output[i * out_dim], &qnn_mul_data[i * mul_dim], mul_dim * sizeof(float));
63
+ float max_val = *std::max_element(&qnn_trans_data[i * trans_dim], &qnn_trans_data[i * trans_dim + trans_dim]);
64
+ output[i * out_dim + 4] = max_val;
65
+ std::memcpy(&output[i * out_dim + 5], &qnn_trans_data[i * trans_dim], trans_dim * sizeof(float));
66
+ }
67
+ }
68
+
69
+ cv::Mat letterbox(cv::Mat im, cv::Size new_shape = cv::Size(640, 640),
70
+ cv::Scalar color = cv::Scalar(114, 114, 114),
71
+ bool auto_pad = true, bool scaleup = true, int stride = 32) {
72
+ // current shape [height, width]
73
+ cv::Size shape = im.size();
74
+ int height = shape.height;
75
+ int width = shape.width;
76
+
77
+ if (new_shape.width == 0) {
78
+ new_shape = cv::Size(new_shape.height, new_shape.height);
79
+ }
80
+
81
+ // Scale ratio (new / old)
82
+ float r = std::min((float)new_shape.height / height, (float)new_shape.width / width);
83
+ if (!scaleup) {
84
+ // only scale down, do not scale up (for better val mAP)
85
+ r = std::min(r, 1.0f);
86
+ }
87
+
88
+ // Compute padding
89
+ cv::Size new_unpad(round(width * r), round(height * r));
90
+ int dw = new_shape.width - new_unpad.width;
91
+ int dh = new_shape.height - new_unpad.height;
92
+
93
+ // minimum rectangle
94
+ if (auto_pad) {
95
+ dw = dw % stride;
96
+ dh = dh % stride;
97
+ }
98
+
99
+ dw /= 2; // divide padding into 2 sides
100
+ dh /= 2;
101
+
102
+ // resize
103
+ if (cv::Size(width, height) != new_unpad) {
104
+ cv::resize(im, im, new_unpad, 0, 0, cv::INTER_LINEAR);
105
+ }
106
+
107
+ int top = round(dh - 0.1);
108
+ int bottom = round(dh + 0.1);
109
+ int left = round(dw - 0.1);
110
+ int right = round(dw + 0.1);
111
+
112
+ cv::copyMakeBorder(im, im, top, bottom, left, right, cv::BORDER_CONSTANT, color);
113
+ return im;
114
+ }
115
+ cv::Scalar generate_colors(int i, bool bgr = false) {
116
+ static const std::vector<std::string> hex_colors = {
117
+ "FF3838", "FF9D97", "FF701F", "FFB21D", "CFD231", "48F90A",
118
+ "92CC17", "3DDB86", "1A9334", "00D4BB", "2C99A8", "00C2FF",
119
+ "344593", "6473FF", "0018EC", "8438FF", "520085", "CB38FF",
120
+ "FF95C8", "FF37C7"
121
+ };
122
+
123
+ int num = hex_colors.size();
124
+ std::string hex = hex_colors[i % num];
125
+
126
+ int r = std::stoi(hex.substr(0, 2), nullptr, 16);
127
+ int g = std::stoi(hex.substr(2, 2), nullptr, 16);
128
+ int b = std::stoi(hex.substr(4, 2), nullptr, 16);
129
+
130
+ if (bgr)
131
+ return cv::Scalar(b, g, r);
132
+ else
133
+ return cv::Scalar(r, g, b);
134
+ }
135
+
136
+ void draw_label(cv::Mat& input_image, std::string label, int left, int top, cv::Scalar color)
137
+ {
138
+ int baseLine;
139
+ cv::Size label_size = cv::getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS, &baseLine);
140
+ int y = top - label_size.height - baseLine;
141
+ if (y < 0) {
142
+ y = top + label_size.height + baseLine;
143
+ }
144
+ cv::Point tlc(left, y);
145
+ cv::Point brc(left + label_size.width, y + label_size.height + baseLine);
146
+ rectangle(input_image, tlc, brc, color, cv::FILLED);
147
+ putText(input_image, label, cv::Point(left, y + label_size.height), FONT_FACE, FONT_SCALE, WHITE, THICKNESS);
148
+ }
149
+
150
+
151
+ cv::Mat post_process(cv::Mat &input_image, std::vector<float> &outputs, const std::vector<std::string> &class_name)
152
+ {
153
+ // Initialize vectors to hold respective outputs while unwrapping detections.
154
+ std::vector<int> class_ids;
155
+ std::vector<float> confidences;
156
+ std::vector<cv::Rect> boxes;
157
+
158
+ // Resizing factor.
159
+ float r = std::min(INPUT_WIDTH / (float)input_image.cols, INPUT_HEIGHT / (float)input_image.rows);
160
+ int new_unpad_w = round(input_image.cols * r);
161
+ int new_unpad_h = round(input_image.rows * r);
162
+ int dw = (int)INPUT_WIDTH - new_unpad_w;
163
+ int dh = (int)INPUT_HEIGHT - new_unpad_h;
164
+ dw /= 24;
165
+ dh /= 24;
166
+
167
+ // Iterate through outputs for each box prediction
168
+ for (int i = 0; i < outputs.size(); i+=85)
169
+ {
170
+ float confidence = outputs[i+4];
171
+ if (confidence >= CONFIDENCE_THRESHOLD)
172
+ {
173
+ // Create a 1x80 Mat and store class scores of 80 classes.
174
+ cv::Mat scores(1, class_name.size(), CV_32FC1, outputs.data() + i + 5);
175
+ cv::Point class_id;
176
+ double max_class_score;
177
+
178
+ // For multi-label, check each class score
179
+ for (int c = 0; c < class_name.size(); c++) {
180
+ float class_score = scores.at<float>(0, c);
181
+
182
+ // If class score is above threshold, consider this class for the box
183
+ if (class_score > SCORE_THRESHOLD) {
184
+ // Store class ID and confidence in the pre-defined respective vectors.
185
+ confidences.push_back(confidence * class_score); // Multiply with confidence
186
+ class_ids.push_back(c); // class index
187
+ // Center and box dimension.
188
+ float cx = outputs[i];
189
+ float cy = outputs[i+1];
190
+ float w = outputs[i+2];
191
+ float h = outputs[i+3];
192
+
193
+ float x0 = (cx - 0.5f * w - dw) / r;
194
+ float y0 = (cy - 0.5f * h - dh) / r;
195
+ float x1 = (cx + 0.5f * w - dw) / r;
196
+ float y1 = (cy + 0.5f * h - dh) / r;
197
+
198
+ int left = int(x0);
199
+ int top = int(y0);
200
+ int width = int(x1 - x0);
201
+ int height = int(y1 - y0);
202
+
203
+ // Store good detections in the boxes vector.
204
+ boxes.push_back(cv::Rect(left, top, width, height));
205
+ }
206
+ }
207
+ }
208
+ }
209
+
210
+ // Perform Non Maximum Suppression and draw predictions.
211
+ std::vector<int> indices;
212
+ cv::dnn::NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD, indices);
213
+ printf("Detected {%ld} targets.\n", indices.size());
214
+
215
+ // Loop over NMS results and draw bounding boxes
216
+ for (int i = 0; i < indices.size(); i++)
217
+ {
218
+ int idx = indices[i];
219
+ cv::Rect box = boxes[idx];
220
+
221
+ int left = box.x;
222
+ int top = box.y;
223
+ int width = box.width;
224
+ int height = box.height;
225
+ cv::Scalar color = generate_colors(class_ids[idx]);
226
+ // Draw bounding box.
227
+ rectangle(input_image, cv::Point(left, top), cv::Point(left + width, top + height), color, 3*THICKNESS);
228
+
229
+ // Get the label for the class name and its confidence.
230
+ std::string label = cv::format("%.2f", confidences[idx]);
231
+ label = class_name[class_ids[idx]] + ":" + label;
232
+ // Draw class labels.
233
+ draw_label(input_image, label, left, top, color);
234
+ }
235
+ printf("Processing finished.\n");
236
+ return input_image;
237
+ }
238
+
239
+
240
+ int invoke(const Args& args) {
241
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
242
+ << "Image Path: " << args.imgs << "\n"
243
+ << "Inference Nums: " << args.invoke_nums << "\n"
244
+ << "Model Type: " << args.model_type << "\n";
245
+ Model* model = Model::create_instance(args.target_model);
246
+ if(model == nullptr){
247
+ printf("Create model failed !\n");
248
+ return EXIT_FAILURE;
249
+ }
250
+ Config* config = Config::create_instance();
251
+ if(config == nullptr){
252
+ printf("Create config failed !\n");
253
+ return EXIT_FAILURE;
254
+ }
255
+ config->implement_type = ImplementType::TYPE_LOCAL;
256
+ std::string model_type_lower = to_lower(args.model_type);
257
+ if (model_type_lower == "qnn"){
258
+ config->framework_type = FrameworkType::TYPE_QNN223;
259
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
260
+ config->framework_type = FrameworkType::TYPE_SNPE2;
261
+ }
262
+ config->accelerate_type = AccelerateType::TYPE_DSP;
263
+ config->is_quantify_model = 1;
264
+
265
+ std::vector<std::vector<uint32_t>> input_shapes = {{1, size, size, 3}};
266
+ std::vector<std::vector<uint32_t>> output_shapes = {{1, out_size, 80}, {1, out_size, 4}};
267
+ model->set_model_properties(input_shapes, DataType::TYPE_FLOAT32, output_shapes, DataType::TYPE_FLOAT32);
268
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
269
+ if(fast_interpreter == nullptr){
270
+ printf("build_interpretper_from_model_and_config failed !\n");
271
+ return EXIT_FAILURE;
272
+ }
273
+ int result = fast_interpreter->init();
274
+ if(result != EXIT_SUCCESS){
275
+ printf("interpreter->init() failed !\n");
276
+ return EXIT_FAILURE;
277
+ }
278
+ // load model
279
+ fast_interpreter->load_model();
280
+ if(result != EXIT_SUCCESS){
281
+ printf("interpreter->load_model() failed !\n");
282
+ return EXIT_FAILURE;
283
+ }
284
+ printf("detect model load success!\n");
285
+ cv::Size img_size(size, size);
286
+
287
+ cv::Mat img_src = cv::imread(args.imgs);
288
+ printf("img_src cols: %d, img_src rows: %d\n", img_src.cols, img_src.rows);
289
+ cv::Mat img_ori = img_src.clone();
290
+ cv::cvtColor(img_ori, img_ori, cv::COLOR_BGR2RGB);
291
+ cv::Mat resized_img = letterbox(img_ori, img_size);
292
+ cv::Mat input_img = cv::Mat::zeros(img_size, CV_32FC3);
293
+ resized_img.convertTo(resized_img, CV_32FC3, 1.0 / 255.0);
294
+ resized_img.copyTo(input_img(cv::Rect(0, 0, resized_img.cols, resized_img.rows)));
295
+ float *qnn_trans_data = nullptr;
296
+ float *qnn_mul_data = nullptr;
297
+
298
+ std::vector<float> invoke_time;
299
+ for (int i = 0; i < args.invoke_nums; ++i) {
300
+ result = fast_interpreter->set_input_tensor(0, input_img.data);
301
+ if(result != EXIT_SUCCESS){
302
+ printf("interpreter->set_input_tensor() failed !\n");
303
+ return EXIT_FAILURE;
304
+ }
305
+ // 开始计时
306
+ auto t1 = std::chrono::high_resolution_clock::now();
307
+ result = fast_interpreter->invoke();
308
+ auto t2 = std::chrono::high_resolution_clock::now();
309
+ std::chrono::duration<double> cost_time = t2 - t1;
310
+ invoke_time.push_back(cost_time.count() * 1000);
311
+ if(result != EXIT_SUCCESS){
312
+ printf("interpreter->invoke() failed !\n");
313
+ return EXIT_FAILURE;
314
+ }
315
+ uint32_t out_data_1 = 0;
316
+ result = fast_interpreter->get_output_tensor(0, (void**)&qnn_trans_data, &out_data_1);
317
+ if(result != EXIT_SUCCESS){
318
+ printf("interpreter->get_output_tensor() 1 failed !\n");
319
+ return EXIT_FAILURE;
320
+ }
321
+ uint32_t out_data_2 = 0;
322
+ result = fast_interpreter->get_output_tensor(1, (void**)&qnn_mul_data, &out_data_2);
323
+ if(result != EXIT_SUCCESS){
324
+ printf("interpreter->get_output_tensor() 2 failed !\n");
325
+ return EXIT_FAILURE;
326
+ }
327
+ }
328
+
329
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
330
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
331
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
332
+ float var_invoketime = 0.0f;
333
+ for (auto time : invoke_time) {
334
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
335
+ }
336
+ var_invoketime /= args.invoke_nums;
337
+ printf("=======================================\n");
338
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
339
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
340
+ printf("=======================================\n");
341
+
342
+ std::vector<std::string> class_list = {
343
+ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
344
+ "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter",
345
+ "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear",
346
+ "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase",
347
+ "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
348
+ "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
349
+ "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
350
+ "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",
351
+ "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet",
352
+ "TV", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
353
+ "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
354
+ "scissors", "teddy bear", "hair drier", "toothbrush"
355
+ };
356
+
357
+ // post process
358
+ std::vector<float> qnn_concat;
359
+ concatenate(qnn_trans_data, qnn_mul_data, 1, out_size, 80, 4, qnn_concat);
360
+ cv::Mat img = post_process(img_src, qnn_concat, class_list);
361
+ cv::imwrite("./results.png", img);
362
+ fast_interpreter->destory();
363
+ return 0;
364
+ }
365
+
366
+
367
+ int main(int argc, char* argv[]) {
368
+ Args args = parse_args(argc, argv);
369
+ return invoke(args);
370
+ }
model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/cpp/test.png ADDED

Git LFS Details

  • SHA256: 7c95b20b95830c48c48c1ea3a004b3cc5a392672a3ea130a4eb41db5664ebfe5
  • Pointer size: 131 Bytes
  • Size of remote file: 970 kB
model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/models/cutoff_yolov6m_fp16.qnn223.ctx.bin.aidem ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b687e374d65441b8677cab88cb92a146fdd30b7369d7c186d2c4143be99d37f5
3
+ size 70849520
model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/python/run_test.py ADDED
@@ -0,0 +1,147 @@
1
+ import os, torch, cv2
2
+ import numpy as np
3
+ import time
4
+ import aidlite
5
+ import argparse
6
+ from utils import letterbox,plot_box_and_label,rescale,generate_colors,non_max_suppression
7
+ import torch
8
+
9
+
10
+ def process_image(path, img_size):
11
+ img_src = cv2.imread(path)
12
+ img_src = cv2.cvtColor(img_src,cv2.COLOR_BGR2RGB)
13
+ image = letterbox(img_src, img_size)[0]
14
+ new_h,new_w,_=image.shape
15
+ input_img = np.zeros((img_size[0], img_size[1], 3), np.uint8)
16
+ input_img[0:new_h, 0:new_w] = image
17
+ input_img = input_img.astype(np.float32)
18
+ input_img /= 255 # 0 - 255 to 0.0 - 1.0
19
+ input_img = np.expand_dims(input_img,0)
20
+ return image,input_img, img_src
21
+
22
+ def main(args):
23
+ print("Start main ... ...")
24
+ # aidlite.set_log_level(aidlite.LogLevel.INFO)
25
+ # aidlite.log_to_stderr()
26
+ # print(f"Aidlite library version : {aidlite.get_library_version()}")
27
+ # print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
28
+
29
+ size=640
30
+ out_size=8400
31
+ config = aidlite.Config.create_instance()
32
+ if config is None:
33
+ print("Create config failed !")
34
+ return False
35
+ config.implement_type = aidlite.ImplementType.TYPE_LOCAL
36
+ if args.model_type.lower()=="qnn":
37
+ config.framework_type = aidlite.FrameworkType.TYPE_QNN223
38
+ elif args.model_type.lower()=="snpe2" or args.model_type.lower()=="snpe":
39
+ config.framework_type = aidlite.FrameworkType.TYPE_SNPE2
40
+
41
+ config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
42
+ config.is_quantify_model = 1
43
+
44
+
45
+ model = aidlite.Model.create_instance(args.target_model)
46
+ if model is None:
47
+ print("Create model failed !")
48
+ return False
49
+ input_shapes = [[1, size, size, 3]]
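+ # the cutoff model exposes two outputs: [1, out_size, 80] class scores and [1, out_size, 4] boxes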
50
+ output_shapes = [[1, out_size,80],[1, out_size,4]]
51
+ model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
52
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
53
+
54
+ interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
55
+ if interpreter is None:
56
+ print("build_interpretper_from_model_and_config failed !")
57
+ return None
58
+ result = interpreter.init()
59
+ if result != 0:
60
+ print(f"interpreter init failed !")
61
+ return False
62
+ result = interpreter.load_model()
63
+ if result != 0:
64
+ print("interpreter load model failed !")
65
+ return False
66
+ print("detect model load success!")
67
+
68
+ # image process
69
+ img_size=[size,size]
70
+ resize_img,input_img, img_src = process_image(args.imgs, img_size)
71
+
72
+ # qnn run
73
+ invoke_time=[]
74
+ for i in range(args.invoke_nums):
75
+ result = interpreter.set_input_tensor(0, input_img.data)
76
+ if result != 0:
77
+ print("interpreter set_input_tensor() failed")
78
+
79
+ t1=time.time()
80
+ result = interpreter.invoke()
81
+ cost_time = (time.time()-t1)*1000
82
+ invoke_time.append(cost_time)
83
+
84
+ if result != 0:
85
+ print("interpreter invoke() failed")
86
+
87
+ qnn_trans = interpreter.get_output_tensor(0).reshape(1,out_size,80)
88
+ qnn_mul = interpreter.get_output_tensor(1).reshape(1,out_size,4)
89
+
90
+ result = interpreter.destory()
91
+
92
+ ## timing statistics
93
+ max_invoke_time = max(invoke_time)
94
+ min_invoke_time = min(invoke_time)
95
+ mean_invoke_time = sum(invoke_time)/args.invoke_nums
96
+ var_invoketime=np.var(invoke_time)
97
+ print("=======================================")
98
+ print(f"QNN inference {args.invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
99
+ print("=======================================")
100
+
101
+ # post process
102
+ conf_thres =0.25 #@param {type:"number"}
103
+ iou_thres =0.45 #@param {type:"number"}
104
+ max_det= 1000#@param {type:"integer"}
105
+ agnostic_nms= False #@param {type:"boolean"}
106
+ classes =None
107
+ hide_labels = False #@param {type:"boolean"}
108
+ hide_conf= False #@param {type:"boolean"}
109
+
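+ # the model outputs boxes and class scores only, so a constant objectness of 1.0 is
+ # inserted between them to rebuild the (x, y, w, h, obj, cls...) layout expected by
+ # non_max_suppression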
110
+ qnn_conf = np.ones((1,out_size,1))
111
+ qnn_predict=np.concatenate((qnn_mul,qnn_conf,qnn_trans), axis=2)
112
+ pred_results =torch.from_numpy(qnn_predict.copy())
113
+ det = non_max_suppression(pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0]
114
+
115
+ class_names=[ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
116
+ 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
117
+ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
118
+ 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
119
+ 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
120
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
121
+ 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
122
+ 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
123
+ 'hair drier', 'toothbrush' ]
124
+
125
+ img_ori = img_src.copy()
126
+ print(f"Detected {len(det)} targets.")
127
+ if len(det):
128
+ det[:, :4] = rescale(resize_img.shape[:2], det[:, :4], img_src.shape).round()
129
+ for *xyxy, conf, cls in reversed(det):
130
+ class_num = int(cls)
131
+ label = None if hide_labels else (class_names[class_num] if hide_conf else f'{class_names[class_num]} {conf:.2f}')
132
+ plot_box_and_label(img_ori, max(round(sum(img_ori.shape) / 2 * 0.003), 2), xyxy, label, color=generate_colors(class_num, True))
133
+
134
+ cv2.imwrite("./python/results.png",cv2.cvtColor(img_ori,cv2.COLOR_RGB2BGR))
135
+
136
+ def parser_args():
137
+ parser = argparse.ArgumentParser(description="Run model benchmarks")
138
+ parser.add_argument('--target_model',type=str,default='./models/cutoff_yolov6m_fp16.qnn223.ctx.bin',help="inference model path")
139
+ parser.add_argument('--imgs',type=str,default='./python/test.png',help="Predict images path")
140
+ parser.add_argument('--invoke_nums',type=int,default=10,help="Inference nums")
141
+ parser.add_argument('--model_type',type=str,default='QNN',help="run backend")
142
+ args = parser.parse_args()
143
+ return args
144
+
145
+ if __name__ == "__main__":
146
+ args = parser_args()
147
+ main(args)
model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/python/test.png ADDED

Git LFS Details

  • SHA256: 7c95b20b95830c48c48c1ea3a004b3cc5a392672a3ea130a4eb41db5664ebfe5
  • Pointer size: 131 Bytes
  • Size of remote file: 970 kB
model_farm_yolov6m_qcs8550_qnn2.23_fp16_aidlite/python/utils.py ADDED
@@ -0,0 +1,160 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ import torchvision
5
+
6
+ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
7
+ '''Resize and pad image while meeting stride-multiple constraints.'''
8
+ shape = im.shape[:2] # current shape [height, width]
9
+ if isinstance(new_shape, int):
10
+ new_shape = (new_shape, new_shape)
11
+ elif isinstance(new_shape, list) and len(new_shape) == 1:
12
+ new_shape = (new_shape[0], new_shape[0])
13
+
14
+ # Scale ratio (new / old)
15
+ r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
16
+ if not scaleup: # only scale down, do not scale up (for better val mAP)
17
+ r = min(r, 1.0)
18
+
19
+ # Compute padding
20
+ new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
21
+ dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
22
+
23
+ if auto: # minimum rectangle
24
+ dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
25
+
26
+ dw /= 2 # divide padding into 2 sides
27
+ dh /= 2
28
+
29
+ if shape[::-1] != new_unpad: # resize
30
+ im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
31
+ top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
32
+ left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
33
+ im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
34
+
35
+ return im, r, (left, top)
36
+
37
+
38
+
39
+
40
+ def xywh2xyxy(x):
41
+ '''Convert boxes with shape [n, 4] from [x, y, w, h] to [x1, y1, x2, y2] where x1y1 is top-left, x2y2=bottom-right.'''
42
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
43
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
44
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
45
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
46
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
47
+ return y
48
+
49
+ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300):
50
+ """Runs Non-Maximum Suppression (NMS) on inference results.
51
+ This code is borrowed from: https://github.com/ultralytics/yolov5/blob/47233e1698b89fc437a4fb9463c815e9171be955/utils/general.py#L775
52
+ Args:
53
+ prediction: (tensor), with shape [N, 5 + num_classes], N is the number of bboxes.
54
+ conf_thres: (float) confidence threshold.
55
+ iou_thres: (float) iou threshold.
56
+ classes: (None or list[int]), if a list is provided, NMS only keeps the classes you provide.
57
+ agnostic: (bool), when set to True, class-agnostic NMS is performed; otherwise, NMS is done separately for each class.
58
+ multi_label: (bool), when set to True, one box can have multiple labels; otherwise, one box has only one label.
59
+ max_det:(int), max number of output bboxes.
60
+
61
+ Returns:
62
+ list of detections, each item is one tensor with shape (num_boxes, 6), where the 6 columns are [xyxy, conf, cls].
63
+ """
64
+
65
+ num_classes = prediction.shape[2] - 5 # number of classes
66
+ pred_candidates = torch.logical_and(prediction[..., 4] > conf_thres, torch.max(prediction[..., 5:], axis=-1)[0] > conf_thres) # candidates
67
+ # Check the parameters.
68
+ assert 0 <= conf_thres <= 1, f'conf_thresh must be in 0.0 to 1.0, however {conf_thres} is provided.'
69
+ assert 0 <= iou_thres <= 1, f'iou_thres must be in 0.0 to 1.0, however {iou_thres} is provided.'
70
+
71
+ # Function settings.
72
+ max_wh = 4096 # maximum box width and height
73
+ max_nms = 30000 # maximum number of boxes put into torchvision.ops.nms()
74
+ time_limit = 10.0 # quit the function when the NMS cost time exceeds this limit.
75
+ multi_label &= num_classes > 1 # multiple labels per box
76
+
77
+ output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
78
+ for img_idx, x in enumerate(prediction): # image index, image inference
79
+ x = x[pred_candidates[img_idx]] # confidence
80
+
81
+ # If no box remains, skip the next process.
82
+ if not x.shape[0]:
83
+ continue
84
+
85
+ # confidence multiply the objectness
86
+ x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
87
+
88
+ # (center x, center y, width, height) to (x1, y1, x2, y2)
89
+ box = xywh2xyxy(x[:, :4])
90
+
91
+ # Detections matrix's shape is (n,6), each row represents (xyxy, conf, cls)
92
+ if multi_label:
93
+ box_idx, class_idx = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
94
+ x = torch.cat((box[box_idx], x[box_idx, class_idx + 5, None], class_idx[:, None].float()), 1)
95
+ else: # Only keep the class with highest scores.
96
+ conf, class_idx = x[:, 5:].max(1, keepdim=True)
97
+ x = torch.cat((box, conf, class_idx.float()), 1)[conf.view(-1) > conf_thres]
98
+
99
+ # Filter by class, only keep boxes whose category is in classes.
100
+ if classes is not None:
101
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
102
+
103
+ # Check shape
104
+ num_box = x.shape[0] # number of boxes
105
+ if not num_box: # no boxes kept.
106
+ continue
107
+ elif num_box > max_nms: # exceeds the maximum number of boxes.
108
+ x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
109
+
110
+ # Batched NMS
111
+ class_offset = x[:, 5:6] * (0 if agnostic else max_wh) # classes
112
+ boxes, scores = x[:, :4] + class_offset, x[:, 4] # boxes (offset by class), scores
113
+ keep_box_idx = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
114
+ if keep_box_idx.shape[0] > max_det: # limit detections
115
+ keep_box_idx = keep_box_idx[:max_det]
116
+
117
+ output[img_idx] = x[keep_box_idx]
118
+
119
+ return output
120
+
121
+ def plot_box_and_label(image, lw, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255), font=cv2.FONT_HERSHEY_COMPLEX):
122
+ # Add one xyxy box to image with label
123
+ p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
124
+ cv2.rectangle(image, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
125
+ if label:
126
+ tf = max(lw - 1, 1) # font thickness
127
+ w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0] # text width, height
128
+ outside = p1[1] - h - 3 >= 0 # label fits outside box
129
+ p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
130
+ cv2.rectangle(image, p1, p2, color, -1, cv2.LINE_AA) # filled
131
+ cv2.putText(image, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), font, lw / 3, txt_color,
132
+ thickness=tf, lineType=cv2.LINE_AA)
133
+
134
+ def generate_colors(i, bgr=False):
135
+ hex = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
136
+ '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
137
+ palette = []
138
+ for iter in hex:
139
+ h = '#' + iter
140
+ palette.append(tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)))
141
+ num = len(palette)
142
+ color = palette[int(i) % num]
143
+ return (color[2], color[1], color[0]) if bgr else color
144
+
145
+ def rescale(ori_shape, boxes, target_shape):
146
+ '''Rescale the output to the original image shape'''
147
+ ratio = min(ori_shape[0] / target_shape[0], ori_shape[1] / target_shape[1])
148
+ padding = (ori_shape[1] - target_shape[1] * ratio) / 2, (ori_shape[0] - target_shape[0] * ratio) / 2
149
+
150
+ boxes[:, [0, 2]] -= padding[0]
151
+ boxes[:, [1, 3]] -= padding[1]
152
+ boxes[:, :4] /= ratio
153
+
154
+ boxes[:, 0].clamp_(0, target_shape[1]) # x1
155
+ boxes[:, 1].clamp_(0, target_shape[0]) # y1
156
+ boxes[:, 2].clamp_(0, target_shape[1]) # x2
157
+ boxes[:, 3].clamp_(0, target_shape[0]) # y2
158
+
159
+ return boxes
160
+
model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/README.md ADDED
@@ -0,0 +1,58 @@
1
+ ## Model Information
2
+ ### Source model
3
+
4
+ - Input shape: 1x3x640x640
5
+ - Number of parameters: 33.24M
6
+ - Model size: 133.20MB
7
+ - Output shape: 1x8400x85
8
+
9
+ Source model repository: [yolov6](https://github.com/meituan/YOLOv6/tree/main)
10
+
11
+ ### Converted model
12
+
13
+ - Precision: INT8
14
+ - Backend: QNN2.23
15
+ - Target Device: SNM972 QCS8550
16
+
17
+ ## Inference with AidLite SDK
18
+
19
+ ### SDK installation
20
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
21
+
22
+ - install AidLite SDK
23
+
24
+ ```bash
25
+ # Install the appropriate version of the aidlite sdk
26
+ sudo aid-pkg update
27
+ sudo aid-pkg install aidlite-sdk
28
+ # Install the QNN package for AidLite that matches the backend version above
29
+ sudo aid-pkg install aidlite-{QNN VERSION}
30
+ # eg: Install QNN 2.23 Aidlite: sudo aid-pkg install aidlite-qnn223
31
+ ```
32
+
33
+ - Verify AidLite SDK
34
+
35
+ ```bash
36
+ # check the AidLite SDK (C++ library) version
37
+ python3 -c "import aidlite; print(aidlite.get_library_version())"
38
+
39
+ # aidlite sdk python check
40
+ python3 -c "import aidlite; print(aidlite.get_py_library_version())"
41
+ ```
42
+
43
+ ### Run demo
44
+
45
+ #### python
46
+ ```bash
47
+ cd yolov6m/model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite
48
+ python3 python/run_test.py --target_model ./models/cutoff_yolov6m_w8a8.qnn223.ctx.bin.aidem --imgs ./python/test.png --invoke_nums 10
49
+
50
+ ```
51
+
52
+ #### cpp
53
+ ```bash
54
+ cd yolov6m/model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/cpp
55
+ mkdir build && cd build
56
+ cmake .. && make
57
+ ./run_test --target_model ../../models/cutoff_yolov6m_w8a8.qnn223.ctx.bin.aidem --imgs ../test.png --invoke_nums 10
58
+ ```
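+ 
+ Both demos save the annotated detection result as `results.png`; with the commands above it should end up at:
+ 
+ ```bash
+ # written by the python demo (run from the model folder)
+ ls ./python/results.png
+ # written by the cpp demo (run from cpp/build)
+ ls ./cpp/build/results.png
+ ```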
model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,30 @@
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+
6
+ message(STATUS "oPENCV Library status:")
7
+ message(STATUS ">version:${OpenCV_VERSION}")
8
+ message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
9
+
10
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
11
+
12
+ include_directories(
13
+ /usr/local/include
14
+ /usr/include/opencv4
15
+ )
16
+
17
+ link_directories(
18
+ /usr/local/lib/
19
+ )
20
+
21
+ file(GLOB SRC_LISTS
22
+ ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
23
+ )
24
+
25
+ add_executable(run_test ${SRC_LISTS})
26
+
27
+ target_link_libraries(run_test
28
+ aidlite
29
+ ${OpenCV_LIBS}
30
+ )
model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/cpp/main.cpp ADDED
@@ -0,0 +1,370 @@
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <opencv2/opencv.hpp>
6
+ #include <aidlux/aidlite/aidlite.hpp>
7
+ #include <vector>
8
+ #include <numeric>
9
+
10
+ const float INPUT_WIDTH = 640.0;
11
+ const float INPUT_HEIGHT = 640.0;
12
+ const float SCORE_THRESHOLD = 0.25;
13
+ const float NMS_THRESHOLD = 0.45;
14
+ const float CONFIDENCE_THRESHOLD = 0.25;
15
+ const uint32_t size = 640;
16
+ const uint32_t out_size = 8400;
17
+
18
+ const int FONT_FACE = cv::FONT_HERSHEY_SIMPLEX;
19
+ cv::Scalar WHITE = cv::Scalar(255,255,255);
20
+
21
+ const float FONT_SCALE = 1;
22
+ const int THICKNESS = 2;
23
+ using namespace Aidlux::Aidlite;
24
+
25
+ struct Args {
26
+ std::string target_model = "../../models/cutoff_yolov6m_w8a8.qnn223.ctx.bin.aidem";
27
+ std::string imgs = "../test.png";
28
+ int invoke_nums = 10;
29
+ std::string model_type = "QNN";
30
+ };
31
+
32
+ Args parse_args(int argc, char* argv[]) {
33
+ Args args;
34
+ for (int i = 1; i < argc; ++i) {
35
+ std::string arg = argv[i];
36
+ if (arg == "--target_model" && i + 1 < argc) {
37
+ args.target_model = argv[++i];
38
+ } else if (arg == "--imgs" && i + 1 < argc) {
39
+ args.imgs = argv[++i];
40
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
41
+ args.invoke_nums = std::stoi(argv[++i]);
42
+ } else if (arg == "--model_type" && i + 1 < argc) {
43
+ args.model_type = argv[++i];
44
+ }
45
+ }
46
+ return args;
47
+ }
48
+
49
+ std::string to_lower(const std::string& str) {
50
+ std::string lower_str = str;
51
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
52
+ return std::tolower(c);
53
+ });
54
+ return lower_str;
55
+ }
56
+
57
+
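+ // Merge the two head outputs into rows of trans_dim + mul_dim + 1 floats:
+ // the mul_dim box values (cx, cy, w, h) come first, the per-row maximum class
+ // score is reused as the confidence value in column 4, and the trans_dim raw
+ // class scores follow from column 5 onward.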
58
+ void concatenate(float* qnn_trans_data, float* qnn_mul_data, int batch, int num_elements, int trans_dim, int mul_dim, std::vector<float>& output) {
59
+ int out_dim = trans_dim + mul_dim + 1;
60
+ output.resize(batch * num_elements * out_dim);
61
+ for (int i = 0; i < batch * num_elements; ++i) {
62
+ std::memcpy(&output[i * out_dim], &qnn_mul_data[i * mul_dim], mul_dim * sizeof(float));
63
+ float max_val = *std::max_element(&qnn_trans_data[i * trans_dim], &qnn_trans_data[i * trans_dim + trans_dim]);
64
+ output[i * out_dim + 4] = max_val;
65
+ std::memcpy(&output[i * out_dim + 5], &qnn_trans_data[i * trans_dim], trans_dim * sizeof(float));
66
+ }
67
+ }
68
+
69
+ cv::Mat letterbox(cv::Mat im, cv::Size new_shape = cv::Size(640, 640),
70
+ cv::Scalar color = cv::Scalar(114, 114, 114),
71
+ bool auto_pad = true, bool scaleup = true, int stride = 32) {
72
+ // current shape [height, width]
73
+ cv::Size shape = im.size();
74
+ int height = shape.height;
75
+ int width = shape.width;
76
+
77
+ if (new_shape.width == 0) {
78
+ new_shape = cv::Size(new_shape.height, new_shape.height);
79
+ }
80
+
81
+ // Scale ratio (new / old)
82
+ float r = std::min((float)new_shape.height / height, (float)new_shape.width / width);
83
+ if (!scaleup) {
84
+ // only scale down, do not scale up (for better val mAP)
85
+ r = std::min(r, 1.0f);
86
+ }
87
+
88
+ // Compute padding
89
+ cv::Size new_unpad(round(width * r), round(height * r));
90
+ int dw = new_shape.width - new_unpad.width;
91
+ int dh = new_shape.height - new_unpad.height;
92
+
93
+ // minimum rectangle
94
+ if (auto_pad) {
95
+ dw = dw % stride;
96
+ dh = dh % stride;
97
+ }
98
+
99
+ dw /= 2; // divide padding into 2 sides
100
+ dh /= 2;
101
+
102
+ // resize
103
+ if (cv::Size(width, height) != new_unpad) {
104
+ cv::resize(im, im, new_unpad, 0, 0, cv::INTER_LINEAR);
105
+ }
106
+
107
+ int top = round(dh - 0.1);
108
+ int bottom = round(dh + 0.1);
109
+ int left = round(dw - 0.1);
110
+ int right = round(dw + 0.1);
111
+
112
+ cv::copyMakeBorder(im, im, top, bottom, left, right, cv::BORDER_CONSTANT, color);
113
+ return im;
114
+ }
115
+ cv::Scalar generate_colors(int i, bool bgr = false) {
116
+ static const std::vector<std::string> hex_colors = {
117
+ "FF3838", "FF9D97", "FF701F", "FFB21D", "CFD231", "48F90A",
118
+ "92CC17", "3DDB86", "1A9334", "00D4BB", "2C99A8", "00C2FF",
119
+ "344593", "6473FF", "0018EC", "8438FF", "520085", "CB38FF",
120
+ "FF95C8", "FF37C7"
121
+ };
122
+
123
+ int num = hex_colors.size();
124
+ std::string hex = hex_colors[i % num];
125
+
126
+ int r = std::stoi(hex.substr(0, 2), nullptr, 16);
127
+ int g = std::stoi(hex.substr(2, 2), nullptr, 16);
128
+ int b = std::stoi(hex.substr(4, 2), nullptr, 16);
129
+
130
+ if (bgr)
131
+ return cv::Scalar(b, g, r);
132
+ else
133
+ return cv::Scalar(r, g, b);
134
+ }
135
+
136
+ void draw_label(cv::Mat& input_image, std::string label, int left, int top, cv::Scalar color)
137
+ {
138
+ int baseLine;
139
+ cv::Size label_size = cv::getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS, &baseLine);
140
+ int y = top - label_size.height - baseLine;
141
+ if (y < 0) {
142
+ y = top + label_size.height + baseLine;
143
+ }
144
+ cv::Point tlc(left, y);
145
+ cv::Point brc(left + label_size.width, y + label_size.height + baseLine);
146
+ rectangle(input_image, tlc, brc, color, cv::FILLED);
147
+ putText(input_image, label, cv::Point(left, y + label_size.height), FONT_FACE, FONT_SCALE, WHITE, THICKNESS);
148
+ }
149
+
150
+
151
+ cv::Mat post_process(cv::Mat &input_image, std::vector<float> &outputs, const std::vector<std::string> &class_name)
152
+ {
153
+ // Initialize vectors to hold respective outputs while unwrapping detections.
154
+ std::vector<int> class_ids;
155
+ std::vector<float> confidences;
156
+ std::vector<cv::Rect> boxes;
157
+
158
+ // Resizing factor.
159
+ float r = std::min(INPUT_WIDTH / (float)input_image.cols, INPUT_HEIGHT / (float)input_image.rows);
160
+ int new_unpad_w = round(input_image.cols * r);
161
+ int new_unpad_h = round(input_image.rows * r);
162
+ int dw = (int)INPUT_WIDTH - new_unpad_w;
163
+ int dh = (int)INPUT_HEIGHT - new_unpad_h;
164
+ dw /= 24;
165
+ dh /= 24;
166
+
167
+ // Iterate through outputs for each box prediction
168
+ for (int i = 0; i < outputs.size(); i+=85)
169
+ {
170
+ float confidence = outputs[i+4];
171
+ if (confidence >= CONFIDENCE_THRESHOLD)
172
+ {
173
+ // Create a 1x80 Mat and store class scores of 80 classes.
174
+ cv::Mat scores(1, class_name.size(), CV_32FC1, outputs.data() + i + 5);
175
+ cv::Point class_id;
176
+ double max_class_score;
177
+
178
+ // For multi-label, check each class score
179
+ for (int c = 0; c < class_name.size(); c++) {
180
+ float class_score = scores.at<float>(0, c);
181
+
182
+ // If class score is above threshold, consider this class for the box
183
+ if (class_score > SCORE_THRESHOLD) {
184
+ // Store class ID and confidence in the pre-defined respective vectors.
185
+ confidences.push_back(confidence * class_score); // Multiply with confidence
186
+ class_ids.push_back(c); // class index
187
+ // Center and box dimension.
188
+ float cx = outputs[i];
189
+ float cy = outputs[i+1];
190
+ float w = outputs[i+2];
191
+ float h = outputs[i+3];
192
+
193
+ float x0 = (cx - 0.5f * w - dw) / r;
194
+ float y0 = (cy - 0.5f * h - dh) / r;
195
+ float x1 = (cx + 0.5f * w - dw) / r;
196
+ float y1 = (cy + 0.5f * h - dh) / r;
197
+
198
+ int left = int(x0);
199
+ int top = int(y0);
200
+ int width = int(x1 - x0);
201
+ int height = int(y1 - y0);
202
+
203
+ // Store good detections in the boxes vector.
204
+ boxes.push_back(cv::Rect(left, top, width, height));
205
+ }
206
+ }
207
+ }
208
+ }
209
+
210
+ // Perform Non Maximum Suppression and draw predictions.
211
+ std::vector<int> indices;
212
+ cv::dnn::NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD, indices);
213
+ printf("Detected %ld targets.\n", indices.size());
214
+
215
+ // Loop over NMS results and draw bounding boxes
216
+ for (int i = 0; i < indices.size(); i++)
217
+ {
218
+ int idx = indices[i];
219
+ cv::Rect box = boxes[idx];
220
+
221
+ int left = box.x;
222
+ int top = box.y;
223
+ int width = box.width;
224
+ int height = box.height;
225
+ cv::Scalar color = generate_colors(class_ids[idx]);
226
+ // Draw bounding box.
227
+ rectangle(input_image, cv::Point(left, top), cv::Point(left + width, top + height), color, 3*THICKNESS);
228
+
229
+ // Get the label for the class name and its confidence.
230
+ std::string label = cv::format("%.2f", confidences[idx]);
231
+ label = class_name[class_ids[idx]] + ":" + label;
232
+ // Draw class labels.
233
+ draw_label(input_image, label, left, top, color);
234
+ }
235
+ printf("Processing finished.\n");
236
+ return input_image;
237
+ }
238
+
239
+
240
+ int invoke(const Args& args) {
241
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
242
+ << "Image Path: " << args.imgs << "\n"
243
+ << "Inference Nums: " << args.invoke_nums << "\n"
244
+ << "Model Type: " << args.model_type << "\n";
245
+ Model* model = Model::create_instance(args.target_model);
246
+ if(model == nullptr){
247
+ printf("Create model failed !\n");
248
+ return EXIT_FAILURE;
249
+ }
250
+ Config* config = Config::create_instance();
251
+ if(config == nullptr){
252
+ printf("Create config failed !\n");
253
+ return EXIT_FAILURE;
254
+ }
255
+ config->implement_type = ImplementType::TYPE_LOCAL;
256
+ std::string model_type_lower = to_lower(args.model_type);
257
+ if (model_type_lower == "qnn"){
258
+ config->framework_type = FrameworkType::TYPE_QNN223;
259
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
260
+ config->framework_type = FrameworkType::TYPE_SNPE2;
261
+ }
262
+ config->accelerate_type = AccelerateType::TYPE_DSP;
263
+ config->is_quantify_model = 1;
264
+
265
+ std::vector<std::vector<uint32_t>> input_shapes = {{1, size, size, 3}};
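+ // two detection-head outputs: index 0 is the [1, out_size, 80] class-score tensor,
+ // index 1 is the [1, out_size, 4] box tensor (cx, cy, w, h)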
266
+ std::vector<std::vector<uint32_t>> output_shapes = {{1, out_size, 80}, {1, out_size, 4}};
267
+ model->set_model_properties(input_shapes, DataType::TYPE_FLOAT32, output_shapes, DataType::TYPE_FLOAT32);
268
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
269
+ if(fast_interpreter == nullptr){
270
+ printf("build_interpretper_from_model_and_config failed !\n");
271
+ return EXIT_FAILURE;
272
+ }
273
+ int result = fast_interpreter->init();
274
+ if(result != EXIT_SUCCESS){
275
+ printf("interpreter->init() failed !\n");
276
+ return EXIT_FAILURE;
277
+ }
278
+ // load model
279
+ fast_interpreter->load_model();
280
+ if(result != EXIT_SUCCESS){
281
+ printf("interpreter->load_model() failed !\n");
282
+ return EXIT_FAILURE;
283
+ }
284
+ printf("detect model load success!\n");
285
+ cv::Size img_size(size, size);
286
+
287
+ cv::Mat img_src = cv::imread(args.imgs);
288
+ printf("img_src cols: %d, img_src rows: %d\n", img_src.cols, img_src.rows);
289
+ cv::Mat img_ori = img_src.clone();
290
+ cv::cvtColor(img_ori, img_ori, cv::COLOR_BGR2RGB);
291
+ cv::Mat resized_img = letterbox(img_ori, img_size);
292
+ cv::Mat input_img = cv::Mat::zeros(img_size, CV_32FC3);
293
+ resized_img.convertTo(resized_img, CV_32FC3, 1.0 / 255.0);
294
+ resized_img.copyTo(input_img(cv::Rect(0, 0, resized_img.cols, resized_img.rows)));
295
+ float *qnn_trans_data = nullptr;
296
+ float *qnn_mul_data = nullptr;
297
+
298
+ std::vector<float> invoke_time;
299
+ for (int i = 0; i < args.invoke_nums; ++i) {
300
+ result = fast_interpreter->set_input_tensor(0, input_img.data);
301
+ if(result != EXIT_SUCCESS){
302
+ printf("interpreter->set_input_tensor() failed !\n");
303
+ return EXIT_FAILURE;
304
+ }
305
+ // start timing
306
+ auto t1 = std::chrono::high_resolution_clock::now();
307
+ result = fast_interpreter->invoke();
308
+ auto t2 = std::chrono::high_resolution_clock::now();
309
+ std::chrono::duration<double> cost_time = t2 - t1;
310
+ invoke_time.push_back(cost_time.count() * 1000);
311
+ if(result != EXIT_SUCCESS){
312
+ printf("interpreter->invoke() failed !\n");
313
+ return EXIT_FAILURE;
314
+ }
315
+ uint32_t out_data_1 = 0;
316
+ result = fast_interpreter->get_output_tensor(0, (void**)&qnn_trans_data, &out_data_1);
317
+ if(result != EXIT_SUCCESS){
318
+ printf("interpreter->get_output_tensor() 1 failed !\n");
319
+ return EXIT_FAILURE;
320
+ }
321
+ uint32_t out_data_2 = 0;
322
+ result = fast_interpreter->get_output_tensor(1, (void**)&qnn_mul_data, &out_data_2);
323
+ if(result != EXIT_SUCCESS){
324
+ printf("interpreter->get_output_tensor() 2 failed !\n");
325
+ return EXIT_FAILURE;
326
+ }
327
+ }
328
+
329
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
330
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
331
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
332
+ float var_invoketime = 0.0f;
333
+ for (auto time : invoke_time) {
334
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
335
+ }
336
+ var_invoketime /= args.invoke_nums;
337
+ printf("=======================================\n");
338
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
339
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
340
+ printf("=======================================\n");
341
+
342
+ std::vector<std::string> class_list = {
343
+ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
344
+ "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter",
345
+ "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear",
346
+ "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase",
347
+ "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
348
+ "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
349
+ "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
350
+ "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",
351
+ "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet",
352
+ "TV", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
353
+ "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
354
+ "scissors", "teddy bear", "hair drier", "toothbrush"
355
+ };
356
+
357
+ // post process
358
+ std::vector<float> qnn_concat;
359
+ concatenate(qnn_trans_data, qnn_mul_data, 1, out_size, 80, 4, qnn_concat);
360
+ cv::Mat img = post_process(img_src, qnn_concat, class_list);
361
+ cv::imwrite("./results.png", img);
362
+ fast_interpreter->destory();
363
+ return 0;
364
+ }
365
+
366
+
367
+ int main(int argc, char* argv[]) {
368
+ Args args = parse_args(argc, argv);
369
+ return invoke(args);
370
+ }
model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/cpp/test.png ADDED

Git LFS Details

  • SHA256: 7c95b20b95830c48c48c1ea3a004b3cc5a392672a3ea130a4eb41db5664ebfe5
  • Pointer size: 131 Bytes
  • Size of remote file: 970 kB
model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/models/cutoff_yolov6m_w8a8.qnn223.ctx.bin.aidem ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f0bb56df2017a4e7fcd2423204a7f15a4a596964e6cd5609f469d3bab8104c1
3
+ size 35596880
model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/python/run_test.py ADDED
@@ -0,0 +1,147 @@
1
+ import os, torch, cv2
2
+ import numpy as np
3
+ import time
4
+ import aidlite
5
+ import argparse
6
+ from utils import letterbox,plot_box_and_label,rescale,generate_colors,non_max_suppression
7
+ import torch
8
+
9
+
10
+ def process_image(path, img_size):
11
+ img_src = cv2.imread(path)
12
+ img_src = cv2.cvtColor(img_src,cv2.COLOR_BGR2RGB)
13
+ image = letterbox(img_src, img_size)[0]
14
+ new_h,new_w,_=image.shape
15
+ input_img = np.zeros((img_size[0], img_size[1], 3), np.uint8)
16
+ input_img[0:new_h, 0:new_w] = image
17
+ input_img = input_img.astype(np.float32)
18
+ input_img /= 255 # 0 - 255 to 0.0 - 1.0
19
+ input_img = np.expand_dims(input_img,0)
20
+ return image,input_img, img_src
21
+
22
+ def main(args):
23
+ print("Start main ... ...")
24
+ # aidlite.set_log_level(aidlite.LogLevel.INFO)
25
+ # aidlite.log_to_stderr()
26
+ # print(f"Aidlite library version : {aidlite.get_library_version()}")
27
+ # print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
28
+
29
+ size=640
30
+ out_size=8400
31
+ config = aidlite.Config.create_instance()
32
+ if config is None:
33
+ print("Create config failed !")
34
+ return False
35
+ config.implement_type = aidlite.ImplementType.TYPE_LOCAL
36
+ if args.model_type.lower()=="qnn":
37
+ config.framework_type = aidlite.FrameworkType.TYPE_QNN223
38
+ elif args.model_type.lower()=="snpe2" or args.model_type.lower()=="snpe":
39
+ config.framework_type = aidlite.FrameworkType.TYPE_SNPE2
40
+
41
+ config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
42
+ config.is_quantify_model = 1
43
+
44
+
45
+ model = aidlite.Model.create_instance(args.target_model)
46
+ if model is None:
47
+ print("Create model failed !")
48
+ return False
49
+ input_shapes = [[1, size, size, 3]]
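+ # the cutoff model exposes two outputs: [1, out_size, 80] class scores and [1, out_size, 4] boxes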
50
+ output_shapes = [[1, out_size,80],[1, out_size,4]]
51
+ model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
52
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
53
+
54
+ interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
55
+ if interpreter is None:
56
+ print("build_interpretper_from_model_and_config failed !")
57
+ return None
58
+ result = interpreter.init()
59
+ if result != 0:
60
+ print(f"interpreter init failed !")
61
+ return False
62
+ result = interpreter.load_model()
63
+ if result != 0:
64
+ print("interpreter load model failed !")
65
+ return False
66
+ print("detect model load success!")
67
+
68
+ # image process
69
+ img_size=[size,size]
70
+ resize_img,input_img, img_src = process_image(args.imgs, img_size)
71
+
72
+ # qnn run
73
+ invoke_time=[]
74
+ for i in range(args.invoke_nums):
75
+ result = interpreter.set_input_tensor(0, input_img.data)
76
+ if result != 0:
77
+ print("interpreter set_input_tensor() failed")
78
+
79
+ t1=time.time()
80
+ result = interpreter.invoke()
81
+ cost_time = (time.time()-t1)*1000
82
+ invoke_time.append(cost_time)
83
+
84
+ if result != 0:
85
+ print("interpreter invoke() failed")
86
+
87
+ qnn_trans = interpreter.get_output_tensor(0).reshape(1,out_size,80)
88
+ qnn_mul = interpreter.get_output_tensor(1).reshape(1,out_size,4)
89
+
90
+ result = interpreter.destory()
91
+
92
+ ## timing statistics
93
+ max_invoke_time = max(invoke_time)
94
+ min_invoke_time = min(invoke_time)
95
+ mean_invoke_time = sum(invoke_time)/args.invoke_nums
96
+ var_invoketime=np.var(invoke_time)
97
+ print("=======================================")
98
+ print(f"QNN inference {args.invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
99
+ print("=======================================")
100
+
101
+ # post process
102
+ conf_thres =0.25 #@param {type:"number"}
103
+ iou_thres =0.45 #@param {type:"number"}
104
+ max_det= 1000#@param {type:"integer"}
105
+ agnostic_nms= False #@param {type:"boolean"}
106
+ classes =None
107
+ hide_labels = False #@param {type:"boolean"}
108
+ hide_conf= False #@param {type:"boolean"}
109
+
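+ # the model outputs boxes and class scores only, so a constant objectness of 1.0 is
+ # inserted between them to rebuild the (x, y, w, h, obj, cls...) layout expected by
+ # non_max_suppression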
110
+ qnn_conf = np.ones((1,out_size,1))
111
+ qnn_predict=np.concatenate((qnn_mul,qnn_conf,qnn_trans), axis=2)
112
+ pred_results =torch.from_numpy(qnn_predict.copy())
113
+ det = non_max_suppression(pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0]
114
+
115
+ class_names=[ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
116
+ 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
117
+ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
118
+ 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
119
+ 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
120
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
121
+ 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
122
+ 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
123
+ 'hair drier', 'toothbrush' ]
124
+
125
+ img_ori = img_src.copy()
126
+ print(f"Detected {len(det)} targets.")
127
+ if len(det):
128
+ det[:, :4] = rescale(resize_img.shape[:2], det[:, :4], img_src.shape).round()
129
+ for *xyxy, conf, cls in reversed(det):
130
+ class_num = int(cls)
131
+ label = None if hide_labels else (class_names[class_num] if hide_conf else f'{class_names[class_num]} {conf:.2f}')
132
+ plot_box_and_label(img_ori, max(round(sum(img_ori.shape) / 2 * 0.003), 2), xyxy, label, color=generate_colors(class_num, True))
133
+
134
+ cv2.imwrite("./python/results.png",cv2.cvtColor(img_ori,cv2.COLOR_RGB2BGR))
135
+
136
+ def parser_args():
137
+ parser = argparse.ArgumentParser(description="Run model benchmarks")
138
+ parser.add_argument('--target_model',type=str,default='./models/cutoff_yolov6m_w8a8.qnn223.ctx.bin',help="inference model path")
139
+ parser.add_argument('--imgs',type=str,default='./python/test.png',help="Predict images path")
140
+ parser.add_argument('--invoke_nums',type=int,default=10,help="Inference nums")
141
+ parser.add_argument('--model_type',type=str,default='QNN',help="run backend")
142
+ args = parser.parse_args()
143
+ return args
144
+
145
+ if __name__ == "__main__":
146
+ args = parser_args()
147
+ main(args)
model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/python/test.png ADDED

Git LFS Details

  • SHA256: 7c95b20b95830c48c48c1ea3a004b3cc5a392672a3ea130a4eb41db5664ebfe5
  • Pointer size: 131 Bytes
  • Size of remote file: 970 kB
model_farm_yolov6m_qcs8550_qnn2.23_int8_aidlite/python/utils.py ADDED
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ import torchvision
5
+
6
+ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
7
+ '''Resize and pad image while meeting stride-multiple constraints.'''
8
+ shape = im.shape[:2] # current shape [height, width]
9
+ if isinstance(new_shape, int):
10
+ new_shape = (new_shape, new_shape)
11
+ elif isinstance(new_shape, list) and len(new_shape) == 1:
12
+ new_shape = (new_shape[0], new_shape[0])
13
+
14
+ # Scale ratio (new / old)
15
+ r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
16
+ if not scaleup: # only scale down, do not scale up (for better val mAP)
17
+ r = min(r, 1.0)
18
+
19
+ # Compute padding
20
+ new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
21
+ dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
22
+
23
+ if auto: # minimum rectangle
24
+ dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
25
+
26
+ dw /= 2 # divide padding into 2 sides
27
+ dh /= 2
28
+
29
+ if shape[::-1] != new_unpad: # resize
30
+ im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
31
+ top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
32
+ left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
33
+ im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
34
+
35
+ return im, r, (left, top)
36
+
37
+
38
+
39
+
40
+ def xywh2xyxy(x):
41
+ '''Convert boxes with shape [n, 4] from [x, y, w, h] to [x1, y1, x2, y2] where x1y1 is top-left, x2y2=bottom-right.'''
42
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
43
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
44
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
45
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
46
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
47
+ return y
48
+
49
+ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300):
50
+ """Runs Non-Maximum Suppression (NMS) on inference results.
51
+ This code is borrowed from: https://github.com/ultralytics/yolov5/blob/47233e1698b89fc437a4fb9463c815e9171be955/utils/general.py#L775
52
+ Args:
53
+ prediction: (tensor), with shape [N, 5 + num_classes], N is the number of bboxes.
54
+ conf_thres: (float) confidence threshold.
55
+ iou_thres: (float) iou threshold.
56
+ classes: (None or list[int]), if a list is provided, NMS only keeps the classes you provide.
57
+ agnostic: (bool), when set to True, class-agnostic NMS is performed; otherwise, NMS is done separately for each class.
58
+ multi_label: (bool), when set to True, one box can have multiple labels; otherwise, one box has only one label.
59
+ max_det:(int), max number of output bboxes.
60
+
61
+ Returns:
62
+ list of detections, each item is one tensor with shape (num_boxes, 6), where the 6 columns are [xyxy, conf, cls].
63
+ """
64
+
65
+ num_classes = prediction.shape[2] - 5 # number of classes
66
+ pred_candidates = torch.logical_and(prediction[..., 4] > conf_thres, torch.max(prediction[..., 5:], axis=-1)[0] > conf_thres) # candidates
67
+ # Check the parameters.
68
+ assert 0 <= conf_thres <= 1, f'conf_thresh must be in 0.0 to 1.0, however {conf_thres} is provided.'
69
+ assert 0 <= iou_thres <= 1, f'iou_thres must be in 0.0 to 1.0, however {iou_thres} is provided.'
70
+
71
+ # Function settings.
72
+ max_wh = 4096 # maximum box width and height
73
+ max_nms = 30000 # maximum number of boxes put into torchvision.ops.nms()
74
+ time_limit = 10.0 # quit the function when the NMS cost time exceeds this limit.
75
+ multi_label &= num_classes > 1 # multiple labels per box
76
+
77
+ output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
78
+ for img_idx, x in enumerate(prediction): # image index, image inference
79
+ x = x[pred_candidates[img_idx]] # confidence
80
+
81
+ # If no box remains, skip the next process.
82
+ if not x.shape[0]:
83
+ continue
84
+
85
+ # confidence multiply the objectness
86
+ x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
87
+
88
+ # (center x, center y, width, height) to (x1, y1, x2, y2)
89
+ box = xywh2xyxy(x[:, :4])
90
+
91
+ # Detections matrix's shape is (n,6), each row represents (xyxy, conf, cls)
92
+ if multi_label:
93
+ box_idx, class_idx = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
94
+ x = torch.cat((box[box_idx], x[box_idx, class_idx + 5, None], class_idx[:, None].float()), 1)
95
+ else: # Only keep the class with highest scores.
96
+ conf, class_idx = x[:, 5:].max(1, keepdim=True)
97
+ x = torch.cat((box, conf, class_idx.float()), 1)[conf.view(-1) > conf_thres]
98
+
99
+ # Filter by class, only keep boxes whose category is in classes.
100
+ if classes is not None:
101
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
102
+
103
+ # Check shape
104
+ num_box = x.shape[0] # number of boxes
105
+ if not num_box: # no boxes kept.
106
+ continue
107
+ elif num_box > max_nms: # exceeds the maximum number of boxes.
108
+ x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
109
+
110
+ # Batched NMS
111
+ class_offset = x[:, 5:6] * (0 if agnostic else max_wh) # classes
112
+ boxes, scores = x[:, :4] + class_offset, x[:, 4] # boxes (offset by class), scores
113
+ keep_box_idx = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
114
+ if keep_box_idx.shape[0] > max_det: # limit detections
115
+ keep_box_idx = keep_box_idx[:max_det]
116
+
117
+ output[img_idx] = x[keep_box_idx]
118
+
119
+ return output
120
+
121
+ def plot_box_and_label(image, lw, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255), font=cv2.FONT_HERSHEY_COMPLEX):
122
+ # Add one xyxy box to image with label
123
+ p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
124
+ cv2.rectangle(image, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
125
+ if label:
126
+ tf = max(lw - 1, 1) # font thickness
127
+ w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0] # text width, height
128
+ outside = p1[1] - h - 3 >= 0 # label fits outside box
129
+ p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
130
+ cv2.rectangle(image, p1, p2, color, -1, cv2.LINE_AA) # filled
131
+ cv2.putText(image, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), font, lw / 3, txt_color,
132
+ thickness=tf, lineType=cv2.LINE_AA)
133
+
134
+ def generate_colors(i, bgr=False):
135
+ hex = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
136
+ '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
137
+ palette = []
138
+ for iter in hex:
139
+ h = '#' + iter
140
+ palette.append(tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)))
141
+ num = len(palette)
142
+ color = palette[int(i) % num]
143
+ return (color[2], color[1], color[0]) if bgr else color
144
+
145
+ def rescale(ori_shape, boxes, target_shape):
146
+ '''Rescale the output to the original image shape'''
147
+ ratio = min(ori_shape[0] / target_shape[0], ori_shape[1] / target_shape[1])
148
+ padding = (ori_shape[1] - target_shape[1] * ratio) / 2, (ori_shape[0] - target_shape[0] * ratio) / 2
149
+
150
+ boxes[:, [0, 2]] -= padding[0]
151
+ boxes[:, [1, 3]] -= padding[1]
152
+ boxes[:, :4] /= ratio
153
+
154
+ boxes[:, 0].clamp_(0, target_shape[1]) # x1
155
+ boxes[:, 1].clamp_(0, target_shape[0]) # y1
156
+ boxes[:, 2].clamp_(0, target_shape[1]) # x2
157
+ boxes[:, 3].clamp_(0, target_shape[0]) # y2
158
+
159
+ return boxes
160
+
model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/README.md ADDED
@@ -0,0 +1,58 @@
1
+ ## Model Information
2
+ ### Source model
3
+
4
+ - Input shape: 1x3x640x640
5
+ - Number of parameters: 33.24M
6
+ - Model size: 133.20MB
7
+ - Output shape: 1x8400x85
8
+
9
+ Source model repository: [yolov6](https://github.com/meituan/YOLOv6/tree/main)
10
+
11
+ ### Converted model
12
+
13
+ - Precision: W8A16
14
+ - Backend: QNN2.23
15
+ - Target Device: SNM972 QCS8550
16
+
17
+ ## Inference with AidLite SDK
18
+
19
+ ### SDK installation
20
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
21
+
22
+ - install AidLite SDK
23
+
24
+ ```bash
25
+ # Install the appropriate version of the aidlite sdk
26
+ sudo aid-pkg update
27
+ sudo aid-pkg install aidlite-sdk
28
+ # Install the QNN package for AidLite that matches the backend version above
29
+ sudo aid-pkg install aidlite-{QNN VERSION}
30
+ # eg: Install QNN 2.23 Aidlite: sudo aid-pkg install aidlite-qnn223
31
+ ```
32
+
33
+ - Verify AidLite SDK
34
+
35
+ ```bash
36
+ # check the AidLite SDK (C++ library) version
37
+ python3 -c "import aidlite; print(aidlite.get_library_version())"
38
+
39
+ # aidlite sdk python check
40
+ python3 -c "import aidlite; print(aidlite.get_py_library_version())"
41
+ ```
42
+
43
+ ### Run demo
44
+
45
+ #### python
46
+ ```bash
47
+ cd yolov6m/model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite
48
+ python3 python/run_test.py --target_model ./models/cutoff_yolov6m_w8a16.qnn223.ctx.bin.aidem --imgs ./python/test.png --invoke_nums 10
49
+
50
+ ```
51
+
52
+ #### cpp
53
+ ```bash
54
+ cd yolov6m/model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/cpp
55
+ mkdir build && cd build
56
+ cmake .. && make
57
+ ./run_test --target_model ../../models/cutoff_yolov6m_w8a16.qnn223.ctx.bin.aidem --imgs ../test.png --invoke_nums 10
58
+ ```
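+ 
+ Both demos save the annotated detection result as `results.png`; with the commands above it should end up at:
+ 
+ ```bash
+ # written by the python demo (run from the model folder)
+ ls ./python/results.png
+ # written by the cpp demo (run from cpp/build)
+ ls ./cpp/build/results.png
+ ```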
model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,30 @@
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+
6
+ message(STATUS "oPENCV Library status:")
7
+ message(STATUS ">version:${OpenCV_VERSION}")
8
+ message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
9
+
10
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
11
+
12
+ include_directories(
13
+ /usr/local/include
14
+ /usr/include/opencv4
15
+ )
16
+
17
+ link_directories(
18
+ /usr/local/lib/
19
+ )
20
+
21
+ file(GLOB SRC_LISTS
22
+ ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
23
+ )
24
+
25
+ add_executable(run_test ${SRC_LISTS})
26
+
27
+ target_link_libraries(run_test
28
+ aidlite
29
+ ${OpenCV_LIBS}
30
+ )
model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/cpp/main.cpp ADDED
@@ -0,0 +1,370 @@
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <opencv2/opencv.hpp>
6
+ #include <aidlux/aidlite/aidlite.hpp>
7
+ #include <vector>
8
+ #include <numeric>
9
+
10
+ const float INPUT_WIDTH = 640.0;
11
+ const float INPUT_HEIGHT = 640.0;
12
+ const float SCORE_THRESHOLD = 0.25;
13
+ const float NMS_THRESHOLD = 0.45;
14
+ const float CONFIDENCE_THRESHOLD = 0.25;
15
+ const uint32_t size = 640;
16
+ const uint32_t out_size = 8400;
17
+
18
+ const int FONT_FACE = cv::FONT_HERSHEY_SIMPLEX;
19
+ cv::Scalar WHITE = cv::Scalar(255,255,255);
20
+
21
+ const float FONT_SCALE = 1;
22
+ const int THICKNESS = 2;
23
+ using namespace Aidlux::Aidlite;
24
+
25
+ struct Args {
26
+ std::string target_model = "../../models/cutoff_yolov6m_w8a16.qnn223.ctx.bin.aidem";
27
+ std::string imgs = "../test.png";
28
+ int invoke_nums = 10;
29
+ std::string model_type = "QNN";
30
+ };
31
+
32
+ Args parse_args(int argc, char* argv[]) {
33
+ Args args;
34
+ for (int i = 1; i < argc; ++i) {
35
+ std::string arg = argv[i];
36
+ if (arg == "--target_model" && i + 1 < argc) {
37
+ args.target_model = argv[++i];
38
+ } else if (arg == "--imgs" && i + 1 < argc) {
39
+ args.imgs = argv[++i];
40
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
41
+ args.invoke_nums = std::stoi(argv[++i]);
42
+ } else if (arg == "--model_type" && i + 1 < argc) {
43
+ args.model_type = argv[++i];
44
+ }
45
+ }
46
+ return args;
47
+ }
48
+
49
+ std::string to_lower(const std::string& str) {
50
+ std::string lower_str = str;
51
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
52
+ return std::tolower(c);
53
+ });
54
+ return lower_str;
55
+ }
56
+
57
+
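+ // Merge the two head outputs into rows of trans_dim + mul_dim + 1 floats:
+ // the mul_dim box values (cx, cy, w, h) come first, the per-row maximum class
+ // score is reused as the confidence value in column 4, and the trans_dim raw
+ // class scores follow from column 5 onward.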
58
+ void concatenate(float* qnn_trans_data, float* qnn_mul_data, int batch, int num_elements, int trans_dim, int mul_dim, std::vector<float>& output) {
59
+ int out_dim = trans_dim + mul_dim + 1;
60
+ output.resize(batch * num_elements * out_dim);
61
+ for (int i = 0; i < batch * num_elements; ++i) {
62
+ std::memcpy(&output[i * out_dim], &qnn_mul_data[i * mul_dim], mul_dim * sizeof(float));
63
+ float max_val = *std::max_element(&qnn_trans_data[i * trans_dim], &qnn_trans_data[i * trans_dim + trans_dim]);
64
+ output[i * out_dim + 4] = max_val;
65
+ std::memcpy(&output[i * out_dim + 5], &qnn_trans_data[i * trans_dim], trans_dim * sizeof(float));
66
+ }
67
+ }
68
+
69
+ cv::Mat letterbox(cv::Mat im, cv::Size new_shape = cv::Size(640, 640),
70
+ cv::Scalar color = cv::Scalar(114, 114, 114),
71
+ bool auto_pad = true, bool scaleup = true, int stride = 32) {
72
+ // current shape [height, width]
73
+ cv::Size shape = im.size();
74
+ int height = shape.height;
75
+ int width = shape.width;
76
+
77
+ if (new_shape.width == 0) {
78
+ new_shape = cv::Size(new_shape.height, new_shape.height);
79
+ }
80
+
81
+ // Scale ratio (new / old)
82
+ float r = std::min((float)new_shape.height / height, (float)new_shape.width / width);
83
+ if (!scaleup) {
84
+ // only scale down, do not scale up (for better val mAP)
85
+ r = std::min(r, 1.0f);
86
+ }
87
+
88
+ // Compute padding
89
+ cv::Size new_unpad(round(width * r), round(height * r));
90
+ int dw = new_shape.width - new_unpad.width;
91
+ int dh = new_shape.height - new_unpad.height;
92
+
93
+ // minimum rectangle
94
+ if (auto_pad) {
95
+ dw = dw % stride;
96
+ dh = dh % stride;
97
+ }
98
+
99
+ dw /= 2; // divide padding into 2 sides
100
+ dh /= 2;
101
+
102
+ // resize
103
+ if (cv::Size(width, height) != new_unpad) {
104
+ cv::resize(im, im, new_unpad, 0, 0, cv::INTER_LINEAR);
105
+ }
106
+
107
+ int top = round(dh - 0.1);
108
+ int bottom = round(dh + 0.1);
109
+ int left = round(dw - 0.1);
110
+ int right = round(dw + 0.1);
111
+
112
+ cv::copyMakeBorder(im, im, top, bottom, left, right, cv::BORDER_CONSTANT, color);
113
+ return im;
114
+ }
115
+ cv::Scalar generate_colors(int i, bool bgr = false) {
116
+ static const std::vector<std::string> hex_colors = {
117
+ "FF3838", "FF9D97", "FF701F", "FFB21D", "CFD231", "48F90A",
118
+ "92CC17", "3DDB86", "1A9334", "00D4BB", "2C99A8", "00C2FF",
119
+ "344593", "6473FF", "0018EC", "8438FF", "520085", "CB38FF",
120
+ "FF95C8", "FF37C7"
121
+ };
122
+
123
+ int num = hex_colors.size();
124
+ std::string hex = hex_colors[i % num];
125
+
126
+ int r = std::stoi(hex.substr(0, 2), nullptr, 16);
127
+ int g = std::stoi(hex.substr(2, 2), nullptr, 16);
128
+ int b = std::stoi(hex.substr(4, 2), nullptr, 16);
129
+
130
+ if (bgr)
131
+ return cv::Scalar(b, g, r);
132
+ else
133
+ return cv::Scalar(r, g, b);
134
+ }
135
+
136
+ void draw_label(cv::Mat& input_image, std::string label, int left, int top, cv::Scalar color)
137
+ {
138
+ int baseLine;
139
+ cv::Size label_size = cv::getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS, &baseLine);
140
+ int y = top - label_size.height - baseLine;
141
+ if (y < 0) {
142
+ y = top + label_size.height + baseLine;
143
+ }
144
+ cv::Point tlc(left, y);
145
+ cv::Point brc(left + label_size.width, y + label_size.height + baseLine);
146
+ rectangle(input_image, tlc, brc, color, cv::FILLED);
147
+ putText(input_image, label, cv::Point(left, y + label_size.height), FONT_FACE, FONT_SCALE, WHITE, THICKNESS);
148
+ }
149
+
150
+
151
+ cv::Mat post_process(cv::Mat &input_image, std::vector<float> &outputs, const std::vector<std::string> &class_name)
152
+ {
153
+ // Initialize vectors to hold respective outputs while unwrapping detections.
154
+ std::vector<int> class_ids;
155
+ std::vector<float> confidences;
156
+ std::vector<cv::Rect> boxes;
157
+
158
+ // Resizing factor.
159
+ float r = std::min(INPUT_WIDTH / (float)input_image.cols, INPUT_HEIGHT / (float)input_image.rows);
160
+ int new_unpad_w = round(input_image.cols * r);
161
+ int new_unpad_h = round(input_image.rows * r);
162
+ int dw = (int)INPUT_WIDTH - new_unpad_w;
163
+ int dh = (int)INPUT_HEIGHT - new_unpad_h;
164
+ dw = (dw % 32) / 2;   // left padding added by letterbox() (stride-aligned, split across both sides)
165
+ dh = (dh % 32) / 2;   // top padding added by letterbox()
166
+
167
+ // Iterate through outputs for each box prediction
168
+ for (int i = 0; i < outputs.size(); i+=85)
169
+ {
170
+ float confidence = outputs[i+4];
171
+ if (confidence >= CONFIDENCE_THRESHOLD)
172
+ {
173
+ // Create a 1x80 Mat and store class scores of 80 classes.
174
+ cv::Mat scores(1, class_name.size(), CV_32FC1, outputs.data() + i + 5);
175
+ cv::Point class_id;
176
+ double max_class_score;
177
+
178
+ // For multi-label, check each class score
179
+ for (int c = 0; c < class_name.size(); c++) {
180
+ float class_score = scores.at<float>(0, c);
181
+
182
+ // If class score is above threshold, consider this class for the box
183
+ if (class_score > SCORE_THRESHOLD) {
184
+ // Store class ID and confidence in the pre-defined respective vectors.
185
+ confidences.push_back(confidence * class_score); // Multiply with confidence
186
+ class_ids.push_back(c); // class index
187
+ // Center and box dimension.
188
+ float cx = outputs[i];
189
+ float cy = outputs[i+1];
190
+ float w = outputs[i+2];
191
+ float h = outputs[i+3];
192
+
193
+ float x0 = (cx - 0.5f * w - dw) / r;
194
+ float y0 = (cy - 0.5f * h - dh) / r;
195
+ float x1 = (cx + 0.5f * w - dw) / r;
196
+ float y1 = (cy + 0.5f * h - dh) / r;
197
+
198
+ int left = int(x0);
199
+ int top = int(y0);
200
+ int width = int(x1 - x0);
201
+ int height = int(y1 - y0);
202
+
203
+ // Store good detections in the boxes vector.
204
+ boxes.push_back(cv::Rect(left, top, width, height));
205
+ }
206
+ }
207
+ }
208
+ }
209
+
210
+ // Perform Non Maximum Suppression and draw predictions.
211
+ std::vector<int> indices;
212
+ cv::dnn::NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD, indices);
213
+ printf("Detected %zu targets.\n", indices.size());
214
+
215
+ // Loop over NMS results and draw bounding boxes
216
+ for (int i = 0; i < indices.size(); i++)
217
+ {
218
+ int idx = indices[i];
219
+ cv::Rect box = boxes[idx];
220
+
221
+ int left = box.x;
222
+ int top = box.y;
223
+ int width = box.width;
224
+ int height = box.height;
225
+ cv::Scalar color = generate_colors(class_ids[idx]);
226
+ // Draw bounding box.
227
+ rectangle(input_image, cv::Point(left, top), cv::Point(left + width, top + height), color, 3*THICKNESS);
228
+
229
+ // Get the label for the class name and its confidence.
230
+ std::string label = cv::format("%.2f", confidences[idx]);
231
+ label = class_name[class_ids[idx]] + ":" + label;
232
+ // Draw class labels.
233
+ draw_label(input_image, label, left, top, color);
234
+ }
235
+ printf("Processing finished.\n");
236
+ return input_image;
237
+ }
238
+
239
+
240
+ int invoke(const Args& args) {
241
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
242
+ << "Image Path: " << args.imgs << "\n"
243
+ << "Inference Nums: " << args.invoke_nums << "\n"
244
+ << "Model Type: " << args.model_type << "\n";
245
+ Model* model = Model::create_instance(args.target_model);
246
+ if(model == nullptr){
247
+ printf("Create model failed !\n");
248
+ return EXIT_FAILURE;
249
+ }
250
+ Config* config = Config::create_instance();
251
+ if(config == nullptr){
252
+ printf("Create config failed !\n");
253
+ return EXIT_FAILURE;
254
+ }
255
+ config->implement_type = ImplementType::TYPE_LOCAL;
256
+ std::string model_type_lower = to_lower(args.model_type);
257
+ if (model_type_lower == "qnn"){
258
+ config->framework_type = FrameworkType::TYPE_QNN223;
259
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
260
+ config->framework_type = FrameworkType::TYPE_SNPE2;
261
+ }
262
+ config->accelerate_type = AccelerateType::TYPE_DSP;
263
+ config->is_quantify_model = 1;
264
+
265
+ std::vector<std::vector<uint32_t>> input_shapes = {{1, size, size, 3}};
266
+ std::vector<std::vector<uint32_t>> output_shapes = {{1, out_size, 80}, {1, out_size, 4}};
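+ // Output 0 holds the per-anchor class scores [1, 8400, 80] and output 1 the box
+ // regressions [1, 8400, 4]; both are consumed by concatenate() after inference.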
267
+ model->set_model_properties(input_shapes, DataType::TYPE_FLOAT32, output_shapes, DataType::TYPE_FLOAT32);
268
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
269
+ if(fast_interpreter == nullptr){
270
+ printf("build_interpretper_from_model_and_config failed !\n");
271
+ return EXIT_FAILURE;
272
+ }
273
+ int result = fast_interpreter->init();
274
+ if(result != EXIT_SUCCESS){
275
+ printf("interpreter->init() failed !\n");
276
+ return EXIT_FAILURE;
277
+ }
278
+ // load model
279
+ result = fast_interpreter->load_model();
280
+ if(result != EXIT_SUCCESS){
281
+ printf("interpreter->load_model() failed !\n");
282
+ return EXIT_FAILURE;
283
+ }
284
+ printf("detect model load success!\n");
285
+ cv::Size img_size(size, size);
286
+
287
+ cv::Mat img_src = cv::imread(args.imgs);
288
+ printf("img_src cols: %d, img_src rows: %d\n", img_src.cols, img_src.rows);
289
+ cv::Mat img_ori = img_src.clone();
290
+ cv::cvtColor(img_ori, img_ori, cv::COLOR_BGR2RGB);
291
+ cv::Mat resized_img = letterbox(img_ori, img_size);
292
+ cv::Mat input_img = cv::Mat::zeros(img_size, CV_32FC3);
293
+ resized_img.convertTo(resized_img, CV_32FC3, 1.0 / 255.0);
294
+ resized_img.copyTo(input_img(cv::Rect(0, 0, resized_img.cols, resized_img.rows)));
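+ // The letterboxed RGB image is normalized to [0, 1] and pasted into the top-left
+ // corner of a zero-filled 640x640x3 float buffer (NHWC), matching the input shape above.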
295
+ float *qnn_trans_data = nullptr;
296
+ float *qnn_mul_data = nullptr;
297
+
298
+ std::vector<float> invoke_time;
299
+ for (int i = 0; i < args.invoke_nums; ++i) {
300
+ result = fast_interpreter->set_input_tensor(0, input_img.data);
301
+ if(result != EXIT_SUCCESS){
302
+ printf("interpreter->set_input_tensor() failed !\n");
303
+ return EXIT_FAILURE;
304
+ }
305
+ // start timing
306
+ auto t1 = std::chrono::high_resolution_clock::now();
307
+ result = fast_interpreter->invoke();
308
+ auto t2 = std::chrono::high_resolution_clock::now();
309
+ std::chrono::duration<double> cost_time = t2 - t1;
310
+ invoke_time.push_back(cost_time.count() * 1000);
311
+ if(result != EXIT_SUCCESS){
312
+ printf("interpreter->invoke() failed !\n");
313
+ return EXIT_FAILURE;
314
+ }
315
+ uint32_t out_data_1 = 0;
316
+ result = fast_interpreter->get_output_tensor(0, (void**)&qnn_trans_data, &out_data_1);
317
+ if(result != EXIT_SUCCESS){
318
+ printf("interpreter->get_output_tensor() 1 failed !\n");
319
+ return EXIT_FAILURE;
320
+ }
321
+ uint32_t out_data_2 = 0;
322
+ result = fast_interpreter->get_output_tensor(1, (void**)&qnn_mul_data, &out_data_2);
323
+ if(result != EXIT_SUCCESS){
324
+ printf("interpreter->get_output_tensor() 2 failed !\n");
325
+ return EXIT_FAILURE;
326
+ }
327
+ }
328
+
329
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
330
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
331
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
332
+ float var_invoketime = 0.0f;
333
+ for (auto time : invoke_time) {
334
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
335
+ }
336
+ var_invoketime /= args.invoke_nums;
337
+ printf("=======================================\n");
338
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
339
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
340
+ printf("=======================================\n");
341
+
342
+ std::vector<std::string> class_list = {
343
+ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
344
+ "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter",
345
+ "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear",
346
+ "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase",
347
+ "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
348
+ "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
349
+ "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
350
+ "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",
351
+ "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet",
352
+ "TV", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
353
+ "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
354
+ "scissors", "teddy bear", "hair drier", "toothbrush"
355
+ };
356
+
357
+ // post process
358
+ std::vector<float> qnn_concat;
359
+ concatenate(qnn_trans_data, qnn_mul_data, 1, out_size, 80, 4, qnn_concat);
360
+ cv::Mat img = post_process(img_src, qnn_concat, class_list);
361
+ cv::imwrite("./results.png", img);
362
+ fast_interpreter->destory();
363
+ return 0;
364
+ }
365
+
366
+
367
+ int main(int argc, char* argv[]) {
368
+ Args args = parse_args(argc, argv);
369
+ return invoke(args);
370
+ }
model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/cpp/test.png ADDED

Git LFS Details

  • SHA256: 7c95b20b95830c48c48c1ea3a004b3cc5a392672a3ea130a4eb41db5664ebfe5
  • Pointer size: 131 Bytes
  • Size of remote file: 970 kB
model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/models/cutoff_yolov6m_w8a16.qnn223.ctx.bin.aidem ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e920a05c31f9b5cc85cd66035ff82bcf0ffe07378413039b2b7e8c43096f2f8
3
+ size 36018768
model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/python/run_test.py ADDED
@@ -0,0 +1,147 @@
1
+ import os, torch, cv2
2
+ import numpy as np
3
+ import time
4
+ import aidlite
5
+ import argparse
6
+ from utils import letterbox,plot_box_and_label,rescale,generate_colors,non_max_suppression
7
+ import torch
8
+
9
+
10
+ def process_image(path, img_size):
11
+ img_src = cv2.imread(path)
12
+ img_src = cv2.cvtColor(img_src,cv2.COLOR_BGR2RGB)
13
+ image = letterbox(img_src, img_size)[0]
14
+ new_h,new_w,_=image.shape
15
+ input_img = np.zeros((img_size[0], img_size[1], 3), np.uint8)
16
+ input_img[0:new_h, 0:new_w] = image
17
+ input_img = input_img.astype(np.float32)
18
+ input_img /= 255 # 0 - 255 to 0.0 - 1.0
19
+ input_img = np.expand_dims(input_img,0)
20
+ return image,input_img, img_src
21
+
22
+ def main(args):
23
+ print("Start main ... ...")
24
+ # aidlite.set_log_level(aidlite.LogLevel.INFO)
25
+ # aidlite.log_to_stderr()
26
+ # print(f"Aidlite library version : {aidlite.get_library_version()}")
27
+ # print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
28
+
29
+ size=640
30
+ out_size=8400
31
+ config = aidlite.Config.create_instance()
32
+ if config is None:
33
+ print("Create config failed !")
34
+ return False
35
+ config.implement_type = aidlite.ImplementType.TYPE_LOCAL
36
+ if args.model_type.lower()=="qnn":
37
+ config.framework_type = aidlite.FrameworkType.TYPE_QNN223
38
+ elif args.model_type.lower()=="snpe2" or args.model_type.lower()=="snpe":
39
+ config.framework_type = aidlite.FrameworkType.TYPE_SNPE2
40
+
41
+ config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
42
+ config.is_quantify_model = 1
43
+
44
+
45
+ model = aidlite.Model.create_instance(args.target_model)
46
+ if model is None:
47
+ print("Create model failed !")
48
+ return False
49
+ input_shapes = [[1, size, size, 3]]
50
+ output_shapes = [[1, out_size,80],[1, out_size,4]]
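+     # Output 0: class scores [1, 8400, 80]; output 1: box regressions [1, 8400, 4].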
51
+ model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
52
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
53
+
54
+ interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
55
+ if interpreter is None:
56
+ print("build_interpretper_from_model_and_config failed !")
57
+ return None
58
+ result = interpreter.init()
59
+ if result != 0:
60
+ print(f"interpreter init failed !")
61
+ return False
62
+ result = interpreter.load_model()
63
+ if result != 0:
64
+ print("interpreter load model failed !")
65
+ return False
66
+ print("detect model load success!")
67
+
68
+ # image process
69
+ img_size=[size,size]
70
+ resize_img,input_img, img_src = process_image(args.imgs, img_size)
71
+
72
+ # qnn run
73
+ invoke_time=[]
74
+ for i in range(args.invoke_nums):
75
+ result = interpreter.set_input_tensor(0, input_img.data)
76
+ if result != 0:
77
+ print("interpreter set_input_tensor() failed")
78
+
79
+ t1=time.time()
80
+ result = interpreter.invoke()
81
+ cost_time = (time.time()-t1)*1000
82
+ invoke_time.append(cost_time)
83
+
84
+ if result != 0:
85
+ print("interpreter invoke() failed")
86
+
87
+ qnn_trans = interpreter.get_output_tensor(0).reshape(1,out_size,80)
88
+ qnn_mul = interpreter.get_output_tensor(1).reshape(1,out_size,4)
89
+
90
+ result = interpreter.destory()
91
+
92
+ ## timing statistics
93
+ max_invoke_time = max(invoke_time)
94
+ min_invoke_time = min(invoke_time)
95
+ mean_invoke_time = sum(invoke_time)/args.invoke_nums
96
+ var_invoketime=np.var(invoke_time)
97
+ print("=======================================")
98
+ print(f"QNN inference {args.invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
99
+ print("=======================================")
100
+
101
+ # post-processing
102
+ conf_thres =0.25 #@param {type:"number"}
103
+ iou_thres =0.45 #@param {type:"number"}
104
+ max_det= 1000#@param {type:"integer"}
105
+ agnostic_nms= False #@param {type:"boolean"}
106
+ classes =None
107
+ hide_labels = False #@param {type:"boolean"}
108
+ hide_conf= False #@param {type:"boolean"}
109
+
110
+ qnn_conf = np.ones((1,out_size,1))
111
+ qnn_predict=np.concatenate((qnn_mul,qnn_conf,qnn_trans), axis=2)
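+     # Assemble an 85-column prediction [x, y, w, h, obj, 80 class scores]; the objectness
+     # column is filled with ones because the exported head provides only boxes and class
+     # scores, so NMS effectively ranks boxes by class confidence alone.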
112
+ pred_results =torch.from_numpy(qnn_predict.copy())
113
+ det = non_max_suppression(pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0]
114
+
115
+ class_names=[ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
116
+ 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
117
+ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
118
+ 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
119
+ 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
120
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
121
+ 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
122
+ 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
123
+ 'hair drier', 'toothbrush' ]
124
+
125
+ img_ori = img_src.copy()
126
+ print(f"Detected {len(det)} targets.")
127
+ if len(det):
128
+ det[:, :4] = rescale(resize_img.shape[:2], det[:, :4], img_src.shape).round()
129
+ for *xyxy, conf, cls in reversed(det):
130
+ class_num = int(cls)
131
+ label = None if hide_labels else (class_names[class_num] if hide_conf else f'{class_names[class_num]} {conf:.2f}')
132
+ plot_box_and_label(img_ori, max(round(sum(img_ori.shape) / 2 * 0.003), 2), xyxy, label, color=generate_colors(class_num, True))
133
+
134
+ cv2.imwrite("./python/results.png",cv2.cvtColor(img_ori,cv2.COLOR_RGB2BGR))
135
+
136
+ def parser_args():
137
+ parser = argparse.ArgumentParser(description="Run model benchmarks")
138
+ parser.add_argument('--target_model',type=str,default='./models/cutoff_yolov6m_w8a16.qnn223.ctx.bin.aidem',help="inference model path")
139
+ parser.add_argument('--imgs',type=str,default='./python/test.png',help="Predict images path")
140
+ parser.add_argument('--invoke_nums',type=int,default=10,help="Inference nums")
141
+ parser.add_argument('--model_type',type=str,default='QNN',help="run backend")
142
+ args = parser.parse_args()
143
+ return args
144
+
145
+ if __name__ == "__main__":
146
+ args = parser_args()
147
+ main(args)
model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/python/test.png ADDED

Git LFS Details

  • SHA256: 7c95b20b95830c48c48c1ea3a004b3cc5a392672a3ea130a4eb41db5664ebfe5
  • Pointer size: 131 Bytes
  • Size of remote file: 970 kB
model_farm_yolov6m_qcs8550_qnn2.23_w8a16_aidlite/python/utils.py ADDED
@@ -0,0 +1,160 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ import torchvision
5
+
6
+ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
7
+ '''Resize and pad image while meeting stride-multiple constraints.'''
8
+ shape = im.shape[:2] # current shape [height, width]
9
+ if isinstance(new_shape, int):
10
+ new_shape = (new_shape, new_shape)
11
+ elif isinstance(new_shape, list) and len(new_shape) == 1:
12
+ new_shape = (new_shape[0], new_shape[0])
13
+
14
+ # Scale ratio (new / old)
15
+ r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
16
+ if not scaleup: # only scale down, do not scale up (for better val mAP)
17
+ r = min(r, 1.0)
18
+
19
+ # Compute padding
20
+ new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
21
+ dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
22
+
23
+ if auto: # minimum rectangle
24
+ dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
25
+
26
+ dw /= 2 # divide padding into 2 sides
27
+ dh /= 2
28
+
29
+ if shape[::-1] != new_unpad: # resize
30
+ im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
31
+ top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
32
+ left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
33
+ im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
34
+
35
+ return im, r, (left, top)
36
+
37
+
38
+
39
+
40
+ def xywh2xyxy(x):
41
+ '''Convert boxes with shape [n, 4] from [x, y, w, h] to [x1, y1, x2, y2] where x1y1 is top-left, x2y2=bottom-right.'''
42
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
43
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
44
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
45
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
46
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
47
+ return y
48
+
49
+ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300):
50
+ """Runs Non-Maximum Suppression (NMS) on inference results.
51
+ This code is borrowed from: https://github.com/ultralytics/yolov5/blob/47233e1698b89fc437a4fb9463c815e9171be955/utils/general.py#L775
52
+ Args:
53
+ prediction: (tensor), with shape [N, 5 + num_classes], N is the number of bboxes.
54
+ conf_thres: (float) confidence threshold.
55
+ iou_thres: (float) iou threshold.
56
+ classes: (None or list[int]), if a list is provided, nms only keep the classes you provide.
57
+ agnostic: (bool), when it is set to True, we do class-independent nms, otherwise, different class would do nms respectively.
58
+ multi_label: (bool), when it is set to True, one box can have multiple labels; otherwise, one box only has one label.
59
+ max_det:(int), max number of output bboxes.
60
+
61
+ Returns:
62
+ list of detections, each item is one tensor with shape (num_boxes, 6), 6 is for [xyxy, conf, cls].
63
+ """
64
+
65
+ num_classes = prediction.shape[2] - 5 # number of classes
66
+ pred_candidates = torch.logical_and(prediction[..., 4] > conf_thres, torch.max(prediction[..., 5:], axis=-1)[0] > conf_thres) # candidates
67
+ # Check the parameters.
68
+ assert 0 <= conf_thres <= 1, f'conf_thresh must be in 0.0 to 1.0, however {conf_thres} is provided.'
69
+ assert 0 <= iou_thres <= 1, f'iou_thres must be in 0.0 to 1.0, however {iou_thres} is provided.'
70
+
71
+ # Function settings.
72
+ max_wh = 4096 # maximum box width and height
73
+ max_nms = 30000 # maximum number of boxes put into torchvision.ops.nms()
74
+ time_limit = 10.0 # quit the function when the NMS cost time exceeds the limit.
75
+ multi_label &= num_classes > 1 # multiple labels per box
76
+
77
+ output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
78
+ for img_idx, x in enumerate(prediction): # image index, image inference
79
+ x = x[pred_candidates[img_idx]] # confidence
80
+
81
+ # If no box remains, skip the next process.
82
+ if not x.shape[0]:
83
+ continue
84
+
85
+ # confidence multiply the objectness
86
+ x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
87
+
88
+ # (center x, center y, width, height) to (x1, y1, x2, y2)
89
+ box = xywh2xyxy(x[:, :4])
90
+
91
+ # Detections matrix's shape is (n,6), each row represents (xyxy, conf, cls)
92
+ if multi_label:
93
+ box_idx, class_idx = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
94
+ x = torch.cat((box[box_idx], x[box_idx, class_idx + 5, None], class_idx[:, None].float()), 1)
95
+ else: # Only keep the class with highest scores.
96
+ conf, class_idx = x[:, 5:].max(1, keepdim=True)
97
+ x = torch.cat((box, conf, class_idx.float()), 1)[conf.view(-1) > conf_thres]
98
+
99
+ # Filter by class, only keep boxes whose category is in classes.
100
+ if classes is not None:
101
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
102
+
103
+ # Check shape
104
+ num_box = x.shape[0] # number of boxes
105
+ if not num_box: # no boxes kept.
106
+ continue
107
+ elif num_box > max_nms: # excess max boxes' number.
108
+ x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
109
+
110
+ # Batched NMS
111
+ class_offset = x[:, 5:6] * (0 if agnostic else max_wh) # classes
112
+ boxes, scores = x[:, :4] + class_offset, x[:, 4] # boxes (offset by class), scores
113
+ keep_box_idx = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
114
+ if keep_box_idx.shape[0] > max_det: # limit detections
115
+ keep_box_idx = keep_box_idx[:max_det]
116
+
117
+ output[img_idx] = x[keep_box_idx]
118
+
119
+ return output
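+ # Minimal usage sketch (values illustrative): `pred` is a (1, N, 85) tensor of
+ # [x, y, w, h, obj, class scores]; each returned item is an (M, 6) tensor of [xyxy, conf, cls].
+ #   det = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45, max_det=300)[0]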
120
+
121
+ def plot_box_and_label(image, lw, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255), font=cv2.FONT_HERSHEY_COMPLEX):
122
+ # Add one xyxy box to image with label
123
+ p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
124
+ cv2.rectangle(image, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
125
+ if label:
126
+ tf = max(lw - 1, 1) # font thickness
127
+ w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0] # text width, height
128
+ outside = p1[1] - h - 3 >= 0 # label fits outside box
129
+ p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
130
+ cv2.rectangle(image, p1, p2, color, -1, cv2.LINE_AA) # filled
131
+ cv2.putText(image, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), font, lw / 3, txt_color,
132
+ thickness=tf, lineType=cv2.LINE_AA)
133
+
134
+ def generate_colors(i, bgr=False):
135
+ hex = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
136
+ '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
137
+ palette = []
138
+ for iter in hex:
139
+ h = '#' + iter
140
+ palette.append(tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)))
141
+ num = len(palette)
142
+ color = palette[int(i) % num]
143
+ return (color[2], color[1], color[0]) if bgr else color
144
+
145
+ def rescale(ori_shape, boxes, target_shape):
146
+ '''Rescale the output to the original image shape'''
147
+ ratio = min(ori_shape[0] / target_shape[0], ori_shape[1] / target_shape[1])
148
+ padding = (ori_shape[1] - target_shape[1] * ratio) / 2, (ori_shape[0] - target_shape[0] * ratio) / 2
149
+
150
+ boxes[:, [0, 2]] -= padding[0]
151
+ boxes[:, [1, 3]] -= padding[1]
152
+ boxes[:, :4] /= ratio
153
+
154
+ boxes[:, 0].clamp_(0, target_shape[1]) # x1
155
+ boxes[:, 1].clamp_(0, target_shape[0]) # y1
156
+ boxes[:, 2].clamp_(0, target_shape[1]) # x2
157
+ boxes[:, 3].clamp_(0, target_shape[0]) # y2
158
+
159
+ return boxes
160
+