# Spaces: Running
from sentence_transformers import (
    SentenceTransformer,
    export_static_quantized_openvino_model,
    export_dynamic_quantized_onnx_model,
)
# Hub repository ID of the model. It is used both as the checkpoint that gets
# loaded and as the destination repository the exported weights are pushed to.
MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v1_0_7_8"
def export_model(backend="onnx", use_qint8=False):
    """Export ``MODEL_NAME`` for an inference backend and push it to the Hub.

    The model is loaded with the requested backend and then uploaded to the
    ``MODEL_NAME`` repository. Every upload is opened as a pull request
    (``create_pr=True``) rather than committed directly.

    Args:
        backend: ``"onnx"`` or ``"openvino"``.
        use_qint8: When true, export an int8-quantized variant through the
            backend-specific sentence-transformers helper; otherwise push the
            converted model as-is.

    Raises:
        ValueError: If ``backend`` is neither ``"onnx"`` nor ``"openvino"``.
    """
    # Reject unknown backends before any model is loaded.
    if backend not in ("openvino", "onnx"):
        raise ValueError(f"Invalid backend: {backend}")

    model = SentenceTransformer(MODEL_NAME, backend=backend)

    if not use_qint8:
        model.push_to_hub(MODEL_NAME, create_pr=True)
        return

    if backend == "openvino":
        export_static_quantized_openvino_model(
            model,
            # NOTE(review): None presumably selects the library's default
            # quantization settings -- confirm against the installed version.
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
            create_pr=True,
        )
    else:
        export_dynamic_quantized_onnx_model(
            model,
            quantization_config="avx512_vnni",
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
            # Open a pull request like every other export path instead of
            # committing the quantized weights directly to the repository.
            create_pr=True,
        )
# Export one backend/precision combination per run; edit the two constants
# below and re-run to produce the other variants.
BACKEND = "openvino"  # accepted by export_model: "openvino" or "onnx"
USE_QINT8 = False  # True exports the int8-quantized variant instead

print(f"Exporting {BACKEND} model with QINT8={USE_QINT8}")
export_model(backend=BACKEND, use_qint8=USE_QINT8)

# import tensorrt as trt
# print(trt.__version__)