itdainb committed on May 2, 2024

Commit

ac08411

verified ·

1 Parent(s): a9588a7

Added optimized and quantized version

Browse files

Files changed (24) hide show

dynamic_q8/added_tokens.json +3 -0
dynamic_q8/bpe.codes +0 -0
dynamic_q8/config.json +29 -0
dynamic_q8/model_quantized.onnx +3 -0
dynamic_q8/ort_config.json +35 -0
dynamic_q8/special_tokens_map.json +51 -0
dynamic_q8/tokenizer_config.json +54 -0
dynamic_q8/vocab.txt +0 -0
optimized/added_tokens.json +3 -0
optimized/bpe.codes +0 -0
optimized/config.json +29 -0
optimized/model_optimized.onnx +3 -0
optimized/ort_config.json +39 -0
optimized/special_tokens_map.json +51 -0
optimized/tokenizer_config.json +54 -0
optimized/vocab.txt +0 -0
static_q8/added_tokens.json +3 -0
static_q8/bpe.codes +0 -0
static_q8/config.json +29 -0
static_q8/model_quantized.onnx +3 -0
static_q8/ort_config.json +47 -0
static_q8/special_tokens_map.json +51 -0
static_q8/tokenizer_config.json +54 -0
static_q8/vocab.txt +0 -0

dynamic_q8/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<mask>": 64000
+}

dynamic_q8/bpe.codes ADDED Viewed

The diff for this file is too large to render. See raw diff

dynamic_q8/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "_name_or_path": "onnx_v2",
+  "architectures": [
+    "RobertaModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 258,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "tokenizer_class": "PhobertTokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 64001
+}

dynamic_q8/model_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee0ced372506cd045769c94c764efe4bba5a29494c3ccbc1a50699627b82bda7
+size 135223909

dynamic_q8/ort_config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "one_external_file": true,
+  "opset": null,
+  "optimization": {},
+  "optimum_version": "1.19.1",
+  "quantization": {
+    "activations_dtype": "QUInt8",
+    "activations_symmetric": false,
+    "format": "QOperator",
+    "is_static": false,
+    "mode": "IntegerOps",
+    "nodes_to_exclude": [],
+    "nodes_to_quantize": [],
+    "operators_to_quantize": [
+      "Conv",
+      "MatMul",
+      "Attention",
+      "LSTM",
+      "Gather",
+      "Transpose",
+      "EmbedLayerNormalization"
+    ],
+    "per_channel": false,
+    "qdq_add_pair_to_weight": false,
+    "qdq_dedicated_pair": false,
+    "qdq_op_type_per_channel_support_to_axis": {
+      "MatMul": 1
+    },
+    "reduce_range": false,
+    "weights_dtype": "QInt8",
+    "weights_symmetric": true
+  },
+  "transformers_version": "4.40.1",
+  "use_external_data_format": false
+}

dynamic_q8/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

dynamic_q8/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "64000": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "PhobertTokenizer",
+  "unk_token": "<unk>"
+}

dynamic_q8/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

optimized/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<mask>": 64000
+}

optimized/bpe.codes ADDED Viewed

The diff for this file is too large to render. See raw diff

optimized/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "_name_or_path": "vietnamese-embedding\\config.json",
+  "architectures": [
+    "RobertaModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 258,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "tokenizer_class": "PhobertTokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 64001
+}

optimized/model_optimized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7505b37bcde94d195fbedaa3197f0fb7a0407664d55c840aff1e24eccb1b76d
+size 537664772

optimized/ort_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "one_external_file": true,
+  "opset": null,
+  "optimization": {
+    "disable_attention": null,
+    "disable_attention_fusion": false,
+    "disable_bias_gelu": null,
+    "disable_bias_gelu_fusion": false,
+    "disable_bias_skip_layer_norm": null,
+    "disable_bias_skip_layer_norm_fusion": false,
+    "disable_embed_layer_norm": true,
+    "disable_embed_layer_norm_fusion": true,
+    "disable_gelu": null,
+    "disable_gelu_fusion": false,
+    "disable_group_norm_fusion": true,
+    "disable_layer_norm": null,
+    "disable_layer_norm_fusion": false,
+    "disable_packed_kv": true,
+    "disable_rotary_embeddings": false,
+    "disable_shape_inference": false,
+    "disable_skip_layer_norm": null,
+    "disable_skip_layer_norm_fusion": false,
+    "enable_gelu_approximation": true,
+    "enable_gemm_fast_gelu_fusion": false,
+    "enable_transformers_specific_optimizations": true,
+    "fp16": false,
+    "no_attention_mask": false,
+    "optimization_level": 2,
+    "optimize_for_gpu": true,
+    "optimize_with_onnxruntime_only": null,
+    "use_mask_index": false,
+    "use_multi_head_attention": false,
+    "use_raw_attention_mask": false
+  },
+  "optimum_version": "1.19.1",
+  "quantization": {},
+  "transformers_version": "4.40.1",
+  "use_external_data_format": false
+}

optimized/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

optimized/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "64000": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "PhobertTokenizer",
+  "unk_token": "<unk>"
+}

optimized/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

static_q8/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<mask>": 64000
+}

static_q8/bpe.codes ADDED Viewed

The diff for this file is too large to render. See raw diff

static_q8/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "_name_or_path": "onnx_v2",
+  "architectures": [
+    "RobertaModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 258,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "tokenizer_class": "PhobertTokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 64001
+}

static_q8/model_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c72afa92c9bb7523d5937dd5e37e8040a60ab5aa5350c1bf32f754c7eb431405
+size 135308653

static_q8/ort_config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "one_external_file": true,
+  "opset": null,
+  "optimization": {},
+  "optimum_version": "1.19.1",
+  "quantization": {
+    "activations_dtype": "QUInt8",
+    "activations_symmetric": false,
+    "format": "QDQ",
+    "is_static": true,
+    "mode": "QLinearOps",
+    "nodes_to_exclude": [],
+    "nodes_to_quantize": [],
+    "operators_to_quantize": [
+      "Conv",
+      "ConvTranspose",
+      "Gemm",
+      "Clip",
+      "Relu",
+      "Reshape",
+      "Transpose",
+      "Squeeze",
+      "Unsqueeze",
+      "Resize",
+      "MaxPool",
+      "AveragePool",
+      "MatMul",
+      "Split",
+      "Gather",
+      "Softmax",
+      "Where",
+      "InstanceNormalization",
+      "LayerNormalization"
+    ],
+    "per_channel": false,
+    "qdq_add_pair_to_weight": false,
+    "qdq_dedicated_pair": false,
+    "qdq_op_type_per_channel_support_to_axis": {
+      "MatMul": 1
+    },
+    "reduce_range": false,
+    "weights_dtype": "QInt8",
+    "weights_symmetric": true
+  },
+  "transformers_version": "4.40.1",
+  "use_external_data_format": false
+}

static_q8/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

static_q8/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "64000": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "PhobertTokenizer",
+  "unk_token": "<unk>"
+}

static_q8/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff