FEAT: New model
- model/6a62f122a90e090b285f0344a1d79e753f2000bb/config.json +26 -0
- model/6a62f122a90e090b285f0344a1d79e753f2000bb/info.json +4 -0
- model/6a62f122a90e090b285f0344a1d79e753f2000bb/meta-info.json +1 -0
- model/6a62f122a90e090b285f0344a1d79e753f2000bb/pytorch_model.bin +3 -0
- model/6a62f122a90e090b285f0344a1d79e753f2000bb/special_tokens_map.json +1 -0
- model/6a62f122a90e090b285f0344a1d79e753f2000bb/tokenizer_config.json +1 -0
- model/6a62f122a90e090b285f0344a1d79e753f2000bb/training_args.bin +3 -0
- model/6a62f122a90e090b285f0344a1d79e753f2000bb/vocab.txt +0 -0
model/6a62f122a90e090b285f0344a1d79e753f2000bb/config.json
ADDED
@@ -0,0 +1,26 @@
+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "OwnBertForNextSentencePrediction"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.17.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
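The config above describes a bert-base-uncased-sized encoder whose architectures entry, OwnBertForNextSentencePrediction, is a custom class from this project (see meta-info.json below), not a stock transformers class. A minimal sketch of what the config describes, using the stock BertForNextSentencePrediction only as a stand-in; its head is smaller than the custom one mentioned in info.json, so the real checkpoint would not fully load into it:

# Minimal sketch (not the repo's own loading code); the stock NSP class is a
# stand-in for the project-specific OwnBertForNextSentencePrediction.
from transformers import BertConfig, BertForNextSentencePrediction

MODEL_DIR = "model/6a62f122a90e090b285f0344a1d79e753f2000bb"  # path from this commit

config = BertConfig.from_pretrained(MODEL_DIR)        # reads the config.json above
print(config.hidden_size, config.num_hidden_layers)   # 768, 12 (bert-base-uncased sized)

# Builds a randomly initialised NSP model with this shape, just to illustrate
# the architecture the config file encodes.
model = BertForNextSentencePrediction(config)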
model/6a62f122a90e090b285f0344a1d79e753f2000bb/info.json
ADDED
@@ -0,0 +1,4 @@
+{
+  "model": "BERT-NSP-v6",
+  "description": "Model trained on DailyDialogue and CommonDialogues. Using [unused1] token to divide sentences in context. Improved training arguments (warmup, smaller learning rate). Using frozen test set to better compare models and therefore trained longer time (about 60 epochs). The model also have bigger classification head (from one layer liner as classical). Added method for **Data Augmentation**. More info can be found at https://wandb.ai/alquist/next-sentence-prediction/runs/xvzhhw2r/overview?workspace=user-petr-lorenc"
+}
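The description says context turns are divided with the [unused1] token before next-sentence-prediction scoring. A hedged illustration of that input format, again with the stock BertForNextSentencePrediction standing in for the project's OwnBertForNextSentencePrediction; the dialogue strings are made up:

# Hedged sketch of the input format implied by the description above.
import torch
from transformers import BertTokenizer, BertForNextSentencePrediction

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
tokenizer.add_special_tokens({"additional_special_tokens": ["[unused1]"]})  # as in special_tokens_map.json below
model = BertForNextSentencePrediction.from_pretrained("bert-base-uncased")  # stand-in, not the trained checkpoint
model.eval()

context_turns = ["hello , how are you ?", "i am fine , thanks ."]   # hypothetical dialogue
response = "what did you do today ?"

context = " [unused1] ".join(context_turns)            # [unused1] divides sentences in the context
inputs = tokenizer(context, response, return_tensors="pt",
                   truncation="only_first", max_length=256 + 40)    # ctx/res budget from meta-info.json below

with torch.no_grad():
    logits = model(**inputs).logits                    # [is_next, not_next] logits
probs = torch.softmax(logits, dim=-1)
print("P(response follows context) =", probs[0, 0].item())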
model/6a62f122a90e090b285f0344a1d79e753f2000bb/meta-info.json
ADDED
@@ -0,0 +1 @@
+{"args": [], "kwargs": {"model_package": "models", "model_class": "OwnBertForNextSentencePrediction", "data_root": "/home/lorenpe2/project/data", "data_sources": [["COMMON_DIALOGUES", "common_dialogues/train.json", "common_dialogues/valid_frozen.json", "common_dialogues/test_frozen.json"], ["DAILY_DIALOGUES", "daily_dialogues/dialogues_text.train.txt", "daily_dialogues/dev_frozen.json", "daily_dialogues/test_frozen.json"]], "pretrained_model": "bert-base-uncased", "tokenizer": "bert-base-uncased", "approach": "IGNORE_DUPLICITIES", "data_augmentation": ["ADD_PARTLY_BROKEN_CONTEXT"], "special_token": "[unused1]", "learning_rate": 5e-07, "warmup_ratio": 0.1, "freeze_prefinetuning": true, "prefinenuting_epoch": 10, "finetuning_epochs": 75}, "tokenizer_args": {"padding": "max_length", "max_length_ctx": 256, "max_length_res": 40, "truncation": "only_first", "return_tensors": "np", "is_split_into_words": true, "approach": "IGNORE_DUPLICITIES", "special_token": "[unused1]"}}
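The tokenizer_args block hints at how (context, response) pairs are encoded: pre-split words, padding to a fixed length, and truncation of the context only. max_length_ctx and max_length_res are project-specific names; a standard HuggingFace tokenizer takes a single max_length, so the sketch below simply sums them, which is an assumption:

# Hedged reading of tokenizer_args; the 256 + 40 split into context/response
# budgets is interpreted here as one combined max_length.
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
tokenizer.add_special_tokens({"additional_special_tokens": ["[unused1]"]})   # special_token from tokenizer_args

context_words = "hello [unused1] how are you".split()   # is_split_into_words=True: pre-split words
response_words = "i am fine thanks".split()

encoded = tokenizer(
    context_words,
    response_words,
    is_split_into_words=True,
    padding="max_length",
    truncation="only_first",      # drop overflow from the context, keep the response intact
    max_length=256 + 40,
    return_tensors="np",
)
print(encoded["input_ids"].shape)   # (1, 296)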
model/6a62f122a90e090b285f0344a1d79e753f2000bb/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb065d01ae7e4e255d81ca3fdafadb21c42daea7cafdaaac3c21923f11827641
+size 438871109
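pytorch_model.bin is stored as a Git LFS pointer (the three lines above), so the checkpoint itself is fetched separately. A small optional sanity check, not part of the repo, that the downloaded file matches the size and sha256 recorded in the pointer:

# Verify the fetched weights against the LFS pointer above.
import hashlib, os

path = "model/6a62f122a90e090b285f0344a1d79e753f2000bb/pytorch_model.bin"
expected_oid = "eb065d01ae7e4e255d81ca3fdafadb21c42daea7cafdaaac3c21923f11827641"
expected_size = 438871109

assert os.path.getsize(path) == expected_size, "file is still an LFS pointer or truncated"

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):   # hash in 1 MiB chunks
        sha256.update(chunk)
assert sha256.hexdigest() == expected_oid, "checksum mismatch"
print("pytorch_model.bin matches the LFS pointer")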
model/6a62f122a90e090b285f0344a1d79e753f2000bb/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
+{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "additional_special_tokens": ["[unused1]"]}
model/6a62f122a90e090b285f0344a1d79e753f2000bb/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
+{"do_lower_case": true, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "bert-base-uncased", "tokenizer_class": "BertTokenizer"}
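Together with special_tokens_map.json above and vocab.txt below, this tokenizer config should let the tokenizer load straight from the model folder with [unused1] registered as an additional special token, so the separator survives tokenization as a single piece. A minimal check, assuming the folder path from this commit:

# Minimal sketch: load the tokenizer from the committed folder and confirm the
# [unused1] separator is treated as one special token.
from transformers import BertTokenizer

MODEL_DIR = "model/6a62f122a90e090b285f0344a1d79e753f2000bb"
tokenizer = BertTokenizer.from_pretrained(MODEL_DIR)

print(tokenizer.additional_special_tokens)          # ['[unused1]']
print(tokenizer.tokenize("hi there [unused1] how are you"))
# -> ['hi', 'there', '[unused1]', 'how', 'are', 'you']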
model/6a62f122a90e090b285f0344a1d79e753f2000bb/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0032b55f2a68888f89b97e84498c69d4a7a65403f1a209c41da390a1762f76fc
+size 3195
model/6a62f122a90e090b285f0344a1d79e753f2000bb/vocab.txt
ADDED
The diff for this file is too large to render.