Model save

Browse files

Files changed (11) hide show

README.md +8 -17
config.json +70 -27
merges.txt +0 -0
model.safetensors +2 -2
runs/Oct04_05-38-28_sagemaker-data-scie-ml-g4dn-xlarge-ba9c1f5c18780e053e03a6778121/events.out.tfevents.1728020309.sagemaker-data-scie-ml-g4dn-xlarge-ba9c1f5c18780e053e03a6778121 +3 -0
runs/Oct04_05-39-14_sagemaker-data-scie-ml-g4dn-xlarge-ba9c1f5c18780e053e03a6778121/events.out.tfevents.1728020355.sagemaker-data-scie-ml-g4dn-xlarge-ba9c1f5c18780e053e03a6778121 +3 -0
special_tokens_map.json +8 -44
tokenizer.json +0 -0
tokenizer_config.json +23 -24
training_args.bin +2 -2
vocab.json +0 -0

README.md CHANGED Viewed

@@ -1,25 +1,20 @@
 ---
 library_name: transformers
-license: apache-2.0
-base_model: knowledgator/comprehend_it-base
 tags:
 - generated_from_trainer
-metrics:
-- accuracy
 model-index:
-- name: comprehendo-lower
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# comprehendo-lower
-This model is a fine-tuned version of [knowledgator/comprehend_it-base](https://huggingface.co/knowledgator/comprehend_it-base) on an unspecified dataset.
-It achieves the following results on the evaluation set:
-- Loss: 0.0145
-- Accuracy: 0.9958
 ## Model description
@@ -38,21 +33,17 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 3e-05
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 3
 ### Training results
-| Training Loss | Epoch | Step  | Validation Loss | Accuracy |
-|:-------------:|:-----:|:-----:|:---------------:|:--------:|
-| 0.0454        | 1.0   | 12169 | 0.0336          | 0.9919   |
-| 0.0165        | 2.0   | 24338 | 0.0186          | 0.9955   |
-| 0.0074        | 3.0   | 36507 | 0.0145          | 0.9958   |
 ### Framework versions

 ---
 library_name: transformers
+license: mit
+base_model: roberta-base
 tags:
 - generated_from_trainer
 model-index:
+- name: winzo_feature_classifier_lowercase
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# winzo_feature_classifier_lowercase
+This model is a fine-tuned version of [roberta-base](https://huggingface.co/roberta-base) on an unspecified dataset.
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 5e-05
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 500
+- num_epochs: 5
 ### Training results
 ### Framework versions

config.json CHANGED Viewed

@@ -1,45 +1,88 @@
 {
-  "_name_or_path": "knowledgator/comprehend_it-base",
   "architectures": [
-    "DebertaV2ForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
-    "0": "contradiction",
-    "1": "entailment",
-    "2": "neutral"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
-    "contradiction": 0,
-    "entailment": 1,
-    "neutral": 2
   },
-  "layer_norm_eps": 1e-07,
-  "max_position_embeddings": 512,
-  "max_relative_positions": -1,
-  "model_type": "deberta-v2",
-  "norm_rel_ebd": "layer_norm",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
-  "pad_token_id": 0,
-  "pooler_dropout": 0,
-  "pooler_hidden_act": "gelu",
-  "pooler_hidden_size": 768,
-  "pos_att_type": [
-    "p2c",
-    "c2p"
-  ],
-  "position_biased_input": false,
-  "position_buckets": 256,
-  "relative_attention": true,
-  "share_att_key": true,
   "torch_dtype": "float32",
   "transformers_version": "4.45.1",
-  "type_vocab_size": 0,
-  "vocab_size": 128100
 }

 {
+  "_name_or_path": "roberta-base",
   "architectures": [
+    "RobertaForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6",
+    "7": "LABEL_7",
+    "8": "LABEL_8",
+    "9": "LABEL_9",
+    "10": "LABEL_10",
+    "11": "LABEL_11",
+    "12": "LABEL_12",
+    "13": "LABEL_13",
+    "14": "LABEL_14",
+    "15": "LABEL_15",
+    "16": "LABEL_16",
+    "17": "LABEL_17",
+    "18": "LABEL_18",
+    "19": "LABEL_19",
+    "20": "LABEL_20",
+    "21": "LABEL_21",
+    "22": "LABEL_22",
+    "23": "LABEL_23",
+    "24": "LABEL_24",
+    "25": "LABEL_25",
+    "26": "LABEL_26",
+    "27": "LABEL_27"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_10": 10,
+    "LABEL_11": 11,
+    "LABEL_12": 12,
+    "LABEL_13": 13,
+    "LABEL_14": 14,
+    "LABEL_15": 15,
+    "LABEL_16": 16,
+    "LABEL_17": 17,
+    "LABEL_18": 18,
+    "LABEL_19": 19,
+    "LABEL_2": 2,
+    "LABEL_20": 20,
+    "LABEL_21": 21,
+    "LABEL_22": 22,
+    "LABEL_23": 23,
+    "LABEL_24": 24,
+    "LABEL_25": 25,
+    "LABEL_26": 26,
+    "LABEL_27": 27,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6,
+    "LABEL_7": 7,
+    "LABEL_8": 8,
+    "LABEL_9": 9
   },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.45.1",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
 }

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9a5e758e47dfbea6d19d00337076f96d80ce3ad0998a999bb02a53b5f9924bf7
-size 737722356

 version https://git-lfs.github.com/spec/v1
+oid sha256:3375f4737db47470175b2fbedbff9716c3e4ef59a54ac9a39ea605b4d55c58fc
+size 498692800

runs/Oct04_05-38-28_sagemaker-data-scie-ml-g4dn-xlarge-ba9c1f5c18780e053e03a6778121/events.out.tfevents.1728020309.sagemaker-data-scie-ml-g4dn-xlarge-ba9c1f5c18780e053e03a6778121 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7f53f9b20c56d5d56b7c341fe1979ce321d503da17bf7811bfac0e87ef1380a
+size 6229

runs/Oct04_05-39-14_sagemaker-data-scie-ml-g4dn-xlarge-ba9c1f5c18780e053e03a6778121/events.out.tfevents.1728020355.sagemaker-data-scie-ml-g4dn-xlarge-ba9c1f5c18780e053e03a6778121 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b150e53eca67ee250b2686f27ae07c5146c9277001a9346f95c805c78adfa533
+size 32095

special_tokens_map.json CHANGED Viewed

@@ -1,51 +1,15 @@
 {
-  "bos_token": {
-    "content": "[CLS]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "cls_token": {
-    "content": "[CLS]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "[SEP]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
   "mask_token": {
-    "content": "[MASK]",
-    "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": {
-    "content": "[PAD]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "content": "[SEP]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "[UNK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
 }

 {
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
   "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
 }

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -1,58 +1,57 @@
 {
   "added_tokens_decoder": {
     "0": {
-      "content": "[PAD]",
       "lstrip": false,
-      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
     "1": {
-      "content": "[CLS]",
       "lstrip": false,
-      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
     "2": {
-      "content": "[SEP]",
       "lstrip": false,
-      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
     "3": {
-      "content": "[UNK]",
       "lstrip": false,
-      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "128000": {
-      "content": "[MASK]",
-      "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
-  "bos_token": "[CLS]",
-  "clean_up_tokenization_spaces": true,
-  "cls_token": "[CLS]",
-  "do_lower_case": false,
-  "eos_token": "[SEP]",
-  "mask_token": "[MASK]",
   "model_max_length": 512,
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "sp_model_kwargs": {},
-  "split_by_punct": false,
-  "tokenizer_class": "DebertaV2Tokenizer",
-  "unk_token": "[UNK]",
-  "vocab_type": "spm"
 }

 {
+  "add_prefix_space": false,
   "added_tokens_decoder": {
     "0": {
+      "content": "<s>",
       "lstrip": false,
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
     "1": {
+      "content": "<pad>",
       "lstrip": false,
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
     "2": {
+      "content": "</s>",
       "lstrip": false,
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
     "3": {
+      "content": "<unk>",
       "lstrip": false,
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "mask_token": "<mask>",
   "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:273af5fbb55b39ced9509acf52aca544c3d32c8129b3047a6b2072525f192119
-size 5240

 version https://git-lfs.github.com/spec/v1
+oid sha256:358a110cb11bd65700f2d3a4573cb985a1343b8e0b68d848e631301ea2a6e0ef
+size 5368

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff