Training in progress, epoch 1

Browse files

Files changed (7) hide show

config.json +49 -49
model.safetensors +2 -2
runs/Jan09_18-38-04_c50f821e5c9f/events.out.tfevents.1736447897.c50f821e5c9f.1900.0 +3 -0
runs/Jan09_18-41-30_c50f821e5c9f/events.out.tfevents.1736448096.c50f821e5c9f.1900.1 +3 -0
runs/Jan09_18-44-26_c50f821e5c9f/events.out.tfevents.1736448277.c50f821e5c9f.1900.2 +3 -0
runs/Jan09_18-55-21_c50f821e5c9f/events.out.tfevents.1736448922.c50f821e5c9f.1900.3 +3 -0
training_args.bin +1 -1

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "MoritzLaurer/ModernBERT-large-zeroshot-v2.0",
   "architectures": [
     "ModernBertForSequenceClassification"
   ],
@@ -19,10 +19,18 @@
   "global_rope_theta": 160000.0,
   "gradient_checkpointing": false,
   "hidden_activation": "gelu",
-  "hidden_size": 1024,
   "id2label": {
     "0": "AI Applications",
     "1": "AI Infrastructure",
     "10": "Deep Learning",
     "11": "Embeddings",
     "12": "Ethics",
@@ -33,7 +41,6 @@
     "17": "Gradio",
     "18": "Hardware",
     "19": "Hugging Face",
-    "2": "Agents",
     "20": "Image",
     "21": "LLMs",
     "22": "ML Theory",
@@ -44,58 +51,51 @@
     "27": "Reinforcement Learning",
     "28": "Research Papers",
     "29": "Robotics",
-    "3": "Argilla",
     "30": "Security",
     "31": "Time Series",
     "32": "Tutorials",
     "33": "Video",
-    "34": "XetHub",
-    "4": "Audio",
-    "5": "Cloud Deployment",
-    "6": "Code",
-    "7": "Computer Vision",
-    "8": "Data Engineering",
-    "9": "Data Science"
   },
   "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
-  "intermediate_size": 2624,
   "label2id": {
-    "AI Applications": "0",
-    "AI Infrastructure": "1",
-    "Agents": "2",
-    "Argilla": "3",
-    "Audio": "4",
-    "Cloud Deployment": "5",
-    "Code": "6",
-    "Computer Vision": "7",
-    "Data Engineering": "8",
-    "Data Science": "9",
-    "Deep Learning": "10",
-    "Embeddings": "11",
-    "Ethics": "12",
-    "Evaluation": "13",
-    "Fine-Tuning": "14",
-    "Gaming": "15",
-    "Generative AI": "16",
-    "Gradio": "17",
-    "Hardware": "18",
-    "Hugging Face": "19",
-    "Image": "20",
-    "LLMs": "21",
-    "ML Theory": "22",
-    "MLOps": "23",
-    "Model Optimization": "24",
-    "Multimodal": "25",
-    "NLP": "26",
-    "Reinforcement Learning": "27",
-    "Research Papers": "28",
-    "Robotics": "29",
-    "Security": "30",
-    "Time Series": "31",
-    "Tutorials": "32",
-    "Video": "33",
-    "XetHub": "34"
   },
   "layer_norm_eps": 1e-05,
   "local_attention": 128,
@@ -106,8 +106,8 @@
   "model_type": "modernbert",
   "norm_bias": false,
   "norm_eps": 1e-05,
-  "num_attention_heads": 16,
-  "num_hidden_layers": 28,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
   "problem_type": "multi_label_classification",

 {
+  "_name_or_path": "answerdotai/ModernBERT-base",
   "architectures": [
     "ModernBertForSequenceClassification"
   ],
   "global_rope_theta": 160000.0,
   "gradient_checkpointing": false,
   "hidden_activation": "gelu",
+  "hidden_size": 768,
   "id2label": {
     "0": "AI Applications",
     "1": "AI Infrastructure",
+    "2": "Agents",
+    "3": "Argilla",
+    "4": "Audio",
+    "5": "Cloud Deployment",
+    "6": "Code",
+    "7": "Computer Vision",
+    "8": "Data Engineering",
+    "9": "Data Science",
     "10": "Deep Learning",
     "11": "Embeddings",
     "12": "Ethics",
     "17": "Gradio",
     "18": "Hardware",
     "19": "Hugging Face",
     "20": "Image",
     "21": "LLMs",
     "22": "ML Theory",
     "27": "Reinforcement Learning",
     "28": "Research Papers",
     "29": "Robotics",
     "30": "Security",
     "31": "Time Series",
     "32": "Tutorials",
     "33": "Video",
+    "34": "XetHub"
   },
   "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
+  "intermediate_size": 1152,
   "label2id": {
+    "AI Applications": 0,
+    "AI Infrastructure": 1,
+    "Agents": 2,
+    "Argilla": 3,
+    "Audio": 4,
+    "Cloud Deployment": 5,
+    "Code": 6,
+    "Computer Vision": 7,
+    "Data Engineering": 8,
+    "Data Science": 9,
+    "Deep Learning": 10,
+    "Embeddings": 11,
+    "Ethics": 12,
+    "Evaluation": 13,
+    "Fine-Tuning": 14,
+    "Gaming": 15,
+    "Generative AI": 16,
+    "Gradio": 17,
+    "Hardware": 18,
+    "Hugging Face": 19,
+    "Image": 20,
+    "LLMs": 21,
+    "ML Theory": 22,
+    "MLOps": 23,
+    "Model Optimization": 24,
+    "Multimodal": 25,
+    "NLP": 26,
+    "Reinforcement Learning": 27,
+    "Research Papers": 28,
+    "Robotics": 29,
+    "Security": 30,
+    "Time Series": 31,
+    "Tutorials": 32,
+    "Video": 33,
+    "XetHub": 34
   },
   "layer_norm_eps": 1e-05,
   "local_attention": 128,
   "model_type": "modernbert",
   "norm_bias": false,
   "norm_eps": 1e-05,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 22,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
   "problem_type": "multi_label_classification",

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26c55b4ac1821aea273ef1971a067e5a38c04d77880ef7300b28c58c20f99aed
-size 1583486940

 version https://git-lfs.github.com/spec/v1
+oid sha256:d194be30d181b8cfb9202739e9f4b26972d406990975b65d04cc0e9f175d81bb
+size 598541300

runs/Jan09_18-38-04_c50f821e5c9f/events.out.tfevents.1736447897.c50f821e5c9f.1900.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:285768ad12d8643cfe2018722492fb63534d6be78eb262f57dc45f4708a3d1f2
+size 5846

runs/Jan09_18-41-30_c50f821e5c9f/events.out.tfevents.1736448096.c50f821e5c9f.1900.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b30db8cd2bde882cc94193d606f8ff49c5ae1687d630b0c4a2e4a55750341dd5
+size 5846

runs/Jan09_18-44-26_c50f821e5c9f/events.out.tfevents.1736448277.c50f821e5c9f.1900.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9fb689cc88c838302afaa2ff2255d0eb7c2b20ef7eca85e6288fef7b56267a4c
+size 5844

runs/Jan09_18-55-21_c50f821e5c9f/events.out.tfevents.1736448922.c50f821e5c9f.1900.3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:113d7bc31449babf7582255b2d0daf76da803cae562809cb3c150a8011dadfc1
+size 7820

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d5b4f5503943b9b4ab58e46f0f1707a99bb374600b44c587d28061e0a442891
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:25bd22ddc0517daf7682a75eb884fa14c040494f0abb64be5908371babad56b0
 size 5432