fdaudens HF staff committed on
Commit
eafa9e4
·
verified ·
1 Parent(s): 938646e

Training in progress, epoch 1

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "MoritzLaurer/ModernBERT-large-zeroshot-v2.0",
3
  "architectures": [
4
  "ModernBertForSequenceClassification"
5
  ],
@@ -19,10 +19,18 @@
19
  "global_rope_theta": 160000.0,
20
  "gradient_checkpointing": false,
21
  "hidden_activation": "gelu",
22
- "hidden_size": 1024,
23
  "id2label": {
24
  "0": "AI Applications",
25
  "1": "AI Infrastructure",
 
 
 
 
 
 
 
 
26
  "10": "Deep Learning",
27
  "11": "Embeddings",
28
  "12": "Ethics",
@@ -33,7 +41,6 @@
33
  "17": "Gradio",
34
  "18": "Hardware",
35
  "19": "Hugging Face",
36
- "2": "Agents",
37
  "20": "Image",
38
  "21": "LLMs",
39
  "22": "ML Theory",
@@ -44,58 +51,51 @@
44
  "27": "Reinforcement Learning",
45
  "28": "Research Papers",
46
  "29": "Robotics",
47
- "3": "Argilla",
48
  "30": "Security",
49
  "31": "Time Series",
50
  "32": "Tutorials",
51
  "33": "Video",
52
- "34": "XetHub",
53
- "4": "Audio",
54
- "5": "Cloud Deployment",
55
- "6": "Code",
56
- "7": "Computer Vision",
57
- "8": "Data Engineering",
58
- "9": "Data Science"
59
  },
60
  "initializer_cutoff_factor": 2.0,
61
  "initializer_range": 0.02,
62
- "intermediate_size": 2624,
63
  "label2id": {
64
- "AI Applications": "0",
65
- "AI Infrastructure": "1",
66
- "Agents": "2",
67
- "Argilla": "3",
68
- "Audio": "4",
69
- "Cloud Deployment": "5",
70
- "Code": "6",
71
- "Computer Vision": "7",
72
- "Data Engineering": "8",
73
- "Data Science": "9",
74
- "Deep Learning": "10",
75
- "Embeddings": "11",
76
- "Ethics": "12",
77
- "Evaluation": "13",
78
- "Fine-Tuning": "14",
79
- "Gaming": "15",
80
- "Generative AI": "16",
81
- "Gradio": "17",
82
- "Hardware": "18",
83
- "Hugging Face": "19",
84
- "Image": "20",
85
- "LLMs": "21",
86
- "ML Theory": "22",
87
- "MLOps": "23",
88
- "Model Optimization": "24",
89
- "Multimodal": "25",
90
- "NLP": "26",
91
- "Reinforcement Learning": "27",
92
- "Research Papers": "28",
93
- "Robotics": "29",
94
- "Security": "30",
95
- "Time Series": "31",
96
- "Tutorials": "32",
97
- "Video": "33",
98
- "XetHub": "34"
99
  },
100
  "layer_norm_eps": 1e-05,
101
  "local_attention": 128,
@@ -106,8 +106,8 @@
106
  "model_type": "modernbert",
107
  "norm_bias": false,
108
  "norm_eps": 1e-05,
109
- "num_attention_heads": 16,
110
- "num_hidden_layers": 28,
111
  "pad_token_id": 50283,
112
  "position_embedding_type": "absolute",
113
  "problem_type": "multi_label_classification",
 
1
  {
2
+ "_name_or_path": "answerdotai/ModernBERT-base",
3
  "architectures": [
4
  "ModernBertForSequenceClassification"
5
  ],
 
19
  "global_rope_theta": 160000.0,
20
  "gradient_checkpointing": false,
21
  "hidden_activation": "gelu",
22
+ "hidden_size": 768,
23
  "id2label": {
24
  "0": "AI Applications",
25
  "1": "AI Infrastructure",
26
+ "2": "Agents",
27
+ "3": "Argilla",
28
+ "4": "Audio",
29
+ "5": "Cloud Deployment",
30
+ "6": "Code",
31
+ "7": "Computer Vision",
32
+ "8": "Data Engineering",
33
+ "9": "Data Science",
34
  "10": "Deep Learning",
35
  "11": "Embeddings",
36
  "12": "Ethics",
 
41
  "17": "Gradio",
42
  "18": "Hardware",
43
  "19": "Hugging Face",
 
44
  "20": "Image",
45
  "21": "LLMs",
46
  "22": "ML Theory",
 
51
  "27": "Reinforcement Learning",
52
  "28": "Research Papers",
53
  "29": "Robotics",
 
54
  "30": "Security",
55
  "31": "Time Series",
56
  "32": "Tutorials",
57
  "33": "Video",
58
+ "34": "XetHub"
 
 
 
 
 
 
59
  },
60
  "initializer_cutoff_factor": 2.0,
61
  "initializer_range": 0.02,
62
+ "intermediate_size": 1152,
63
  "label2id": {
64
+ "AI Applications": 0,
65
+ "AI Infrastructure": 1,
66
+ "Agents": 2,
67
+ "Argilla": 3,
68
+ "Audio": 4,
69
+ "Cloud Deployment": 5,
70
+ "Code": 6,
71
+ "Computer Vision": 7,
72
+ "Data Engineering": 8,
73
+ "Data Science": 9,
74
+ "Deep Learning": 10,
75
+ "Embeddings": 11,
76
+ "Ethics": 12,
77
+ "Evaluation": 13,
78
+ "Fine-Tuning": 14,
79
+ "Gaming": 15,
80
+ "Generative AI": 16,
81
+ "Gradio": 17,
82
+ "Hardware": 18,
83
+ "Hugging Face": 19,
84
+ "Image": 20,
85
+ "LLMs": 21,
86
+ "ML Theory": 22,
87
+ "MLOps": 23,
88
+ "Model Optimization": 24,
89
+ "Multimodal": 25,
90
+ "NLP": 26,
91
+ "Reinforcement Learning": 27,
92
+ "Research Papers": 28,
93
+ "Robotics": 29,
94
+ "Security": 30,
95
+ "Time Series": 31,
96
+ "Tutorials": 32,
97
+ "Video": 33,
98
+ "XetHub": 34
99
  },
100
  "layer_norm_eps": 1e-05,
101
  "local_attention": 128,
 
106
  "model_type": "modernbert",
107
  "norm_bias": false,
108
  "norm_eps": 1e-05,
109
+ "num_attention_heads": 12,
110
+ "num_hidden_layers": 22,
111
  "pad_token_id": 50283,
112
  "position_embedding_type": "absolute",
113
  "problem_type": "multi_label_classification",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26c55b4ac1821aea273ef1971a067e5a38c04d77880ef7300b28c58c20f99aed
3
- size 1583486940
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d194be30d181b8cfb9202739e9f4b26972d406990975b65d04cc0e9f175d81bb
3
+ size 598541300
runs/Jan09_18-38-04_c50f821e5c9f/events.out.tfevents.1736447897.c50f821e5c9f.1900.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:285768ad12d8643cfe2018722492fb63534d6be78eb262f57dc45f4708a3d1f2
3
+ size 5846
runs/Jan09_18-41-30_c50f821e5c9f/events.out.tfevents.1736448096.c50f821e5c9f.1900.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b30db8cd2bde882cc94193d606f8ff49c5ae1687d630b0c4a2e4a55750341dd5
3
+ size 5846
runs/Jan09_18-44-26_c50f821e5c9f/events.out.tfevents.1736448277.c50f821e5c9f.1900.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fb689cc88c838302afaa2ff2255d0eb7c2b20ef7eca85e6288fef7b56267a4c
3
+ size 5844
runs/Jan09_18-55-21_c50f821e5c9f/events.out.tfevents.1736448922.c50f821e5c9f.1900.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:113d7bc31449babf7582255b2d0daf76da803cae562809cb3c150a8011dadfc1
3
+ size 7820
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d5b4f5503943b9b4ab58e46f0f1707a99bb374600b44c587d28061e0a442891
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25bd22ddc0517daf7682a75eb884fa14c040494f0abb64be5908371babad56b0
3
  size 5432