Training in progress, epoch 1
Browse files- config.json +49 -49
- model.safetensors +2 -2
- runs/Jan09_18-38-04_c50f821e5c9f/events.out.tfevents.1736447897.c50f821e5c9f.1900.0 +3 -0
- runs/Jan09_18-41-30_c50f821e5c9f/events.out.tfevents.1736448096.c50f821e5c9f.1900.1 +3 -0
- runs/Jan09_18-44-26_c50f821e5c9f/events.out.tfevents.1736448277.c50f821e5c9f.1900.2 +3 -0
- runs/Jan09_18-55-21_c50f821e5c9f/events.out.tfevents.1736448922.c50f821e5c9f.1900.3 +3 -0
- training_args.bin +1 -1
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"ModernBertForSequenceClassification"
|
5 |
],
|
@@ -19,10 +19,18 @@
|
|
19 |
"global_rope_theta": 160000.0,
|
20 |
"gradient_checkpointing": false,
|
21 |
"hidden_activation": "gelu",
|
22 |
-
"hidden_size":
|
23 |
"id2label": {
|
24 |
"0": "AI Applications",
|
25 |
"1": "AI Infrastructure",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
"10": "Deep Learning",
|
27 |
"11": "Embeddings",
|
28 |
"12": "Ethics",
|
@@ -33,7 +41,6 @@
|
|
33 |
"17": "Gradio",
|
34 |
"18": "Hardware",
|
35 |
"19": "Hugging Face",
|
36 |
-
"2": "Agents",
|
37 |
"20": "Image",
|
38 |
"21": "LLMs",
|
39 |
"22": "ML Theory",
|
@@ -44,58 +51,51 @@
|
|
44 |
"27": "Reinforcement Learning",
|
45 |
"28": "Research Papers",
|
46 |
"29": "Robotics",
|
47 |
-
"3": "Argilla",
|
48 |
"30": "Security",
|
49 |
"31": "Time Series",
|
50 |
"32": "Tutorials",
|
51 |
"33": "Video",
|
52 |
-
"34": "XetHub"
|
53 |
-
"4": "Audio",
|
54 |
-
"5": "Cloud Deployment",
|
55 |
-
"6": "Code",
|
56 |
-
"7": "Computer Vision",
|
57 |
-
"8": "Data Engineering",
|
58 |
-
"9": "Data Science"
|
59 |
},
|
60 |
"initializer_cutoff_factor": 2.0,
|
61 |
"initializer_range": 0.02,
|
62 |
-
"intermediate_size":
|
63 |
"label2id": {
|
64 |
-
"AI Applications":
|
65 |
-
"AI Infrastructure":
|
66 |
-
"Agents":
|
67 |
-
"Argilla":
|
68 |
-
"Audio":
|
69 |
-
"Cloud Deployment":
|
70 |
-
"Code":
|
71 |
-
"Computer Vision":
|
72 |
-
"Data Engineering":
|
73 |
-
"Data Science":
|
74 |
-
"Deep Learning":
|
75 |
-
"Embeddings":
|
76 |
-
"Ethics":
|
77 |
-
"Evaluation":
|
78 |
-
"Fine-Tuning":
|
79 |
-
"Gaming":
|
80 |
-
"Generative AI":
|
81 |
-
"Gradio":
|
82 |
-
"Hardware":
|
83 |
-
"Hugging Face":
|
84 |
-
"Image":
|
85 |
-
"LLMs":
|
86 |
-
"ML Theory":
|
87 |
-
"MLOps":
|
88 |
-
"Model Optimization":
|
89 |
-
"Multimodal":
|
90 |
-
"NLP":
|
91 |
-
"Reinforcement Learning":
|
92 |
-
"Research Papers":
|
93 |
-
"Robotics":
|
94 |
-
"Security":
|
95 |
-
"Time Series":
|
96 |
-
"Tutorials":
|
97 |
-
"Video":
|
98 |
-
"XetHub":
|
99 |
},
|
100 |
"layer_norm_eps": 1e-05,
|
101 |
"local_attention": 128,
|
@@ -106,8 +106,8 @@
|
|
106 |
"model_type": "modernbert",
|
107 |
"norm_bias": false,
|
108 |
"norm_eps": 1e-05,
|
109 |
-
"num_attention_heads":
|
110 |
-
"num_hidden_layers":
|
111 |
"pad_token_id": 50283,
|
112 |
"position_embedding_type": "absolute",
|
113 |
"problem_type": "multi_label_classification",
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "answerdotai/ModernBERT-base",
|
3 |
"architectures": [
|
4 |
"ModernBertForSequenceClassification"
|
5 |
],
|
|
|
19 |
"global_rope_theta": 160000.0,
|
20 |
"gradient_checkpointing": false,
|
21 |
"hidden_activation": "gelu",
|
22 |
+
"hidden_size": 768,
|
23 |
"id2label": {
|
24 |
"0": "AI Applications",
|
25 |
"1": "AI Infrastructure",
|
26 |
+
"2": "Agents",
|
27 |
+
"3": "Argilla",
|
28 |
+
"4": "Audio",
|
29 |
+
"5": "Cloud Deployment",
|
30 |
+
"6": "Code",
|
31 |
+
"7": "Computer Vision",
|
32 |
+
"8": "Data Engineering",
|
33 |
+
"9": "Data Science",
|
34 |
"10": "Deep Learning",
|
35 |
"11": "Embeddings",
|
36 |
"12": "Ethics",
|
|
|
41 |
"17": "Gradio",
|
42 |
"18": "Hardware",
|
43 |
"19": "Hugging Face",
|
|
|
44 |
"20": "Image",
|
45 |
"21": "LLMs",
|
46 |
"22": "ML Theory",
|
|
|
51 |
"27": "Reinforcement Learning",
|
52 |
"28": "Research Papers",
|
53 |
"29": "Robotics",
|
|
|
54 |
"30": "Security",
|
55 |
"31": "Time Series",
|
56 |
"32": "Tutorials",
|
57 |
"33": "Video",
|
58 |
+
"34": "XetHub"
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
},
|
60 |
"initializer_cutoff_factor": 2.0,
|
61 |
"initializer_range": 0.02,
|
62 |
+
"intermediate_size": 1152,
|
63 |
"label2id": {
|
64 |
+
"AI Applications": 0,
|
65 |
+
"AI Infrastructure": 1,
|
66 |
+
"Agents": 2,
|
67 |
+
"Argilla": 3,
|
68 |
+
"Audio": 4,
|
69 |
+
"Cloud Deployment": 5,
|
70 |
+
"Code": 6,
|
71 |
+
"Computer Vision": 7,
|
72 |
+
"Data Engineering": 8,
|
73 |
+
"Data Science": 9,
|
74 |
+
"Deep Learning": 10,
|
75 |
+
"Embeddings": 11,
|
76 |
+
"Ethics": 12,
|
77 |
+
"Evaluation": 13,
|
78 |
+
"Fine-Tuning": 14,
|
79 |
+
"Gaming": 15,
|
80 |
+
"Generative AI": 16,
|
81 |
+
"Gradio": 17,
|
82 |
+
"Hardware": 18,
|
83 |
+
"Hugging Face": 19,
|
84 |
+
"Image": 20,
|
85 |
+
"LLMs": 21,
|
86 |
+
"ML Theory": 22,
|
87 |
+
"MLOps": 23,
|
88 |
+
"Model Optimization": 24,
|
89 |
+
"Multimodal": 25,
|
90 |
+
"NLP": 26,
|
91 |
+
"Reinforcement Learning": 27,
|
92 |
+
"Research Papers": 28,
|
93 |
+
"Robotics": 29,
|
94 |
+
"Security": 30,
|
95 |
+
"Time Series": 31,
|
96 |
+
"Tutorials": 32,
|
97 |
+
"Video": 33,
|
98 |
+
"XetHub": 34
|
99 |
},
|
100 |
"layer_norm_eps": 1e-05,
|
101 |
"local_attention": 128,
|
|
|
106 |
"model_type": "modernbert",
|
107 |
"norm_bias": false,
|
108 |
"norm_eps": 1e-05,
|
109 |
+
"num_attention_heads": 12,
|
110 |
+
"num_hidden_layers": 22,
|
111 |
"pad_token_id": 50283,
|
112 |
"position_embedding_type": "absolute",
|
113 |
"problem_type": "multi_label_classification",
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d194be30d181b8cfb9202739e9f4b26972d406990975b65d04cc0e9f175d81bb
|
3 |
+
size 598541300
|
runs/Jan09_18-38-04_c50f821e5c9f/events.out.tfevents.1736447897.c50f821e5c9f.1900.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:285768ad12d8643cfe2018722492fb63534d6be78eb262f57dc45f4708a3d1f2
|
3 |
+
size 5846
|
runs/Jan09_18-41-30_c50f821e5c9f/events.out.tfevents.1736448096.c50f821e5c9f.1900.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b30db8cd2bde882cc94193d606f8ff49c5ae1687d630b0c4a2e4a55750341dd5
|
3 |
+
size 5846
|
runs/Jan09_18-44-26_c50f821e5c9f/events.out.tfevents.1736448277.c50f821e5c9f.1900.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fb689cc88c838302afaa2ff2255d0eb7c2b20ef7eca85e6288fef7b56267a4c
|
3 |
+
size 5844
|
runs/Jan09_18-55-21_c50f821e5c9f/events.out.tfevents.1736448922.c50f821e5c9f.1900.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:113d7bc31449babf7582255b2d0daf76da803cae562809cb3c150a8011dadfc1
|
3 |
+
size 7820
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5432
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25bd22ddc0517daf7682a75eb884fa14c040494f0abb64be5908371babad56b0
|
3 |
size 5432
|