Training in progress, epoch 2
Browse files
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4976698672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2323030a1456390635c8b4e5f449b682a619123f01cc8b50e2d920ea2ce38ba
|
3 |
size 4976698672
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999802720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b124836fe0c38fb22b359785b9088cd64d5baad3add9c2be98d92b57bd79306
|
3 |
size 4999802720
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4915916176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a76e13dea231215a4b7eb125f62d8452e93cac61f5fb9911095c09b57d1fae6d
|
3 |
size 4915916176
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1168138808
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bcee2a295e28bf02818709eac3bf41464601066ce4d4194b0eba070beedee38
|
3 |
size 1168138808
|
trainer_log.jsonl
CHANGED
@@ -89,3 +89,66 @@
|
|
89 |
{"current_steps": 89, "total_steps": 315, "loss": 0.3053, "lr": 8.59994133243427e-06, "epoch": 1.4126984126984126, "percentage": 28.25, "elapsed_time": "0:07:57", "remaining_time": "0:20:13"}
|
90 |
{"current_steps": 90, "total_steps": 315, "loss": 0.2567, "lr": 8.563284730770222e-06, "epoch": 1.4285714285714286, "percentage": 28.57, "elapsed_time": "0:08:02", "remaining_time": "0:20:06"}
|
91 |
{"current_steps": 91, "total_steps": 315, "loss": 0.2339, "lr": 8.52623475669285e-06, "epoch": 1.4444444444444444, "percentage": 28.89, "elapsed_time": "0:08:07", "remaining_time": "0:20:00"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
{"current_steps": 89, "total_steps": 315, "loss": 0.3053, "lr": 8.59994133243427e-06, "epoch": 1.4126984126984126, "percentage": 28.25, "elapsed_time": "0:07:57", "remaining_time": "0:20:13"}
|
90 |
{"current_steps": 90, "total_steps": 315, "loss": 0.2567, "lr": 8.563284730770222e-06, "epoch": 1.4285714285714286, "percentage": 28.57, "elapsed_time": "0:08:02", "remaining_time": "0:20:06"}
|
91 |
{"current_steps": 91, "total_steps": 315, "loss": 0.2339, "lr": 8.52623475669285e-06, "epoch": 1.4444444444444444, "percentage": 28.89, "elapsed_time": "0:08:07", "remaining_time": "0:20:00"}
|
92 |
+
{"current_steps": 92, "total_steps": 315, "loss": 0.2888, "lr": 8.488795500371427e-06, "epoch": 1.4603174603174602, "percentage": 29.21, "elapsed_time": "0:08:12", "remaining_time": "0:19:53"}
|
93 |
+
{"current_steps": 93, "total_steps": 315, "loss": 0.2689, "lr": 8.450971094950433e-06, "epoch": 1.4761904761904763, "percentage": 29.52, "elapsed_time": "0:08:16", "remaining_time": "0:19:45"}
|
94 |
+
{"current_steps": 94, "total_steps": 315, "loss": 0.3078, "lr": 8.412765716093273e-06, "epoch": 1.492063492063492, "percentage": 29.84, "elapsed_time": "0:08:21", "remaining_time": "0:19:39"}
|
95 |
+
{"current_steps": 95, "total_steps": 315, "loss": 0.2912, "lr": 8.374183581521288e-06, "epoch": 1.507936507936508, "percentage": 30.16, "elapsed_time": "0:08:27", "remaining_time": "0:19:35"}
|
96 |
+
{"current_steps": 96, "total_steps": 315, "loss": 0.3256, "lr": 8.335228950548164e-06, "epoch": 1.5238095238095237, "percentage": 30.48, "elapsed_time": "0:08:32", "remaining_time": "0:19:28"}
|
97 |
+
{"current_steps": 97, "total_steps": 315, "loss": 0.2535, "lr": 8.29590612360969e-06, "epoch": 1.5396825396825395, "percentage": 30.79, "elapsed_time": "0:08:36", "remaining_time": "0:19:20"}
|
98 |
+
{"current_steps": 98, "total_steps": 315, "loss": 0.278, "lr": 8.256219441789023e-06, "epoch": 1.5555555555555556, "percentage": 31.11, "elapsed_time": "0:08:41", "remaining_time": "0:19:13"}
|
99 |
+
{"current_steps": 99, "total_steps": 315, "loss": 0.2403, "lr": 8.216173286337449e-06, "epoch": 1.5714285714285714, "percentage": 31.43, "elapsed_time": "0:08:45", "remaining_time": "0:19:07"}
|
100 |
+
{"current_steps": 100, "total_steps": 315, "loss": 0.2594, "lr": 8.175772078190706e-06, "epoch": 1.5873015873015874, "percentage": 31.75, "elapsed_time": "0:08:50", "remaining_time": "0:19:00"}
|
101 |
+
{"current_steps": 101, "total_steps": 315, "loss": 0.2489, "lr": 8.135020277480933e-06, "epoch": 1.6031746031746033, "percentage": 32.06, "elapsed_time": "0:08:55", "remaining_time": "0:18:55"}
|
102 |
+
{"current_steps": 102, "total_steps": 315, "loss": 0.2904, "lr": 8.093922383044293e-06, "epoch": 1.619047619047619, "percentage": 32.38, "elapsed_time": "0:09:00", "remaining_time": "0:18:49"}
|
103 |
+
{"current_steps": 103, "total_steps": 315, "loss": 0.2269, "lr": 8.052482931924307e-06, "epoch": 1.6349206349206349, "percentage": 32.7, "elapsed_time": "0:09:05", "remaining_time": "0:18:43"}
|
104 |
+
{"current_steps": 104, "total_steps": 315, "loss": 0.2961, "lr": 8.010706498870997e-06, "epoch": 1.6507936507936507, "percentage": 33.02, "elapsed_time": "0:09:10", "remaining_time": "0:18:37"}
|
105 |
+
{"current_steps": 105, "total_steps": 315, "loss": 0.2793, "lr": 7.968597695835845e-06, "epoch": 1.6666666666666665, "percentage": 33.33, "elapsed_time": "0:09:15", "remaining_time": "0:18:31"}
|
106 |
+
{"current_steps": 106, "total_steps": 315, "loss": 0.2727, "lr": 7.926161171462647e-06, "epoch": 1.6825396825396826, "percentage": 33.65, "elapsed_time": "0:09:21", "remaining_time": "0:18:26"}
|
107 |
+
{"current_steps": 107, "total_steps": 315, "loss": 0.3002, "lr": 7.883401610574338e-06, "epoch": 1.6984126984126984, "percentage": 33.97, "elapsed_time": "0:09:25", "remaining_time": "0:18:19"}
|
108 |
+
{"current_steps": 108, "total_steps": 315, "loss": 0.267, "lr": 7.84032373365578e-06, "epoch": 1.7142857142857144, "percentage": 34.29, "elapsed_time": "0:09:30", "remaining_time": "0:18:13"}
|
109 |
+
{"current_steps": 109, "total_steps": 315, "loss": 0.2372, "lr": 7.796932296332666e-06, "epoch": 1.7301587301587302, "percentage": 34.6, "elapsed_time": "0:09:35", "remaining_time": "0:18:07"}
|
110 |
+
{"current_steps": 110, "total_steps": 315, "loss": 0.209, "lr": 7.753232088846505e-06, "epoch": 1.746031746031746, "percentage": 34.92, "elapsed_time": "0:09:40", "remaining_time": "0:18:01"}
|
111 |
+
{"current_steps": 111, "total_steps": 315, "loss": 0.3326, "lr": 7.709227935525796e-06, "epoch": 1.7619047619047619, "percentage": 35.24, "elapsed_time": "0:09:44", "remaining_time": "0:17:55"}
|
112 |
+
{"current_steps": 112, "total_steps": 315, "loss": 0.2302, "lr": 7.664924694253444e-06, "epoch": 1.7777777777777777, "percentage": 35.56, "elapsed_time": "0:09:49", "remaining_time": "0:17:48"}
|
113 |
+
{"current_steps": 113, "total_steps": 315, "loss": 0.2521, "lr": 7.620327255930475e-06, "epoch": 1.7936507936507935, "percentage": 35.87, "elapsed_time": "0:09:54", "remaining_time": "0:17:42"}
|
114 |
+
{"current_steps": 114, "total_steps": 315, "loss": 0.281, "lr": 7.575440543936092e-06, "epoch": 1.8095238095238095, "percentage": 36.19, "elapsed_time": "0:09:59", "remaining_time": "0:17:36"}
|
115 |
+
{"current_steps": 115, "total_steps": 315, "loss": 0.2883, "lr": 7.530269513584158e-06, "epoch": 1.8253968253968254, "percentage": 36.51, "elapsed_time": "0:10:03", "remaining_time": "0:17:30"}
|
116 |
+
{"current_steps": 116, "total_steps": 315, "loss": 0.2206, "lr": 7.484819151576148e-06, "epoch": 1.8412698412698414, "percentage": 36.83, "elapsed_time": "0:10:09", "remaining_time": "0:17:25"}
|
117 |
+
{"current_steps": 117, "total_steps": 315, "loss": 0.2747, "lr": 7.439094475450638e-06, "epoch": 1.8571428571428572, "percentage": 37.14, "elapsed_time": "0:10:13", "remaining_time": "0:17:18"}
|
118 |
+
{"current_steps": 118, "total_steps": 315, "loss": 0.2526, "lr": 7.393100533029383e-06, "epoch": 1.873015873015873, "percentage": 37.46, "elapsed_time": "0:10:18", "remaining_time": "0:17:12"}
|
119 |
+
{"current_steps": 119, "total_steps": 315, "loss": 0.2664, "lr": 7.346842401860069e-06, "epoch": 1.8888888888888888, "percentage": 37.78, "elapsed_time": "0:10:22", "remaining_time": "0:17:05"}
|
120 |
+
{"current_steps": 120, "total_steps": 315, "loss": 0.3116, "lr": 7.300325188655762e-06, "epoch": 1.9047619047619047, "percentage": 38.1, "elapsed_time": "0:10:27", "remaining_time": "0:16:59"}
|
121 |
+
{"current_steps": 121, "total_steps": 315, "loss": 0.2469, "lr": 7.253554028731149e-06, "epoch": 1.9206349206349205, "percentage": 38.41, "elapsed_time": "0:10:31", "remaining_time": "0:16:52"}
|
122 |
+
{"current_steps": 122, "total_steps": 315, "loss": 0.3119, "lr": 7.206534085435626e-06, "epoch": 1.9365079365079365, "percentage": 38.73, "elapsed_time": "0:10:36", "remaining_time": "0:16:46"}
|
123 |
+
{"current_steps": 123, "total_steps": 315, "loss": 0.3276, "lr": 7.159270549583278e-06, "epoch": 1.9523809523809523, "percentage": 39.05, "elapsed_time": "0:10:41", "remaining_time": "0:16:40"}
|
124 |
+
{"current_steps": 124, "total_steps": 315, "loss": 0.3258, "lr": 7.111768638879834e-06, "epoch": 1.9682539682539684, "percentage": 39.37, "elapsed_time": "0:10:45", "remaining_time": "0:16:34"}
|
125 |
+
{"current_steps": 125, "total_steps": 315, "loss": 0.2642, "lr": 7.064033597346658e-06, "epoch": 1.9841269841269842, "percentage": 39.68, "elapsed_time": "0:10:50", "remaining_time": "0:16:28"}
|
126 |
+
{"current_steps": 126, "total_steps": 315, "loss": 0.2064, "lr": 7.016070694741824e-06, "epoch": 2.0, "percentage": 40.0, "elapsed_time": "0:10:54", "remaining_time": "0:16:22"}
|
127 |
+
{"current_steps": 127, "total_steps": 315, "loss": 0.1503, "lr": 6.967885225978366e-06, "epoch": 2.015873015873016, "percentage": 40.32, "elapsed_time": "0:12:01", "remaining_time": "0:17:48"}
|
128 |
+
{"current_steps": 128, "total_steps": 315, "loss": 0.1975, "lr": 6.919482510539723e-06, "epoch": 2.0317460317460316, "percentage": 40.63, "elapsed_time": "0:12:06", "remaining_time": "0:17:41"}
|
129 |
+
{"current_steps": 129, "total_steps": 315, "loss": 0.1729, "lr": 6.870867891892511e-06, "epoch": 2.0476190476190474, "percentage": 40.95, "elapsed_time": "0:12:11", "remaining_time": "0:17:34"}
|
130 |
+
{"current_steps": 130, "total_steps": 315, "loss": 0.1817, "lr": 6.822046736896607e-06, "epoch": 2.0634920634920633, "percentage": 41.27, "elapsed_time": "0:12:16", "remaining_time": "0:17:27"}
|
131 |
+
{"current_steps": 131, "total_steps": 315, "loss": 0.1369, "lr": 6.773024435212678e-06, "epoch": 2.0793650793650795, "percentage": 41.59, "elapsed_time": "0:12:21", "remaining_time": "0:17:21"}
|
132 |
+
{"current_steps": 132, "total_steps": 315, "loss": 0.1838, "lr": 6.723806398707186e-06, "epoch": 2.0952380952380953, "percentage": 41.9, "elapsed_time": "0:12:25", "remaining_time": "0:17:14"}
|
133 |
+
{"current_steps": 133, "total_steps": 315, "loss": 0.1644, "lr": 6.674398060854931e-06, "epoch": 2.111111111111111, "percentage": 42.22, "elapsed_time": "0:12:30", "remaining_time": "0:17:07"}
|
134 |
+
{"current_steps": 134, "total_steps": 315, "loss": 0.1492, "lr": 6.624804876139227e-06, "epoch": 2.126984126984127, "percentage": 42.54, "elapsed_time": "0:12:35", "remaining_time": "0:16:59"}
|
135 |
+
{"current_steps": 135, "total_steps": 315, "loss": 0.1795, "lr": 6.57503231944974e-06, "epoch": 2.142857142857143, "percentage": 42.86, "elapsed_time": "0:12:39", "remaining_time": "0:16:53"}
|
136 |
+
{"current_steps": 136, "total_steps": 315, "loss": 0.1627, "lr": 6.525085885478088e-06, "epoch": 2.1587301587301586, "percentage": 43.17, "elapsed_time": "0:12:45", "remaining_time": "0:16:47"}
|
137 |
+
{"current_steps": 137, "total_steps": 315, "loss": 0.1748, "lr": 6.4749710881112485e-06, "epoch": 2.1746031746031744, "percentage": 43.49, "elapsed_time": "0:12:50", "remaining_time": "0:16:40"}
|
138 |
+
{"current_steps": 138, "total_steps": 315, "loss": 0.1992, "lr": 6.424693459822843e-06, "epoch": 2.1904761904761907, "percentage": 43.81, "elapsed_time": "0:12:55", "remaining_time": "0:16:34"}
|
139 |
+
{"current_steps": 139, "total_steps": 315, "loss": 0.1592, "lr": 6.374258551062377e-06, "epoch": 2.2063492063492065, "percentage": 44.13, "elapsed_time": "0:12:59", "remaining_time": "0:16:26"}
|
140 |
+
{"current_steps": 140, "total_steps": 315, "loss": 0.1651, "lr": 6.3236719296424985e-06, "epoch": 2.2222222222222223, "percentage": 44.44, "elapsed_time": "0:13:03", "remaining_time": "0:16:19"}
|
141 |
+
{"current_steps": 141, "total_steps": 315, "loss": 0.1316, "lr": 6.272939180124316e-06, "epoch": 2.238095238095238, "percentage": 44.76, "elapsed_time": "0:13:08", "remaining_time": "0:16:13"}
|
142 |
+
{"current_steps": 142, "total_steps": 315, "loss": 0.1803, "lr": 6.222065903200909e-06, "epoch": 2.253968253968254, "percentage": 45.08, "elapsed_time": "0:13:13", "remaining_time": "0:16:06"}
|
143 |
+
{"current_steps": 143, "total_steps": 315, "loss": 0.1498, "lr": 6.171057715079012e-06, "epoch": 2.2698412698412698, "percentage": 45.4, "elapsed_time": "0:13:18", "remaining_time": "0:16:00"}
|
144 |
+
{"current_steps": 144, "total_steps": 315, "loss": 0.1352, "lr": 6.119920246859025e-06, "epoch": 2.2857142857142856, "percentage": 45.71, "elapsed_time": "0:13:23", "remaining_time": "0:15:53"}
|
145 |
+
{"current_steps": 145, "total_steps": 315, "loss": 0.1555, "lr": 6.068659143913349e-06, "epoch": 2.3015873015873014, "percentage": 46.03, "elapsed_time": "0:13:27", "remaining_time": "0:15:47"}
|
146 |
+
{"current_steps": 146, "total_steps": 315, "loss": 0.1515, "lr": 6.0172800652631706e-06, "epoch": 2.317460317460317, "percentage": 46.35, "elapsed_time": "0:13:32", "remaining_time": "0:15:40"}
|
147 |
+
{"current_steps": 147, "total_steps": 315, "loss": 0.1414, "lr": 5.965788682953717e-06, "epoch": 2.3333333333333335, "percentage": 46.67, "elapsed_time": "0:13:36", "remaining_time": "0:15:33"}
|
148 |
+
{"current_steps": 148, "total_steps": 315, "loss": 0.1582, "lr": 5.914190681428098e-06, "epoch": 2.3492063492063493, "percentage": 46.98, "elapsed_time": "0:13:41", "remaining_time": "0:15:26"}
|
149 |
+
{"current_steps": 149, "total_steps": 315, "loss": 0.1948, "lr": 5.862491756899753e-06, "epoch": 2.365079365079365, "percentage": 47.3, "elapsed_time": "0:13:45", "remaining_time": "0:15:20"}
|
150 |
+
{"current_steps": 150, "total_steps": 315, "loss": 0.1588, "lr": 5.8106976167236236e-06, "epoch": 2.380952380952381, "percentage": 47.62, "elapsed_time": "0:13:50", "remaining_time": "0:15:13"}
|
151 |
+
{"current_steps": 151, "total_steps": 315, "loss": 0.1354, "lr": 5.758813978766077e-06, "epoch": 2.3968253968253967, "percentage": 47.94, "elapsed_time": "0:13:55", "remaining_time": "0:15:07"}
|
152 |
+
{"current_steps": 152, "total_steps": 315, "loss": 0.1673, "lr": 5.706846570773677e-06, "epoch": 2.4126984126984126, "percentage": 48.25, "elapsed_time": "0:13:59", "remaining_time": "0:15:00"}
|
153 |
+
{"current_steps": 153, "total_steps": 315, "loss": 0.1474, "lr": 5.654801129740863e-06, "epoch": 2.4285714285714284, "percentage": 48.57, "elapsed_time": "0:14:04", "remaining_time": "0:14:54"}
|
154 |
+
{"current_steps": 154, "total_steps": 315, "loss": 0.2067, "lr": 5.6026834012766155e-06, "epoch": 2.4444444444444446, "percentage": 48.89, "elapsed_time": "0:14:08", "remaining_time": "0:14:47"}
|