grg committed on
Commit
2a3cc01
·
1 Parent(s): 74922c5

Evaluation with CoT

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. static/leaderboard.csv +18 -27
  2. static/models_data/Dracarys2-72B-Instruct/cfa_metrics.csv +10 -0
  3. static/models_data/Dracarys2-72B-Instruct/matrix.svg +1963 -0
  4. static/models_data/Dracarys2-72B-Instruct/ranks.svg +0 -0
  5. static/models_data/Dracarys2-72B-Instruct/structure.svg +0 -0
  6. static/models_data/Falcon3-10B-Instruct/cfa_metrics.csv +0 -10
  7. static/models_data/Llama-3.1-70B-Instruct/cfa_metrics.csv +10 -0
  8. static/models_data/Llama-3.1-70B-Instruct/matrix.svg +1945 -0
  9. static/models_data/Llama-3.1-70B-Instruct/ranks.svg +0 -0
  10. static/models_data/Llama-3.1-70B-Instruct/structure.svg +0 -0
  11. static/models_data/Llama-3.1-8B-Instruct/cfa_metrics.csv +10 -0
  12. static/models_data/Llama-3.1-8B-Instruct/matrix.svg +1942 -0
  13. static/models_data/Llama-3.1-8B-Instruct/ranks.svg +0 -0
  14. static/models_data/Llama-3.1-8B-Instruct/structure.svg +0 -0
  15. static/models_data/Llama-3.1-Nemotron-70B-Instruct/cfa_metrics.csv +10 -0
  16. static/models_data/Llama-3.1-Nemotron-70B-Instruct/matrix.svg +1957 -0
  17. static/models_data/Llama-3.1-Nemotron-70B-Instruct/ranks.svg +0 -0
  18. static/models_data/Llama-3.1-Nemotron-70B-Instruct/structure.svg +0 -0
  19. static/models_data/Llama-3.1-Nemotron-Nano-8B-v1/cfa_metrics.csv +10 -0
  20. static/models_data/Llama-3.1-Nemotron-Nano-8B-v1/matrix.svg +1990 -0
  21. static/models_data/Llama-3.1-Nemotron-Nano-8B-v1/ranks.svg +0 -0
  22. static/models_data/Llama-3.1-Nemotron-Nano-8B-v1/structure.svg +0 -0
  23. static/models_data/Llama-3.2-1B-Instruct/cfa_metrics.csv +10 -0
  24. static/models_data/Llama-3.2-1B-Instruct/matrix.svg +1934 -0
  25. static/models_data/Llama-3.2-1B-Instruct/ranks.svg +0 -0
  26. static/models_data/Llama-3.2-1B-Instruct/structure.svg +0 -0
  27. static/models_data/Llama-3.2-3B-Instruct/cfa_metrics.csv +10 -0
  28. static/models_data/Llama-3.2-3B-Instruct/matrix.svg +1944 -0
  29. static/models_data/Llama-3.2-3B-Instruct/ranks.svg +0 -0
  30. static/models_data/Llama-3.2-3B-Instruct/structure.svg +0 -0
  31. static/models_data/Llama-3.3-70B-Instruct/cfa_metrics.csv +10 -0
  32. static/models_data/Llama-3.3-70B-Instruct/matrix.svg +1920 -0
  33. static/models_data/Llama-3.3-70B-Instruct/ranks.svg +0 -0
  34. static/models_data/Llama-3.3-70B-Instruct/structure.svg +0 -0
  35. static/models_data/Mistral-Large-Instruct-2407/cfa_metrics.csv +9 -9
  36. static/models_data/Mistral-Large-Instruct-2407/matrix.svg +223 -219
  37. static/models_data/Mistral-Large-Instruct-2407/ranks.svg +0 -0
  38. static/models_data/Mistral-Large-Instruct-2407/structure.svg +0 -0
  39. static/models_data/Mistral-Large-Instruct-2411/cfa_metrics.csv +10 -0
  40. static/models_data/Mistral-Large-Instruct-2411/matrix.svg +1949 -0
  41. static/models_data/Mistral-Large-Instruct-2411/ranks.svg +0 -0
  42. static/models_data/Mistral-Large-Instruct-2411/structure.svg +0 -0
  43. static/models_data/Mistral-Nemo-Instruct-2407/cfa_metrics.csv +9 -9
  44. static/models_data/Mistral-Nemo-Instruct-2407/matrix.svg +344 -348
  45. static/models_data/Mistral-Nemo-Instruct-2407/ranks.svg +0 -0
  46. static/models_data/Mistral-Nemo-Instruct-2407/structure.svg +0 -0
  47. static/models_data/Mistral-Small-3.1-24B-Instruct-2503/cfa_metrics.csv +10 -0
  48. static/models_data/Mistral-Small-3.1-24B-Instruct-2503/matrix.svg +2001 -0
  49. static/models_data/Mistral-Small-3.1-24B-Instruct-2503/ranks.svg +0 -0
  50. static/models_data/Mistral-Small-3.1-24B-Instruct-2503/structure.svg +0 -0
static/leaderboard.csv CHANGED
@@ -1,28 +1,19 @@
1
  Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,CFI,SRMR,RMSEA
2
- reka-flash-3,0.45940170940170943,0.4899723723251134,0.2885614355228928,0.21908343015733012,0.6356444444444445,0.32448055555555566,0.3265472222222222
3
- DeepSeek-V3-0324,0.30982905982905984,0.3893755207780696,0.3004930917799752,0.23355431200645516,0.3651611111111111,0.5979083333333333,0.6206666666666667
4
- DeepSeek-V3-0324_user,0.44070512820512825,0.49753828417140256,0.3776747268598861,0.22438707851276918,0.5427472222222222,0.4287916666666667,0.4199611111111111
5
- gemma-3-27b-it,0.3894230769230769,0.5040524142149732,0.4494497801681616,0.22345480695286235,0.4511777777777778,0.4855138888888889,0.5075888888888889
6
- Llama-3.3-70B-Instruct,0.7083333333333334,0.6737308210529425,0.689130950089758,0.17019112082437948,0.5869277777777778,0.38021666666666665,0.4031972222222221
7
- Llama-3.1-70B-Instruct,0.7494658119658121,0.7109651955039333,0.6723010699062582,0.1770688267046745,0.7082666666666667,0.2531305555555555,0.27954999999999997
8
- Llama-3.1-Nemotron-70B-Instruct,0.7291666666666665,0.7021372470167542,0.6749074485556226,0.16936515142178976,0.6728861111111111,0.2835055555555557,0.3025472222222223
9
- Llama-3.1-8B-Instruct,0.5080128205128206,0.5681123119595507,0.46930842769163483,0.21202410397902305,0.5946111111111112,0.35378055555555554,0.3611416666666667
10
- Llama-3.2-3B-Instruct,0.219017094017094,0.30725051735750397,0.06779772942677353,0.2865006677359514,0.48239166666666666,0.5190555555555556,0.49002222222222214
11
- Llama-3.2-1B-Instruct,0.13354700854700854,0.23848817421801657,0.006872157585997037,0.29254990326652297,0.33736111111111117,0.5983222222222222,0.5660722222222222
12
- Mistral-Large-Instruct-2411,0.5657051282051282,0.6032900501455456,0.5261735061045867,0.2009208454762036,0.5878305555555555,0.32947777777777776,0.33580555555555547
13
- Mistral-Large-Instruct-2407,0.7964743589743589,0.7374229691535793,0.7644582301049158,0.16944638941325085,0.6510750000000001,0.31028611111111104,0.3297916666666667
14
- Mistral-Nemo-Instruct-2407,0.31303418803418803,0.4060740463440762,0.21398622636798964,0.25768864583046025,0.5191694444444444,0.4492111111111111,0.41962222222222223
15
- Mistral-Small-3.1-24B-Instruct-2503,0.6436965811965812,0.6471972798701351,0.5693346095644334,0.18619908818554043,0.6869249999999999,0.29532500000000006,0.305161111111111
16
- QwQ-32B,0.8263888888888888,0.7719313363889678,0.8091295835194909,0.1769481985217758,0.6934333333333333,0.27074999999999994,0.30680277777777776
17
- Qwen2.5-VL-72B-Instruct,0.8482905982905984,0.7728617287376658,0.7921085092426651,0.15380409595822364,0.7021583333333334,0.24572499999999997,0.2881694444444445
18
- Qwen2.5-VL-7B-Instruct,0.3365384615384615,0.4265332391154229,0.22155167860085653,0.25685191259115403,0.5591361111111112,0.39983055555555547,0.37639444444444436
19
- Qwen2.5-VL-3B-Instruct,0.12286324786324787,0.22915813420909414,0.021149465964301183,0.2913022346288958,0.3104166666666667,0.6484194444444444,0.6220277777777778
20
- Qwen2.5-72B-Instruct,0.7110042735042735,0.6892855084702477,0.6634524803081552,0.17931196458174936,0.6639638888888889,0.30247222222222225,0.3217055555555556
21
- Qwen2.5-32B-Instruct,0.5229700854700855,0.5797225465553821,0.6034765993068393,0.20283713589541164,0.46560833333333324,0.5226250000000001,0.5162722222222222
22
- Qwen2.5-14B-Instruct-1M,0.3782051282051282,0.46648036370033313,0.36401726539161106,0.2400178186304459,0.4806611111111111,0.4841472222222223,0.48072222222222216
23
- phi-4,0.20192307692307696,0.29450852038492736,0.06465994553954729,0.27174661907877007,0.41245277777777783,0.5301916666666666,0.5130861111111111
24
- Dracarys2-72B-Instruct,0.7216880341880343,0.6991833993340636,0.6630452722509824,0.18543611655364134,0.6917972222222222,0.27243055555555573,0.2926444444444445
25
- Nautilus-70B-v0.1,0.716880341880342,0.7074335111695115,0.6326443266767067,0.1814758840174009,0.7506972222222221,0.20927777777777778,0.23105277777777777
26
- Cydonia-22B-v1.2,0.6052350427350427,0.6469610565596798,0.5951243715693844,0.20282847824454076,0.6382611111111111,0.3130722222222222,0.3271694444444445
27
- Ministrations-8B-v1,0.3862179487179487,0.4584643004722592,0.32151643372620586,0.2588041089045269,0.5249611111111112,0.4474500000000001,0.4370583333333333
28
- dummy,0.13835470085470086,0.2291015386716794,-0.009004148398032956,0.2928877637010999,0.3755222222222222,0.622275,0.5915305555555557
 
1
  Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,CFI,SRMR,RMSEA
2
+ reka-flash-3,0.3676470588235294,0.4899723723251134,0.2885614355228928,0.21908343015733012,0.6356444444444445,0.32448055555555566,0.3265472222222222
3
+ Llama-3.3-70B-Instruct,0.7720588235294118,0.7826258982618856,0.8101440811870466,0.15086913865310292,0.7070694444444444,0.2484805555555556,0.2872888888888889
4
+ Llama-3.1-70B-Instruct,0.6732026143790849,0.7716257197180342,0.7627363693137972,0.1590258306220288,0.7405305555555556,0.2069805555555555,0.2524638888888888
5
+ Llama-3.1-Nemotron-70B-Instruct,0.7663398692810457,0.806561245024004,0.7986269216633632,0.15639272646142155,0.7771694444444445,0.1574805555555555,0.2053138888888889
6
+ Llama-3.1-8B-Instruct,0.4566993464052287,0.6200396929619997,0.5544135256446929,0.177589336660551,0.6203138888888889,0.3474166666666667,0.3526444444444444
7
+ Llama-3.2-3B-Instruct,0.35947712418300654,0.4920203960749596,0.3095420768898777,0.23477458340427637,0.6402583333333334,0.3671361111111112,0.34035277777777784
8
+ Llama-3.2-1B-Instruct,0.14950980392156865,0.28494630130657267,0.017845599375419068,0.29245365371576204,0.4570666666666667,0.49829999999999997,0.458125
9
+ Mistral-Large-Instruct-2411,0.5857843137254901,0.7334649247049873,0.7061158170869489,0.17181887070789792,0.7237666666666667,0.24669166666666675,0.262
10
+ Mistral-Large-Instruct-2407,0.6846405228758169,0.7865403071348558,0.7786865940656633,0.18242669696158476,0.7757583333333334,0.19862499999999994,0.21713055555555572
11
+ Mistral-Nemo-Instruct-2407,0.34068627450980393,0.5243749748919985,0.409719354830282,0.21289984240736382,0.565475,0.40046944444444443,0.39598333333333324
12
+ Mistral-Small-3.1-24B-Instruct-2503,0.5326797385620915,0.7026337346865648,0.6848141294613206,0.17721164035276304,0.6713749999999999,0.29685555555555565,0.3154944444444444
13
+ QwQ-32B,0.7230392156862745,0.7719313363889678,0.8091295835194909,0.1769481985217758,0.6934333333333333,0.27074999999999994,0.30680277777777776
14
+ Qwen2.5-VL-72B-Instruct,0.8619281045751634,0.8360251140700989,0.8125460511443046,0.15932218090531514,0.8454611111111111,0.10019722222222227,0.14792499999999997
15
+ Qwen2.5-VL-7B-Instruct,0.2450980392156863,0.42422904460473104,0.28582477835186304,0.25948620101404807,0.45279444444444444,0.4787000000000001,0.464075
16
+ Qwen2.5-VL-3B-Instruct,0.1111111111111111,0.2655381207197522,0.059657867275330144,0.2877042811210807,0.3650055555555556,0.6013444444444445,0.5902833333333335
17
+ Qwen2.5-14B-Instruct-1M,0.5359477124183007,0.7022023055217502,0.6551137143166985,0.1785253019816836,0.7130944444444444,0.26870277777777785,0.26870277777777773
18
+ Dracarys2-72B-Instruct,0.7295751633986929,0.7740958932029343,0.789501612210195,0.15836985877285903,0.7307833333333333,0.2581972222222222,0.27945555555555546
19
+ dummy,0.09967320261437908,0.2291015386716794,-0.009004148398032956,0.2928877637010999,0.3755222222222222,0.622275,0.5915305555555557
 
 
 
 
 
 
 
 
 
static/models_data/Dracarys2-72B-Instruct/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.460975,0.441825,0.542575,0.555875
3
+ chunk_1,0.714475,0.70085,0.299775,0.314375
4
+ chunk_2,0.7062250000000001,0.68915,0.3023,0.318525
5
+ chunk_3,0.715225,0.7014,0.29862500000000003,0.31500000000000006
6
+ chunk_4,0.70265,0.6831499999999999,0.29910000000000003,0.333625
7
+ chunk_chess_0,0.8809,0.8429,0.088925,0.129125
8
+ chunk_grammar_1,0.892675,0.8629249999999999,0.091625,0.08435000000000001
9
+ chunk_no_conv,0.70625,0.6895,0.30005,0.32235
10
+ chunk_svs_no_conv,0.797675,0.7385,0.1008,0.141875
static/models_data/Dracarys2-72B-Instruct/matrix.svg ADDED
static/models_data/Dracarys2-72B-Instruct/ranks.svg ADDED
static/models_data/Dracarys2-72B-Instruct/structure.svg ADDED
static/models_data/Falcon3-10B-Instruct/cfa_metrics.csv DELETED
@@ -1,10 +0,0 @@
1
- Context chunk,CFI,TLI,SRMR,RMSEA
2
- chunk_0,0.5,0.6169250000000001,0.544575,0.5
3
- chunk_1,0.1534,0.128475,0.783325,0.7724500000000001
4
- chunk_2,0.25,0.311025,0.7661,0.75
5
- chunk_3,0.872575,0.9396749999999999,0.10157500000000001,0.031775
6
- chunk_4,0.25,-2.110625,0.77445,0.75
7
- chunk_chess_0,0.55545,0.480525,0.333425,0.32897499999999996
8
- chunk_grammar_1,0.415225,0.39135,0.551875,0.5199750000000001
9
- chunk_no_conv,0.673625,0.63965,0.31382499999999997,0.33115
10
- chunk_svs_no_conv,0.0,0.0,1.0,1.0
 
 
 
 
 
 
 
 
 
 
 
static/models_data/Llama-3.1-70B-Instruct/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.659275,0.61805,0.316875,0.35955000000000004
3
+ chunk_1,0.8779,0.84055,0.09045,0.11662499999999999
4
+ chunk_2,0.610225,0.5619,0.33725,0.36717500000000003
5
+ chunk_3,0.818225,0.75735,0.102325,0.165825
6
+ chunk_4,0.8365499999999999,0.77925,0.13745000000000002,0.16645
7
+ chunk_chess_0,0.78105,0.7010750000000001,0.11615,0.19895
8
+ chunk_grammar_1,0.638325,0.597175,0.32765,0.362925
9
+ chunk_no_conv,0.8015,0.7305499999999999,0.11454999999999999,0.188975
10
+ chunk_svs_no_conv,0.641725,0.6108750000000001,0.320125,0.3457
static/models_data/Llama-3.1-70B-Instruct/matrix.svg ADDED
static/models_data/Llama-3.1-70B-Instruct/ranks.svg ADDED
static/models_data/Llama-3.1-70B-Instruct/structure.svg ADDED
static/models_data/Llama-3.1-8B-Instruct/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.46950000000000003,0.457525,0.536825,0.541325
3
+ chunk_1,0.834625,0.801925,0.093175,0.10890000000000001
4
+ chunk_2,0.6906,0.6863250000000001,0.31127499999999997,0.3049
5
+ chunk_3,0.64705,0.604875,0.3275,0.327375
6
+ chunk_4,0.478575,0.471375,0.5346500000000001,0.529275
7
+ chunk_chess_0,0.8464499999999999,0.7905249999999999,0.09815,0.123975
8
+ chunk_grammar_1,0.211,0.187,0.775575,0.7922
9
+ chunk_no_conv,0.87235,0.827925,0.09872500000000001,0.117875
10
+ chunk_svs_no_conv,0.532675,0.482175,0.350875,0.32797499999999996
static/models_data/Llama-3.1-8B-Instruct/matrix.svg ADDED
static/models_data/Llama-3.1-8B-Instruct/ranks.svg ADDED
static/models_data/Llama-3.1-8B-Instruct/structure.svg ADDED
static/models_data/Llama-3.1-Nemotron-70B-Instruct/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.870175,0.82485,0.09887500000000002,0.129325
3
+ chunk_1,0.8354,0.7823749999999999,0.10077499999999999,0.133375
4
+ chunk_2,0.63475,0.5933,0.33215,0.35714999999999997
5
+ chunk_3,0.8190000000000001,0.755875,0.11109999999999999,0.157775
6
+ chunk_4,0.843875,0.784075,0.109125,0.163825
7
+ chunk_chess_0,0.814025,0.750975,0.117375,0.17820000000000003
8
+ chunk_grammar_1,0.8116999999999999,0.74505,0.103225,0.179075
9
+ chunk_no_conv,0.5703750000000001,0.5028,0.3414,0.40054999999999996
10
+ chunk_svs_no_conv,0.7952250000000001,0.74225,0.1033,0.14855000000000002
static/models_data/Llama-3.1-Nemotron-70B-Instruct/matrix.svg ADDED
static/models_data/Llama-3.1-Nemotron-70B-Instruct/ranks.svg ADDED
static/models_data/Llama-3.1-Nemotron-70B-Instruct/structure.svg ADDED
static/models_data/Llama-3.1-Nemotron-Nano-8B-v1/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.477175,0.009975000000000012,0.5413,0.5053000000000001
3
+ chunk_1,0.09135,0.040025,0.78305,0.7784
4
+ chunk_2,0.1812,0.15895,0.77625,0.763925
5
+ chunk_3,0.5,0.19799999999999998,0.54235,0.5
6
+ chunk_4,0.25,-0.07114999999999999,0.550325,0.5077499999999999
7
+ chunk_chess_0,0.0,0.54195,0.774675,0.7621
8
+ chunk_grammar_1,0.25,-0.010425,0.772375,0.75
9
+ chunk_no_conv,0.11025,0.06505,0.77855,0.771725
10
+ chunk_svs_no_conv,0.2604,0.19649999999999998,0.55775,0.538625
static/models_data/Llama-3.1-Nemotron-Nano-8B-v1/matrix.svg ADDED
static/models_data/Llama-3.1-Nemotron-Nano-8B-v1/ranks.svg ADDED
static/models_data/Llama-3.1-Nemotron-Nano-8B-v1/structure.svg ADDED
static/models_data/Llama-3.2-1B-Instruct/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.7139,2.865475,0.3244,0.261475
3
+ chunk_1,0.75,0.7970249999999999,0.319,0.25
4
+ chunk_2,0.543725,0.599475,0.32595,0.28645
5
+ chunk_3,0.25,0.43175,0.770575,0.75
6
+ chunk_4,0.5526,1.1410750000000003,0.3246,0.2695
7
+ chunk_chess_0,0.381375,-1.134175,0.5457,0.5083
8
+ chunk_grammar_1,0.25,-3.727875,0.765125,0.75
9
+ chunk_no_conv,0.25,0.459775,0.77015,0.75
10
+ chunk_svs_no_conv,0.42200000000000004,0.334725,0.3392,0.2974
static/models_data/Llama-3.2-1B-Instruct/matrix.svg ADDED
static/models_data/Llama-3.2-1B-Instruct/ranks.svg ADDED
static/models_data/Llama-3.2-1B-Instruct/structure.svg ADDED
static/models_data/Llama-3.2-3B-Instruct/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.7052499999999999,0.8022499999999999,0.31672500000000003,0.279375
3
+ chunk_1,0.99315,1.40095,0.075375,0.005125
4
+ chunk_2,0.85895,0.885975,0.084375,0.054075
5
+ chunk_3,0.25,0.337375,0.762,0.75
6
+ chunk_4,0.7219249999999999,0.7086250000000001,0.317575,0.2792
7
+ chunk_chess_0,0.490925,0.508575,0.5343,0.51295
8
+ chunk_grammar_1,0.68695,0.7239,0.30717500000000003,0.280125
9
+ chunk_no_conv,0.6418250000000001,0.58755,0.337525,0.33997499999999997
10
+ chunk_svs_no_conv,0.41335,0.388175,0.569175,0.56235
static/models_data/Llama-3.2-3B-Instruct/matrix.svg ADDED
static/models_data/Llama-3.2-3B-Instruct/ranks.svg ADDED
static/models_data/Llama-3.2-3B-Instruct/structure.svg ADDED
static/models_data/Llama-3.3-70B-Instruct/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.8892,0.8514,0.088575,0.11497500000000001
3
+ chunk_1,0.885375,0.8481,0.0946,0.1254
4
+ chunk_2,0.83465,0.77855,0.10145,0.1533
5
+ chunk_3,0.8037,0.748975,0.108825,0.159825
6
+ chunk_4,0.422975,0.402,0.5508,0.557975
7
+ chunk_chess_0,0.850175,0.799675,0.10902499999999998,0.1548
8
+ chunk_grammar_1,0.5704250000000001,0.5144500000000001,0.335675,0.39255
9
+ chunk_no_conv,0.42762500000000003,0.38805,0.5442,0.598175
10
+ chunk_svs_no_conv,0.6795,0.6697249999999999,0.303175,0.3286
static/models_data/Llama-3.3-70B-Instruct/matrix.svg ADDED
static/models_data/Llama-3.3-70B-Instruct/ranks.svg ADDED
static/models_data/Llama-3.3-70B-Instruct/structure.svg ADDED
static/models_data/Mistral-Large-Instruct-2407/cfa_metrics.csv CHANGED
@@ -1,10 +1,10 @@
1
  Context chunk,CFI,TLI,SRMR,RMSEA
2
- chunk_0,0.6065,0.5587500000000001,0.34195000000000003,0.3913
3
- chunk_1,0.452225,0.435025,0.5444,0.5771999999999999
4
- chunk_2,0.604625,0.5611,0.34990000000000004,0.341325
5
- chunk_3,0.438675,0.42252500000000004,0.5419,0.535475
6
- chunk_4,0.6441250000000001,0.6033499999999999,0.35117499999999996,0.37655
7
- chunk_chess_0,0.63125,0.5975,0.330325,0.348275
8
- chunk_grammar_1,0.8351500000000001,0.7926250000000001,0.116725,0.13017499999999999
9
- chunk_no_conv,0.8900250000000001,0.85555,0.09050000000000001,0.115975
10
- chunk_svs_no_conv,0.7571,0.7157250000000001,0.1257,0.15184999999999998
 
1
  Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.88825,0.8611500000000001,0.10297500000000001,0.0879
3
+ chunk_1,0.6653,0.64005,0.322325,0.32034999999999997
4
+ chunk_2,0.88135,0.845875,0.10322500000000001,0.09537499999999999
5
+ chunk_3,0.8513000000000001,0.8011750000000001,0.1016,0.13755
6
+ chunk_4,0.657325,0.6169,0.32847499999999996,0.361575
7
+ chunk_chess_0,0.696275,0.675125,0.30765000000000003,0.33030000000000004
8
+ chunk_grammar_1,0.85815,0.8123250000000001,0.09375,0.12637500000000002
9
+ chunk_no_conv,0.678375,0.643325,0.3224250000000001,0.35389999999999994
10
+ chunk_svs_no_conv,0.8055,0.7531,0.1052,0.14085
static/models_data/Mistral-Large-Instruct-2407/matrix.svg CHANGED
static/models_data/Mistral-Large-Instruct-2407/ranks.svg CHANGED
static/models_data/Mistral-Large-Instruct-2407/structure.svg CHANGED
static/models_data/Mistral-Large-Instruct-2411/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.617125,0.571775,0.31995,0.346875
3
+ chunk_1,0.646825,0.6022,0.33882499999999993,0.35167499999999996
4
+ chunk_2,0.7015,0.6911499999999999,0.31934999999999997,0.2742
5
+ chunk_3,0.643225,0.5982500000000001,0.317075,0.35515
6
+ chunk_4,0.657825,0.618025,0.3123,0.35797500000000004
7
+ chunk_chess_0,0.69135,0.66455,0.314025,0.3507
8
+ chunk_grammar_1,0.866175,0.8204,0.09325,0.12295
9
+ chunk_no_conv,0.871775,0.8370249999999999,0.10275000000000001,0.08895
10
+ chunk_svs_no_conv,0.8181,0.78025,0.1027,0.10952500000000001
static/models_data/Mistral-Large-Instruct-2411/matrix.svg ADDED
static/models_data/Mistral-Large-Instruct-2411/ranks.svg ADDED
static/models_data/Mistral-Large-Instruct-2411/structure.svg ADDED
static/models_data/Mistral-Nemo-Instruct-2407/cfa_metrics.csv CHANGED
@@ -1,10 +1,10 @@
1
  Context chunk,CFI,TLI,SRMR,RMSEA
2
- chunk_0,0.25,1.392025,0.772925,0.75
3
- chunk_1,0.9480999999999999,1.2264249999999999,0.08925,0.017525
4
- chunk_2,0.2768,0.18375,0.5579999999999999,0.5459750000000001
5
- chunk_3,0.2818,0.21995,0.558425,0.5501
6
- chunk_4,0.923175,1.5393999999999999,0.09607500000000001,0.03105
7
- chunk_chess_0,0.356975,0.2954,0.548775,0.539325
8
- chunk_grammar_1,0.485425,1.534625,0.545725,0.508975
9
- chunk_no_conv,0.477075,0.471075,0.54905,0.51215
10
- chunk_svs_no_conv,0.673175,0.64615,0.324675,0.3215
 
1
  Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.408725,0.37925,0.562725,0.5481
3
+ chunk_1,0.8297749999999999,0.7647999999999999,0.101275,0.102525
4
+ chunk_2,0.6814749999999999,0.6552250000000001,0.30922499999999997,0.31779999999999997
5
+ chunk_3,0.6405000000000001,0.637225,0.3251,0.2992
6
+ chunk_4,0.227,0.21765,0.776025,0.772025
7
+ chunk_chess_0,0.48875,0.483025,0.5343000000000001,0.5262749999999999
8
+ chunk_grammar_1,0.492425,0.397925,0.3525,0.33387500000000003
9
+ chunk_no_conv,0.6493749999999999,0.6050500000000001,0.3225,0.356925
10
+ chunk_svs_no_conv,0.67125,0.68245,0.320575,0.307125
static/models_data/Mistral-Nemo-Instruct-2407/matrix.svg CHANGED
static/models_data/Mistral-Nemo-Instruct-2407/ranks.svg CHANGED
static/models_data/Mistral-Nemo-Instruct-2407/structure.svg CHANGED
static/models_data/Mistral-Small-3.1-24B-Instruct-2503/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.6691,0.641775,0.3155,0.332625
3
+ chunk_1,0.4558,0.440175,0.536275,0.55445
4
+ chunk_2,0.389175,0.350225,0.5594,0.585275
5
+ chunk_3,0.87005,0.8321000000000001,0.09435,0.107325
6
+ chunk_4,0.483775,0.475525,0.53235,0.534775
7
+ chunk_chess_0,0.828375,0.771725,0.0994,0.1497
8
+ chunk_grammar_1,0.8513249999999999,0.79325,0.100925,0.13855
9
+ chunk_no_conv,0.8732,0.83415,0.09482499999999999,0.11622499999999998
10
+ chunk_svs_no_conv,0.621575,0.587825,0.338675,0.32052499999999995
static/models_data/Mistral-Small-3.1-24B-Instruct-2503/matrix.svg ADDED
static/models_data/Mistral-Small-3.1-24B-Instruct-2503/ranks.svg ADDED
static/models_data/Mistral-Small-3.1-24B-Instruct-2503/structure.svg ADDED