Katsumata420 committed on
Commit
7d69aa6
·
verified ·
1 Parent(s): be13df6

First commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. 1_Pooling/config.json +10 -0
  2. config.json +47 -0
  3. config_sentence_transformers.json +14 -0
  4. jmteb/jmteb.jsonnet +22 -0
  5. jmteb/results/Classification/scores_amazon_counterfactual_classification.json +23 -0
  6. jmteb/results/Classification/scores_amazon_review_classification.json +23 -0
  7. jmteb/results/Classification/scores_massive_intent_classification.json +23 -0
  8. jmteb/results/Classification/scores_massive_scenario_classification.json +23 -0
  9. jmteb/results/Clustering/scores_livedoor_news.json +36 -0
  10. jmteb/results/Clustering/scores_mewsc16.json +36 -0
  11. jmteb/results/PairClassification/scores_paws_x_ja.json +41 -0
  12. jmteb/results/Reranking/scores_esci.json +31 -0
  13. jmteb/results/Retrieval/scores_jagovfaqs_22k.json +43 -0
  14. jmteb/results/Retrieval/scores_jaqket.json +43 -0
  15. jmteb/results/Retrieval/scores_mrtydi.json +43 -0
  16. jmteb/results/Retrieval/scores_nlp_journal_abs_intro.json +43 -0
  17. jmteb/results/Retrieval/scores_nlp_journal_title_abs.json +43 -0
  18. jmteb/results/Retrieval/scores_nlp_journal_title_intro.json +43 -0
  19. jmteb/results/STS/scores_jsick.json +31 -0
  20. jmteb/results/STS/scores_jsts.json +31 -0
  21. jmteb/results/summary.json +62 -0
  22. jmteb/tasks/amazon_counterfactual_classification.jsonnet +32 -0
  23. jmteb/tasks/amazon_review_classification.jsonnet +32 -0
  24. jmteb/tasks/esci.jsonnet +33 -0
  25. jmteb/tasks/jagovfaqs_22k.jsonnet +33 -0
  26. jmteb/tasks/jaqket.jsonnet +33 -0
  27. jmteb/tasks/jsick.jsonnet +25 -0
  28. jmteb/tasks/jsts.jsonnet +25 -0
  29. jmteb/tasks/livedoor_news.jsonnet +24 -0
  30. jmteb/tasks/massive_intent_classification.jsonnet +32 -0
  31. jmteb/tasks/massive_scenario_classification.jsonnet +32 -0
  32. jmteb/tasks/mewsc16.jsonnet +24 -0
  33. jmteb/tasks/mrtydi.jsonnet +34 -0
  34. jmteb/tasks/nlp_journal_abs_intro.jsonnet +33 -0
  35. jmteb/tasks/nlp_journal_title_abs.jsonnet +33 -0
  36. jmteb/tasks/nlp_journal_title_intro.jsonnet +33 -0
  37. jmteb/tasks/paws_x_ja.jsonnet +25 -0
  38. model.safetensors +3 -0
  39. modules.json +20 -0
  40. mteb/models/__init__.py +10 -0
  41. mteb/models/default.py +4 -0
  42. mteb/models/retrieva.py +13 -0
  43. mteb/models/retrieva_en.py +15 -0
  44. mteb/mteb_eval.py +49 -0
  45. mteb/results/AmazonCounterfactualClassification.json +95 -0
  46. mteb/results/ArXivHierarchicalClusteringP2P.json +46 -0
  47. mteb/results/ArXivHierarchicalClusteringS2S.json +46 -0
  48. mteb/results/ArguAna.json +158 -0
  49. mteb/results/AskUbuntuDupQuestions.json +26 -0
  50. mteb/results/BIOSSES.json +26 -0
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 512,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sbintuitions/modernbert-ja-130m",
3
+ "architectures": [
4
+ "ModernBertModel"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "classifier_activation": "gelu",
10
+ "classifier_bias": false,
11
+ "classifier_dropout": 0.0,
12
+ "classifier_pooling": "cls",
13
+ "cls_token_id": 6,
14
+ "decoder_bias": true,
15
+ "deterministic_flash_attn": false,
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 2,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 512,
23
+ "initializer_cutoff_factor": 2.0,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 2048,
26
+ "layer_norm_eps": 1e-05,
27
+ "local_attention": 128,
28
+ "local_rope_theta": 10000.0,
29
+ "max_position_embeddings": 8192,
30
+ "mlp_bias": false,
31
+ "mlp_dropout": 0.0,
32
+ "model_type": "modernbert",
33
+ "norm_bias": false,
34
+ "norm_eps": 1e-05,
35
+ "num_attention_heads": 8,
36
+ "num_hidden_layers": 19,
37
+ "pad_token_id": 3,
38
+ "position_embedding_type": "rope",
39
+ "reference_compile": false,
40
+ "repad_logits_with_grad": false,
41
+ "sep_token_id": 4,
42
+ "sparse_pred_ignore_index": -100,
43
+ "sparse_prediction": false,
44
+ "torch_dtype": "bfloat16",
45
+ "transformers_version": "4.49.0",
46
+ "vocab_size": 102400
47
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.4.1",
4
+ "transformers": "4.49.0",
5
+ "pytorch": "2.5.1+cu121"
6
+ },
7
+ "prompts": {
8
+ "Retrieval-query": "関連した文書を探すために次の文を表現して\n",
9
+ "Retrieval-passage": "次の文章を表現して\n",
10
+ "default": "同じ意味の文を探すために次の文を表現して\n"
11
+ },
12
+ "default_prompt_name": "default",
13
+ "similarity_fn_name": "cosine"
14
+ }
jmteb/jmteb.jsonnet ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Classification
2
+ (import './tasks/amazon_review_classification.jsonnet') +
3
+ (import './tasks/amazon_counterfactual_classification.jsonnet') +
4
+ (import './tasks/massive_intent_classification.jsonnet') +
5
+ (import './tasks/massive_scenario_classification.jsonnet') +
6
+ // Clustering
7
+ (import './tasks/livedoor_news.jsonnet') +
8
+ (import './tasks/mewsc16.jsonnet') +
9
+ // STS
10
+ (import './tasks/jsts.jsonnet') +
11
+ (import './tasks/jsick.jsonnet') +
12
+ // Pair Classification
13
+ (import './tasks/paws_x_ja.jsonnet') +
14
+ // Retrieval
15
+ (import './tasks/jagovfaqs_22k.jsonnet') +
16
+ (import './tasks/mrtydi.jsonnet') +
17
+ (import './tasks/jaqket.jsonnet') +
18
+ (import './tasks/nlp_journal_title_abs.jsonnet') +
19
+ (import './tasks/nlp_journal_title_intro.jsonnet') +
20
+ (import './tasks/nlp_journal_abs_intro.jsonnet') +
21
+ // Reranking
22
+ (import './tasks/esci.jsonnet')
jmteb/results/Classification/scores_amazon_counterfactual_classification.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "macro_f1",
3
+ "metric_value": 0.7725172847265871,
4
+ "details": {
5
+ "optimal_classifier_name": "logreg",
6
+ "val_scores": {
7
+ "knn_cosine_k_2": {
8
+ "accuracy": 0.9163090128755365,
9
+ "macro_f1": 0.694795707592322
10
+ },
11
+ "logreg": {
12
+ "accuracy": 0.9291845493562232,
13
+ "macro_f1": 0.7484418707366147
14
+ }
15
+ },
16
+ "test_scores": {
17
+ "logreg": {
18
+ "accuracy": 0.9336188436830836,
19
+ "macro_f1": 0.7725172847265871
20
+ }
21
+ }
22
+ }
23
+ }
jmteb/results/Classification/scores_amazon_review_classification.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "macro_f1",
3
+ "metric_value": 0.580199599298548,
4
+ "details": {
5
+ "optimal_classifier_name": "logreg",
6
+ "val_scores": {
7
+ "knn_cosine_k_2": {
8
+ "accuracy": 0.4322,
9
+ "macro_f1": 0.4233916952995692
10
+ },
11
+ "logreg": {
12
+ "accuracy": 0.5912,
13
+ "macro_f1": 0.5856091084774833
14
+ }
15
+ },
16
+ "test_scores": {
17
+ "logreg": {
18
+ "accuracy": 0.5848,
19
+ "macro_f1": 0.580199599298548
20
+ }
21
+ }
22
+ }
23
+ }
jmteb/results/Classification/scores_massive_intent_classification.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "macro_f1",
3
+ "metric_value": 0.7965072076950149,
4
+ "details": {
5
+ "optimal_classifier_name": "logreg",
6
+ "val_scores": {
7
+ "knn_cosine_k_2": {
8
+ "accuracy": 0.8066896212493851,
9
+ "macro_f1": 0.7856631206448443
10
+ },
11
+ "logreg": {
12
+ "accuracy": 0.8578455484505657,
13
+ "macro_f1": 0.8070559236000313
14
+ }
15
+ },
16
+ "test_scores": {
17
+ "logreg": {
18
+ "accuracy": 0.859448554135844,
19
+ "macro_f1": 0.7965072076950149
20
+ }
21
+ }
22
+ }
23
+ }
jmteb/results/Classification/scores_massive_scenario_classification.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "macro_f1",
3
+ "metric_value": 0.8963544416714002,
4
+ "details": {
5
+ "optimal_classifier_name": "logreg",
6
+ "val_scores": {
7
+ "knn_cosine_k_2": {
8
+ "accuracy": 0.8745696015740285,
9
+ "macro_f1": 0.8702590755528099
10
+ },
11
+ "logreg": {
12
+ "accuracy": 0.9011313330054107,
13
+ "macro_f1": 0.894702776793441
14
+ }
15
+ },
16
+ "test_scores": {
17
+ "logreg": {
18
+ "accuracy": 0.8997982515131137,
19
+ "macro_f1": 0.8963544416714002
20
+ }
21
+ }
22
+ }
23
+ }
jmteb/results/Clustering/scores_livedoor_news.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "v_measure_score",
3
+ "metric_value": 0.5063220603646297,
4
+ "details": {
5
+ "optimal_clustering_model_name": "MiniBatchKMeans",
6
+ "val_scores": {
7
+ "MiniBatchKMeans": {
8
+ "v_measure_score": 0.5162254920293606,
9
+ "homogeneity_score": 0.5134942369453169,
10
+ "completeness_score": 0.5189859573522001
11
+ },
12
+ "AgglomerativeClustering": {
13
+ "v_measure_score": 0.4817740009110648,
14
+ "homogeneity_score": 0.47151381593215835,
15
+ "completeness_score": 0.4924906436335508
16
+ },
17
+ "BisectingKMeans": {
18
+ "v_measure_score": 0.4672899779727179,
19
+ "homogeneity_score": 0.4669032808035401,
20
+ "completeness_score": 0.4676773162110705
21
+ },
22
+ "Birch": {
23
+ "v_measure_score": 0.48380197476431497,
24
+ "homogeneity_score": 0.4759913341716145,
25
+ "completeness_score": 0.4918732245931716
26
+ }
27
+ },
28
+ "test_scores": {
29
+ "MiniBatchKMeans": {
30
+ "v_measure_score": 0.5063220603646297,
31
+ "homogeneity_score": 0.5072769713955575,
32
+ "completeness_score": 0.5053707376763799
33
+ }
34
+ }
35
+ }
36
+ }
jmteb/results/Clustering/scores_mewsc16.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "v_measure_score",
3
+ "metric_value": 0.45462727073049025,
4
+ "details": {
5
+ "optimal_clustering_model_name": "Birch",
6
+ "val_scores": {
7
+ "MiniBatchKMeans": {
8
+ "v_measure_score": 0.3842466809312565,
9
+ "homogeneity_score": 0.4226378649485537,
10
+ "completeness_score": 0.3522493803950809
11
+ },
12
+ "AgglomerativeClustering": {
13
+ "v_measure_score": 0.4377888516765032,
14
+ "homogeneity_score": 0.4772841182693872,
15
+ "completeness_score": 0.40433049804468363
16
+ },
17
+ "BisectingKMeans": {
18
+ "v_measure_score": 0.40122932757998875,
19
+ "homogeneity_score": 0.43755915162610337,
20
+ "completeness_score": 0.3704698214056897
21
+ },
22
+ "Birch": {
23
+ "v_measure_score": 0.46841918020711176,
24
+ "homogeneity_score": 0.506935954769718,
25
+ "completeness_score": 0.4353420774727962
26
+ }
27
+ },
28
+ "test_scores": {
29
+ "Birch": {
30
+ "v_measure_score": 0.45462727073049025,
31
+ "homogeneity_score": 0.4852439372487074,
32
+ "completeness_score": 0.42764484284900073
33
+ }
34
+ }
35
+ }
36
+ }
jmteb/results/PairClassification/scores_paws_x_ja.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "binary_f1",
3
+ "metric_value": 0.6403061224489796,
4
+ "details": {
5
+ "optimal_distance_metric": "euclidean_distances",
6
+ "val_scores": {
7
+ "cosine_distances": {
8
+ "accuracy": 0.5725,
9
+ "accuracy_threshold": -0.12011593580245972,
10
+ "binary_f1": 0.5979670522257273,
11
+ "binary_f1_threshold": 1.0
12
+ },
13
+ "manhatten_distances": {
14
+ "accuracy": 0.6505,
15
+ "accuracy_threshold": 5.353497505187988,
16
+ "binary_f1": 0.6209476309226932,
17
+ "binary_f1_threshold": 10.806973457336426
18
+ },
19
+ "euclidean_distances": {
20
+ "accuracy": 0.653,
21
+ "accuracy_threshold": 0.30937591195106506,
22
+ "binary_f1": 0.6221858370855505,
23
+ "binary_f1_threshold": 0.6454310417175293
24
+ },
25
+ "dot_similarities": {
26
+ "accuracy": 0.654,
27
+ "accuracy_threshold": 0.9534672498703003,
28
+ "binary_f1": 0.6213355048859935,
29
+ "binary_f1_threshold": 0.7887746095657349
30
+ }
31
+ },
32
+ "test_scores": {
33
+ "euclidean_distances": {
34
+ "accuracy": 0.6085,
35
+ "accuracy_threshold": 0.30937591195106506,
36
+ "binary_f1": 0.6403061224489796,
37
+ "binary_f1_threshold": 0.6454310417175293
38
+ }
39
+ }
40
+ }
41
+ }
jmteb/results/Reranking/scores_esci.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "ndcg@10",
3
+ "metric_value": 0.9326535342404364,
4
+ "details": {
5
+ "optimal_distance_metric": "cosine_similarity",
6
+ "val_scores": {
7
+ "cosine_similarity": {
8
+ "ndcg@10": 0.9457387910643943,
9
+ "ndcg@20": 0.9574971659413306,
10
+ "ndcg@40": 0.9650000580183328
11
+ },
12
+ "dot_score": {
13
+ "ndcg@10": 0.9457186021682247,
14
+ "ndcg@20": 0.957477691902003,
15
+ "ndcg@40": 0.9649985588722803
16
+ },
17
+ "euclidean_distance": {
18
+ "ndcg@10": 0.9455185072273512,
19
+ "ndcg@20": 0.9573786712685046,
20
+ "ndcg@40": 0.9649006041448088
21
+ }
22
+ },
23
+ "test_scores": {
24
+ "cosine_similarity": {
25
+ "ndcg@10": 0.9326535342404364,
26
+ "ndcg@20": 0.9490780973799793,
27
+ "ndcg@40": 0.9582022320743613
28
+ }
29
+ }
30
+ }
31
+ }
jmteb/results/Retrieval/scores_jagovfaqs_22k.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "ndcg@10",
3
+ "metric_value": 0.7037560169204234,
4
+ "details": {
5
+ "optimal_distance_metric": "euclidean_distance",
6
+ "val_scores": {
7
+ "cosine_similarity": {
8
+ "accuracy@1": 0.5621526762211173,
9
+ "accuracy@3": 0.7458321146534074,
10
+ "accuracy@5": 0.7987715706346885,
11
+ "accuracy@10": 0.8581456566247441,
12
+ "ndcg@10": 0.7123343291094127,
13
+ "mrr@10": 0.6653833386723116
14
+ },
15
+ "dot_score": {
16
+ "accuracy@1": 0.5627376425855514,
17
+ "accuracy@3": 0.7461245978356245,
18
+ "accuracy@5": 0.7978941210880375,
19
+ "accuracy@10": 0.8587306229891781,
20
+ "ndcg@10": 0.712624535627044,
21
+ "mrr@10": 0.6656209232254854
22
+ },
23
+ "euclidean_distance": {
24
+ "accuracy@1": 0.5633226089499854,
25
+ "accuracy@3": 0.7455396314711904,
26
+ "accuracy@5": 0.7987715706346885,
27
+ "accuracy@10": 0.8578531734425271,
28
+ "ndcg@10": 0.7126592570676551,
29
+ "mrr@10": 0.6659067907166759
30
+ }
31
+ },
32
+ "test_scores": {
33
+ "euclidean_distance": {
34
+ "accuracy@1": 0.5675438596491228,
35
+ "accuracy@3": 0.7292397660818714,
36
+ "accuracy@5": 0.7839181286549708,
37
+ "accuracy@10": 0.8403508771929824,
38
+ "ndcg@10": 0.7037560169204234,
39
+ "mrr@10": 0.660028311519539
40
+ }
41
+ }
42
+ }
43
+ }
jmteb/results/Retrieval/scores_jaqket.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "ndcg@10",
3
+ "metric_value": 0.61373516847512,
4
+ "details": {
5
+ "optimal_distance_metric": "cosine_similarity",
6
+ "val_scores": {
7
+ "cosine_similarity": {
8
+ "accuracy@1": 0.4562814070351759,
9
+ "accuracy@3": 0.6572864321608041,
10
+ "accuracy@5": 0.7045226130653266,
11
+ "accuracy@10": 0.7628140703517587,
12
+ "ndcg@10": 0.6130067237476119,
13
+ "mrr@10": 0.5645469410544787
14
+ },
15
+ "dot_score": {
16
+ "accuracy@1": 0.457286432160804,
17
+ "accuracy@3": 0.6552763819095477,
18
+ "accuracy@5": 0.7055276381909548,
19
+ "accuracy@10": 0.7618090452261307,
20
+ "ndcg@10": 0.6127513989155836,
21
+ "mrr@10": 0.5645381670256041
22
+ },
23
+ "euclidean_distance": {
24
+ "accuracy@1": 0.4552763819095477,
25
+ "accuracy@3": 0.6572864321608041,
26
+ "accuracy@5": 0.7055276381909548,
27
+ "accuracy@10": 0.7628140703517587,
28
+ "ndcg@10": 0.612740626009351,
29
+ "mrr@10": 0.5641572944085506
30
+ }
31
+ },
32
+ "test_scores": {
33
+ "cosine_similarity": {
34
+ "accuracy@1": 0.4493480441323972,
35
+ "accuracy@3": 0.6288866599799399,
36
+ "accuracy@5": 0.7021063189568706,
37
+ "accuracy@10": 0.7903711133400201,
38
+ "ndcg@10": 0.61373516847512,
39
+ "mrr@10": 0.5578975020298991
40
+ }
41
+ }
42
+ }
43
+ }
jmteb/results/Retrieval/scores_mrtydi.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "ndcg@10",
3
+ "metric_value": 0.365136589573548,
4
+ "details": {
5
+ "optimal_distance_metric": "dot_score",
6
+ "val_scores": {
7
+ "cosine_similarity": {
8
+ "accuracy@1": 0.22629310344827586,
9
+ "accuracy@3": 0.4331896551724138,
10
+ "accuracy@5": 0.5064655172413793,
11
+ "accuracy@10": 0.59375,
12
+ "ndcg@10": 0.40613921488749105,
13
+ "mrr@10": 0.34660774151614676
14
+ },
15
+ "dot_score": {
16
+ "accuracy@1": 0.22952586206896552,
17
+ "accuracy@3": 0.43211206896551724,
18
+ "accuracy@5": 0.5064655172413793,
19
+ "accuracy@10": 0.5948275862068966,
20
+ "ndcg@10": 0.4072380263572043,
21
+ "mrr@10": 0.34782686781609207
22
+ },
23
+ "euclidean_distance": {
24
+ "accuracy@1": 0.22629310344827586,
25
+ "accuracy@3": 0.4353448275862069,
26
+ "accuracy@5": 0.5075431034482759,
27
+ "accuracy@10": 0.5959051724137931,
28
+ "ndcg@10": 0.4071683076743109,
29
+ "mrr@10": 0.34731544198139036
30
+ }
31
+ },
32
+ "test_scores": {
33
+ "dot_score": {
34
+ "accuracy@1": 0.23472222222222222,
35
+ "accuracy@3": 0.4027777777777778,
36
+ "accuracy@5": 0.4847222222222222,
37
+ "accuracy@10": 0.5861111111111111,
38
+ "ndcg@10": 0.365136589573548,
39
+ "mrr@10": 0.3406740520282186
40
+ }
41
+ }
42
+ }
43
+ }
jmteb/results/Retrieval/scores_nlp_journal_abs_intro.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "ndcg@10",
3
+ "metric_value": 0.9312261091012943,
4
+ "details": {
5
+ "optimal_distance_metric": "cosine_similarity",
6
+ "val_scores": {
7
+ "cosine_similarity": {
8
+ "accuracy@1": 0.94,
9
+ "accuracy@3": 0.96,
10
+ "accuracy@5": 0.97,
11
+ "accuracy@10": 0.98,
12
+ "ndcg@10": 0.9589493964497818,
13
+ "mrr@10": 0.9522619047619049
14
+ },
15
+ "dot_score": {
16
+ "accuracy@1": 0.94,
17
+ "accuracy@3": 0.96,
18
+ "accuracy@5": 0.97,
19
+ "accuracy@10": 0.98,
20
+ "ndcg@10": 0.9589493964497818,
21
+ "mrr@10": 0.9522619047619049
22
+ },
23
+ "euclidean_distance": {
24
+ "accuracy@1": 0.94,
25
+ "accuracy@3": 0.96,
26
+ "accuracy@5": 0.97,
27
+ "accuracy@10": 0.98,
28
+ "ndcg@10": 0.9589493964497818,
29
+ "mrr@10": 0.9522619047619049
30
+ }
31
+ },
32
+ "test_scores": {
33
+ "cosine_similarity": {
34
+ "accuracy@1": 0.8762376237623762,
35
+ "accuracy@3": 0.9455445544554455,
36
+ "accuracy@5": 0.9678217821782178,
37
+ "accuracy@10": 0.9826732673267327,
38
+ "ndcg@10": 0.9312261091012943,
39
+ "mrr@10": 0.914452695269527
40
+ }
41
+ }
42
+ }
43
+ }
jmteb/results/Retrieval/scores_nlp_journal_title_abs.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "ndcg@10",
3
+ "metric_value": 0.9660376057905824,
4
+ "details": {
5
+ "optimal_distance_metric": "cosine_similarity",
6
+ "val_scores": {
7
+ "cosine_similarity": {
8
+ "accuracy@1": 0.92,
9
+ "accuracy@3": 0.98,
10
+ "accuracy@5": 0.99,
11
+ "accuracy@10": 1.0,
12
+ "ndcg@10": 0.964415325130387,
13
+ "mrr@10": 0.9525000000000001
14
+ },
15
+ "dot_score": {
16
+ "accuracy@1": 0.91,
17
+ "accuracy@3": 0.98,
18
+ "accuracy@5": 0.99,
19
+ "accuracy@10": 1.0,
20
+ "ndcg@10": 0.9607246226661015,
21
+ "mrr@10": 0.9475000000000001
22
+ },
23
+ "euclidean_distance": {
24
+ "accuracy@1": 0.92,
25
+ "accuracy@3": 0.98,
26
+ "accuracy@5": 0.99,
27
+ "accuracy@10": 1.0,
28
+ "ndcg@10": 0.964415325130387,
29
+ "mrr@10": 0.9525000000000001
30
+ }
31
+ },
32
+ "test_scores": {
33
+ "cosine_similarity": {
34
+ "accuracy@1": 0.9331683168316832,
35
+ "accuracy@3": 0.9727722772277227,
36
+ "accuracy@5": 0.995049504950495,
37
+ "accuracy@10": 0.9975247524752475,
38
+ "ndcg@10": 0.9660376057905824,
39
+ "mrr@10": 0.9556930693069304
40
+ }
41
+ }
42
+ }
43
+ }
jmteb/results/Retrieval/scores_nlp_journal_title_intro.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "ndcg@10",
3
+ "metric_value": 0.8239125997597097,
4
+ "details": {
5
+ "optimal_distance_metric": "cosine_similarity",
6
+ "val_scores": {
7
+ "cosine_similarity": {
8
+ "accuracy@1": 0.79,
9
+ "accuracy@3": 0.87,
10
+ "accuracy@5": 0.92,
11
+ "accuracy@10": 0.94,
12
+ "ndcg@10": 0.8625804492635228,
13
+ "mrr@10": 0.8376666666666667
14
+ },
15
+ "dot_score": {
16
+ "accuracy@1": 0.79,
17
+ "accuracy@3": 0.87,
18
+ "accuracy@5": 0.92,
19
+ "accuracy@10": 0.94,
20
+ "ndcg@10": 0.8625804492635228,
21
+ "mrr@10": 0.8376666666666667
22
+ },
23
+ "euclidean_distance": {
24
+ "accuracy@1": 0.79,
25
+ "accuracy@3": 0.86,
26
+ "accuracy@5": 0.92,
27
+ "accuracy@10": 0.94,
28
+ "ndcg@10": 0.8618872148442569,
29
+ "mrr@10": 0.8368333333333333
30
+ }
31
+ },
32
+ "test_scores": {
33
+ "cosine_similarity": {
34
+ "accuracy@1": 0.7153465346534653,
35
+ "accuracy@3": 0.8589108910891089,
36
+ "accuracy@5": 0.8910891089108911,
37
+ "accuracy@10": 0.9183168316831684,
38
+ "ndcg@10": 0.8239125997597097,
39
+ "mrr@10": 0.7929278642149928
40
+ }
41
+ }
42
+ }
43
+ }
jmteb/results/STS/scores_jsick.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "spearman",
3
+ "metric_value": 0.7228774332572527,
4
+ "details": {
5
+ "optimal_similarity_metric": "dot_score",
6
+ "val_scores": {
7
+ "cosine_similarity": {
8
+ "pearson": 0.7787902785127275,
9
+ "spearman": 0.7463203269165151
10
+ },
11
+ "manhatten_distance": {
12
+ "pearson": 0.7696693385531954,
13
+ "spearman": 0.73658437615329
14
+ },
15
+ "euclidean_distance": {
16
+ "pearson": 0.7696693385531954,
17
+ "spearman": 0.73658437615329
18
+ },
19
+ "dot_score": {
20
+ "pearson": 0.7788735665737948,
21
+ "spearman": 0.7464361803709411
22
+ }
23
+ },
24
+ "test_scores": {
25
+ "dot_score": {
26
+ "pearson": 0.7579853118271599,
27
+ "spearman": 0.7228774332572527
28
+ }
29
+ }
30
+ }
31
+ }
jmteb/results/STS/scores_jsts.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metric_name": "spearman",
3
+ "metric_value": 0.8334009117886643,
4
+ "details": {
5
+ "optimal_similarity_metric": "cosine_similarity",
6
+ "val_scores": {
7
+ "cosine_similarity": {
8
+ "pearson": 0.8575300900249121,
9
+ "spearman": 0.8130300377229739
10
+ },
11
+ "manhatten_distance": {
12
+ "pearson": 0.8516427332926884,
13
+ "spearman": 0.8129478229587098
14
+ },
15
+ "euclidean_distance": {
16
+ "pearson": 0.8516427332926884,
17
+ "spearman": 0.8129478229587098
18
+ },
19
+ "dot_score": {
20
+ "pearson": 0.8575076226058185,
21
+ "spearman": 0.8130035755745093
22
+ }
23
+ },
24
+ "test_scores": {
25
+ "cosine_similarity": {
26
+ "pearson": 0.8764356710949792,
27
+ "spearman": 0.8334009117886643
28
+ }
29
+ }
30
+ }
31
+ }
jmteb/results/summary.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Classification": {
3
+ "amazon_counterfactual_classification": {
4
+ "macro_f1": 0.7725172847265871
5
+ },
6
+ "amazon_review_classification": {
7
+ "macro_f1": 0.580199599298548
8
+ },
9
+ "massive_intent_classification": {
10
+ "macro_f1": 0.7965072076950149
11
+ },
12
+ "massive_scenario_classification": {
13
+ "macro_f1": 0.8963544416714002
14
+ }
15
+ },
16
+ "Reranking": {
17
+ "esci": {
18
+ "ndcg@10": 0.9326535342404364
19
+ }
20
+ },
21
+ "Retrieval": {
22
+ "jagovfaqs_22k": {
23
+ "ndcg@10": 0.7037560169204234
24
+ },
25
+ "jaqket": {
26
+ "ndcg@10": 0.61373516847512
27
+ },
28
+ "mrtydi": {
29
+ "ndcg@10": 0.365136589573548
30
+ },
31
+ "nlp_journal_abs_intro": {
32
+ "ndcg@10": 0.9312261091012943
33
+ },
34
+ "nlp_journal_title_abs": {
35
+ "ndcg@10": 0.9660376057905824
36
+ },
37
+ "nlp_journal_title_intro": {
38
+ "ndcg@10": 0.8239125997597097
39
+ }
40
+ },
41
+ "STS": {
42
+ "jsick": {
43
+ "spearman": 0.7228774332572527
44
+ },
45
+ "jsts": {
46
+ "spearman": 0.8334009117886643
47
+ }
48
+ },
49
+ "Clustering": {
50
+ "livedoor_news": {
51
+ "v_measure_score": 0.5063220603646297
52
+ },
53
+ "mewsc16": {
54
+ "v_measure_score": 0.45462727073049025
55
+ }
56
+ },
57
+ "PairClassification": {
58
+ "paws_x_ja": {
59
+ "binary_f1": 0.6403061224489796
60
+ }
61
+ }
62
+ }
jmteb/tasks/amazon_counterfactual_classification.jsonnet ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ amazon_counterfactual_classification: {
3
+ class_path: 'ClassificationEvaluator',
4
+ init_args: {
5
+ train_dataset: {
6
+ class_path: 'HfClassificationDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'train',
10
+ name: 'amazon_counterfactual_classification',
11
+ },
12
+ },
13
+ val_dataset: {
14
+ class_path: 'HfClassificationDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'validation',
18
+ name: 'amazon_counterfactual_classification',
19
+ },
20
+ },
21
+ test_dataset: {
22
+ class_path: 'HfClassificationDataset',
23
+ init_args: {
24
+ path: 'sbintuitions/JMTEB',
25
+ split: 'test',
26
+ name: 'amazon_counterfactual_classification',
27
+ },
28
+ },
29
+ prefix: '同じクラスに属する文を探すために次の文を表現して\n',
30
+ },
31
+ },
32
+ }
jmteb/tasks/amazon_review_classification.jsonnet ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ amazon_review_classification: {
3
+ class_path: 'ClassificationEvaluator',
4
+ init_args: {
5
+ train_dataset: {
6
+ class_path: 'HfClassificationDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'train',
10
+ name: 'amazon_review_classification',
11
+ },
12
+ },
13
+ val_dataset: {
14
+ class_path: 'HfClassificationDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'validation',
18
+ name: 'amazon_review_classification',
19
+ },
20
+ },
21
+ test_dataset: {
22
+ class_path: 'HfClassificationDataset',
23
+ init_args: {
24
+ path: 'sbintuitions/JMTEB',
25
+ split: 'test',
26
+ name: 'amazon_review_classification',
27
+ },
28
+ },
29
+ prefix: '同じクラスに属する文を探すために次の文を表現して\n',
30
+ },
31
+ },
32
+ }
jmteb/tasks/esci.jsonnet ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ esci: {
3
+ class_path: 'RerankingEvaluator',
4
+ init_args: {
5
+ val_query_dataset: {
6
+ class_path: 'HfRerankingQueryDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'validation',
10
+ name: 'esci-query',
11
+ },
12
+ },
13
+ test_query_dataset: {
14
+ class_path: 'HfRerankingQueryDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'test',
18
+ name: 'esci-query',
19
+ },
20
+ },
21
+ doc_dataset: {
22
+ class_path: 'HfRerankingDocDataset',
23
+ init_args: {
24
+ path: 'sbintuitions/JMTEB',
25
+ split: 'corpus',
26
+ name: 'esci-corpus',
27
+ },
28
+ },
29
+ query_prefix: '関連した文書を探すために次の文を表現して\n',
30
+ doc_prefix: '次の文章を表現して\n',
31
+ },
32
+ },
33
+ }
jmteb/tasks/jagovfaqs_22k.jsonnet ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ jagovfaqs_22k: {
3
+ class_path: 'RetrievalEvaluator',
4
+ init_args: {
5
+ val_query_dataset: {
6
+ class_path: 'HfRetrievalQueryDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'validation',
10
+ name: 'jagovfaqs_22k-query',
11
+ },
12
+ },
13
+ test_query_dataset: {
14
+ class_path: 'HfRetrievalQueryDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'test',
18
+ name: 'jagovfaqs_22k-query',
19
+ },
20
+ },
21
+ doc_dataset: {
22
+ class_path: 'HfRetrievalDocDataset',
23
+ init_args: {
24
+ path: 'sbintuitions/JMTEB',
25
+ split: 'corpus',
26
+ name: 'jagovfaqs_22k-corpus',
27
+ },
28
+ },
29
+ query_prefix: '関連した文書を探すために次の文を表現して\n',
30
+ doc_prefix: '次の文章を表現して\n',
31
+ },
32
+ },
33
+ }
jmteb/tasks/jaqket.jsonnet ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ jaqket: {
3
+ class_path: 'RetrievalEvaluator',
4
+ init_args: {
5
+ val_query_dataset: {
6
+ class_path: 'HfRetrievalQueryDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'validation',
10
+ name: 'jaqket-query',
11
+ },
12
+ },
13
+ test_query_dataset: {
14
+ class_path: 'HfRetrievalQueryDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'test',
18
+ name: 'jaqket-query',
19
+ },
20
+ },
21
+ doc_dataset: {
22
+ class_path: 'HfRetrievalDocDataset',
23
+ init_args: {
24
+ path: 'sbintuitions/JMTEB',
25
+ split: 'corpus',
26
+ name: 'jaqket-corpus',
27
+ },
28
+ },
29
+ query_prefix: '関連した文書を探すために次の文を表現して\n',
30
+ doc_prefix: '次の文章を表現して\n',
31
+ },
32
+ },
33
+ }
jmteb/tasks/jsick.jsonnet ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ jsick: {
3
+ class_path: 'STSEvaluator',
4
+ init_args: {
5
+ val_dataset: {
6
+ class_path: 'HfSTSDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'validation',
10
+ name: 'jsick',
11
+ },
12
+ },
13
+ test_dataset: {
14
+ class_path: 'HfSTSDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'test',
18
+ name: 'jsick',
19
+ },
20
+ },
21
+ sentence1_prefix: '同じ意味の文を探すために次の文を表現して\n',
22
+ sentence2_prefix: '同じ意味の文を探すために次の文を表現して\n',
23
+ },
24
+ },
25
+ }
jmteb/tasks/jsts.jsonnet ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ jsts: {
3
+ class_path: 'STSEvaluator',
4
+ init_args: {
5
+ val_dataset: {
6
+ class_path: 'HfSTSDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'train',
10
+ name: 'jsts',
11
+ },
12
+ },
13
+ test_dataset: {
14
+ class_path: 'HfSTSDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'test',
18
+ name: 'jsts',
19
+ },
20
+ },
21
+ sentence1_prefix: '同じ意味の文を探すために次の文を表現して\n',
22
+ sentence2_prefix: '同じ意味の文を探すために次の文を表現して\n',
23
+ },
24
+ },
25
+ }
jmteb/tasks/livedoor_news.jsonnet ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ livedoor_news: {
3
+ class_path: 'ClusteringEvaluator',
4
+ init_args: {
5
+ val_dataset: {
6
+ class_path: 'HfClusteringDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'validation',
10
+ name: 'livedoor_news',
11
+ },
12
+ },
13
+ test_dataset: {
14
+ class_path: 'HfClusteringDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'test',
18
+ name: 'livedoor_news',
19
+ },
20
+ },
21
+ prefix: '類似した文を探すために次の文を表現して\n',
22
+ },
23
+ },
24
+ }
jmteb/tasks/massive_intent_classification.jsonnet ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ massive_intent_classification: {
3
+ class_path: 'ClassificationEvaluator',
4
+ init_args: {
5
+ train_dataset: {
6
+ class_path: 'HfClassificationDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'train',
10
+ name: 'massive_intent_classification',
11
+ },
12
+ },
13
+ val_dataset: {
14
+ class_path: 'HfClassificationDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'validation',
18
+ name: 'massive_intent_classification',
19
+ },
20
+ },
21
+ test_dataset: {
22
+ class_path: 'HfClassificationDataset',
23
+ init_args: {
24
+ path: 'sbintuitions/JMTEB',
25
+ split: 'test',
26
+ name: 'massive_intent_classification',
27
+ },
28
+ },
29
+ prefix: '同じクラスに属する文を探すために次の文を表現して\n',
30
+ },
31
+ },
32
+ }
jmteb/tasks/massive_scenario_classification.jsonnet ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ massive_scenario_classification: {
3
+ class_path: 'ClassificationEvaluator',
4
+ init_args: {
5
+ train_dataset: {
6
+ class_path: 'HfClassificationDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'train',
10
+ name: 'massive_scenario_classification',
11
+ },
12
+ },
13
+ val_dataset: {
14
+ class_path: 'HfClassificationDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'validation',
18
+ name: 'massive_scenario_classification',
19
+ },
20
+ },
21
+ test_dataset: {
22
+ class_path: 'HfClassificationDataset',
23
+ init_args: {
24
+ path: 'sbintuitions/JMTEB',
25
+ split: 'test',
26
+ name: 'massive_scenario_classification',
27
+ },
28
+ },
29
+ prefix: '同じクラスに属する文を探すために次の文を表現して\n',
30
+ },
31
+ },
32
+ }
jmteb/tasks/mewsc16.jsonnet ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ mewsc16: {
3
+ class_path: 'ClusteringEvaluator',
4
+ init_args: {
5
+ val_dataset: {
6
+ class_path: 'HfClusteringDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'validation',
10
+ name: 'mewsc16_ja',
11
+ },
12
+ },
13
+ test_dataset: {
14
+ class_path: 'HfClusteringDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'test',
18
+ name: 'mewsc16_ja',
19
+ },
20
+ },
21
+ prefix: '類似した文を探すために次の文を表現して\n',
22
+ },
23
+ },
24
+ }
jmteb/tasks/mrtydi.jsonnet ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ mrtydi: {
3
+ class_path: 'RetrievalEvaluator',
4
+ init_args: {
5
+ val_query_dataset: {
6
+ class_path: 'HfRetrievalQueryDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'validation',
10
+ name: 'mrtydi-query',
11
+ },
12
+ },
13
+ test_query_dataset: {
14
+ class_path: 'HfRetrievalQueryDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'test',
18
+ name: 'mrtydi-query',
19
+ },
20
+ },
21
+ doc_dataset: {
22
+ class_path: 'HfRetrievalDocDataset',
23
+ init_args: {
24
+ path: 'sbintuitions/JMTEB',
25
+ split: 'corpus',
26
+ name: 'mrtydi-corpus',
27
+ },
28
+ },
29
+ "doc_chunk_size":10000,
30
+ query_prefix: '関連した文書を探すために次の文を表現して\n',
31
+ doc_prefix: '次の文章を表現して\n',
32
+ },
33
+ },
34
+ }
jmteb/tasks/nlp_journal_abs_intro.jsonnet ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ nlp_journal_abs_intro: {
3
+ class_path: 'RetrievalEvaluator',
4
+ init_args: {
5
+ val_query_dataset: {
6
+ class_path: 'HfRetrievalQueryDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'validation',
10
+ name: 'nlp_journal_abs_intro-query',
11
+ },
12
+ },
13
+ test_query_dataset: {
14
+ class_path: 'HfRetrievalQueryDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'test',
18
+ name: 'nlp_journal_abs_intro-query',
19
+ },
20
+ },
21
+ doc_dataset: {
22
+ class_path: 'HfRetrievalDocDataset',
23
+ init_args: {
24
+ path: 'sbintuitions/JMTEB',
25
+ split: 'corpus',
26
+ name: 'nlp_journal_abs_intro-corpus',
27
+ },
28
+ },
29
+ query_prefix: '関連した文書を探すために次の文を表現して\n',
30
+ doc_prefix: '次の文章を表現して\n',
31
+ },
32
+ },
33
+ }
jmteb/tasks/nlp_journal_title_abs.jsonnet ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ nlp_journal_title_abs: {
3
+ class_path: 'RetrievalEvaluator',
4
+ init_args: {
5
+ val_query_dataset: {
6
+ class_path: 'HfRetrievalQueryDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'validation',
10
+ name: 'nlp_journal_title_abs-query',
11
+ },
12
+ },
13
+ test_query_dataset: {
14
+ class_path: 'HfRetrievalQueryDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'test',
18
+ name: 'nlp_journal_title_abs-query',
19
+ },
20
+ },
21
+ doc_dataset: {
22
+ class_path: 'HfRetrievalDocDataset',
23
+ init_args: {
24
+ path: 'sbintuitions/JMTEB',
25
+ split: 'corpus',
26
+ name: 'nlp_journal_title_abs-corpus',
27
+ },
28
+ },
29
+ query_prefix: '関連した文書を探すために次の文を表現して\n',
30
+ doc_prefix: '次の文章を表現して\n',
31
+ },
32
+ },
33
+ }
jmteb/tasks/nlp_journal_title_intro.jsonnet ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ nlp_journal_title_intro: {
3
+ class_path: 'RetrievalEvaluator',
4
+ init_args: {
5
+ val_query_dataset: {
6
+ class_path: 'HfRetrievalQueryDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'validation',
10
+ name: 'nlp_journal_title_intro-query',
11
+ },
12
+ },
13
+ test_query_dataset: {
14
+ class_path: 'HfRetrievalQueryDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'test',
18
+ name: 'nlp_journal_title_intro-query',
19
+ },
20
+ },
21
+ doc_dataset: {
22
+ class_path: 'HfRetrievalDocDataset',
23
+ init_args: {
24
+ path: 'sbintuitions/JMTEB',
25
+ split: 'corpus',
26
+ name: 'nlp_journal_title_intro-corpus',
27
+ },
28
+ },
29
+ query_prefix: '関連した文書を探すために次の文を表現して\n',
30
+ doc_prefix: '次の文章を表現して\n',
31
+ },
32
+ },
33
+ }
jmteb/tasks/paws_x_ja.jsonnet ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ paws_x_ja: {
3
+ class_path: 'PairClassificationEvaluator',
4
+ init_args: {
5
+ val_dataset: {
6
+ class_path: 'HfPairClassificationDataset',
7
+ init_args: {
8
+ path: 'sbintuitions/JMTEB',
9
+ split: 'validation',
10
+ name: 'paws_x_ja',
11
+ },
12
+ },
13
+ test_dataset: {
14
+ class_path: 'HfPairClassificationDataset',
15
+ init_args: {
16
+ path: 'sbintuitions/JMTEB',
17
+ split: 'test',
18
+ name: 'paws_x_ja',
19
+ },
20
+ },
21
+ sentence1_prefix: '同じ意味の文を探すために次の文を表現して\n',
22
+ sentence2_prefix: '同じ意味の文を探すために次の文を表現して\n',
23
+ },
24
+ },
25
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d43aafff5dda1ece7a456d98bbc22f404a33ca569c56656fbc8a9b6ab5c8beb4
3
+ size 264292496
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
mteb/models/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from .default import PROMPT as default_prompt
2
+ from .retrieva import PROMPT as retrieva_prompt
3
+ from .retrieva_en import PROMPT as retrieva_en_prompt
4
+
5
+
6
+ PROMPTS = {
7
+ "default": default_prompt,
8
+ "retrieva": retrieva_prompt,
9
+ "retrieva-en": retrieva_en_prompt,
10
+ }
mteb/models/default.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ PROMPT = {
2
+ "query": "query: ",
3
+ "passage": "passage: ",
4
+ }
mteb/models/retrieva.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PROMPT = {
2
+ "STS": "同じ意味の文を探すために次の文を表現して\n",
3
+ "Summarization": "次の記事またはタイトルを表現して\n",
4
+ "BitextMining": "次の文を表現して\n",
5
+ "Classification": "同じクラスに属する文を探すために次の文を表現して\n",
6
+ "Clustering": "類似した文を探すために次の文を表現して\n",
7
+ "Reranking-query": "関連した文書を探すために次の文を表現して\n",
8
+ "Reranking-passage": "次の文章を表現して\n",
9
+ "Retrieval-query": "関連した文書を探すために次の文を表現して\n",
10
+ "Retrieval-passage": "次の文章を表現して\n",
11
+ "InstructionRetrieval": "",
12
+ "PairClassification": "同じ意味の文を探すために次の文を表現して\n",
13
+ }
mteb/models/retrieva_en.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PROMPT = {
2
+ "STS": "Represent the sentence for retrieving duplicate sentences:\n",
3
+ "Summarization": "Represent the news article or news title for retrieval:\n",
4
+ "BitextMining": "Represent the sentence\n",
5
+ "Classification": "Represent the sentence for retrieving the sentence belonging to the same category:\n",
6
+ "Clustering": "Represent the sentence to find similar sentences:\n",
7
+ "Reranking-query": "Represent the question:\n",
8
+ "Reranking-passage": "Represent the following text:\n",
9
+ "Retrieval-query": "Represent the question:\n",
10
+ "Retrieval-passage": "Represent the following text:\n",
11
+ "InstructionRetrieval": "Retrieve text based on user query:\n",
12
+ "PairClassification": "Represent the sentence for retrieving duplicate sentences:\n",
13
+ "MultilabelClassification": "Represent the sentence for retrieving the sentence belonging to the same category:\n",
14
+ "Speed": "",
15
+ }
mteb/mteb_eval.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Evaluate AMBER models"""
2
+
3
+ import argparse
4
+
5
+ import mteb
6
+
7
+ from models import PROMPTS
8
+
9
+ BENCHMARKS = {
10
+ "en": "MTEB(eng, v2)",
11
+ "ja": "MTEB(jpn, v1)",
12
+ }
13
+
14
+
15
+ def get_args() -> argparse.Namespace:
16
+ parser = argparse.ArgumentParser()
17
+ parser.add_argument("--model_type", type=str, required=True, help="Model name", choices=PROMPTS.keys())
18
+ parser.add_argument("--model_name_or_path", type=str, required=True)
19
+ parser.add_argument("--batch_size", type=int, default=32, help="Batch size")
20
+ parser.add_argument("--output_dir", type=str, required=True, help="Output directory")
21
+ parser.add_argument("--benchmark", type=str, required=True, choices=BENCHMARKS.keys())
22
+ parser.add_argument("--corpus_chunk_size", type=int, default=50000)
23
+ parser.add_argument("--convert_to_tensor", action="store_true")
24
+ return parser.parse_args()
25
+
26
+
27
+ def main():
28
+ args = get_args()
29
+ prompt = PROMPTS[args.model_type]
30
+ model = mteb.get_model(args.model_name_or_path, model_prompts=prompt)
31
+
32
+ tasks = mteb.get_benchmark(BENCHMARKS[args.benchmark])
33
+ evaluation = mteb.MTEB(tasks=tasks)
34
+
35
+ encode_kwargs = {
36
+ "batch_size": args.batch_size,
37
+ "convert_to_tensor": args.convert_to_tensor,
38
+ }
39
+
40
+ evaluation.run(
41
+ model,
42
+ output_folder=args.output_dir,
43
+ encode_kwargs=encode_kwargs,
44
+ corpus_chunk_size=args.corpus_chunk_size,
45
+ )
46
+
47
+
48
+ if __name__ == "__main__":
49
+ main()
mteb/results/AmazonCounterfactualClassification.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "e8379541af4e31359cca9fbcf4b00f2671dba205",
3
+ "task_name": "AmazonCounterfactualClassification",
4
+ "mteb_version": "1.36.1",
5
+ "scores": {
6
+ "test": [
7
+ {
8
+ "accuracy": 0.681642,
9
+ "f1": 0.619811,
10
+ "f1_weighted": 0.712157,
11
+ "ap": 0.306541,
12
+ "ap_weighted": 0.306541,
13
+ "scores_per_experiment": [
14
+ {
15
+ "accuracy": 0.676119,
16
+ "f1": 0.613878,
17
+ "f1_weighted": 0.708281,
18
+ "ap": 0.297508,
19
+ "ap_weighted": 0.297508
20
+ },
21
+ {
22
+ "accuracy": 0.725373,
23
+ "f1": 0.651787,
24
+ "f1_weighted": 0.749265,
25
+ "ap": 0.324443,
26
+ "ap_weighted": 0.324443
27
+ },
28
+ {
29
+ "accuracy": 0.598507,
30
+ "f1": 0.56537,
31
+ "f1_weighted": 0.638451,
32
+ "ap": 0.285849,
33
+ "ap_weighted": 0.285849
34
+ },
35
+ {
36
+ "accuracy": 0.69403,
37
+ "f1": 0.641244,
38
+ "f1_weighted": 0.725044,
39
+ "ap": 0.333082,
40
+ "ap_weighted": 0.333082
41
+ },
42
+ {
43
+ "accuracy": 0.683582,
44
+ "f1": 0.632402,
45
+ "f1_weighted": 0.715928,
46
+ "ap": 0.32599,
47
+ "ap_weighted": 0.32599
48
+ },
49
+ {
50
+ "accuracy": 0.677612,
51
+ "f1": 0.609393,
52
+ "f1_weighted": 0.708798,
53
+ "ap": 0.288755,
54
+ "ap_weighted": 0.288755
55
+ },
56
+ {
57
+ "accuracy": 0.755224,
58
+ "f1": 0.67807,
59
+ "f1_weighted": 0.774042,
60
+ "ap": 0.349254,
61
+ "ap_weighted": 0.349254
62
+ },
63
+ {
64
+ "accuracy": 0.735821,
65
+ "f1": 0.658379,
66
+ "f1_weighted": 0.757427,
67
+ "ap": 0.328302,
68
+ "ap_weighted": 0.328302
69
+ },
70
+ {
71
+ "accuracy": 0.665672,
72
+ "f1": 0.605319,
73
+ "f1_weighted": 0.699304,
74
+ "ap": 0.291461,
75
+ "ap_weighted": 0.291461
76
+ },
77
+ {
78
+ "accuracy": 0.604478,
79
+ "f1": 0.542268,
80
+ "f1_weighted": 0.645027,
81
+ "ap": 0.240767,
82
+ "ap_weighted": 0.240767
83
+ }
84
+ ],
85
+ "main_score": 0.681642,
86
+ "hf_subset": "en",
87
+ "languages": [
88
+ "eng-Latn"
89
+ ]
90
+ }
91
+ ]
92
+ },
93
+ "evaluation_time": 10.654787302017212,
94
+ "kg_co2_emissions": null
95
+ }
mteb/results/ArXivHierarchicalClusteringP2P.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "0bbdb47bcbe3a90093699aefeed338a0f28a7ee8",
3
+ "task_name": "ArXivHierarchicalClusteringP2P",
4
+ "mteb_version": "1.36.1",
5
+ "scores": {
6
+ "test": [
7
+ {
8
+ "v_measures": {
9
+ "Level 0": [
10
+ 0.507858,
11
+ 0.514541,
12
+ 0.544659,
13
+ 0.512061,
14
+ 0.542216,
15
+ 0.524017,
16
+ 0.49424,
17
+ 0.536251,
18
+ 0.558261,
19
+ 0.55925
20
+ ],
21
+ "Level 1": [
22
+ 0.590244,
23
+ 0.610952,
24
+ 0.551394,
25
+ 0.585449,
26
+ 0.58945,
27
+ 0.581477,
28
+ 0.581684,
29
+ 0.570883,
30
+ 0.574588,
31
+ 0.60155
32
+ ]
33
+ },
34
+ "v_measure": 0.556551,
35
+ "v_measure_std": 0.032919,
36
+ "main_score": 0.556551,
37
+ "hf_subset": "default",
38
+ "languages": [
39
+ "eng-Latn"
40
+ ]
41
+ }
42
+ ]
43
+ },
44
+ "evaluation_time": 6.287527084350586,
45
+ "kg_co2_emissions": null
46
+ }
mteb/results/ArXivHierarchicalClusteringS2S.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "b73bd54100e5abfa6e3a23dcafb46fe4d2438dc3",
3
+ "task_name": "ArXivHierarchicalClusteringS2S",
4
+ "mteb_version": "1.36.1",
5
+ "scores": {
6
+ "test": [
7
+ {
8
+ "v_measures": {
9
+ "Level 0": [
10
+ 0.502992,
11
+ 0.485244,
12
+ 0.477917,
13
+ 0.495744,
14
+ 0.477376,
15
+ 0.539086,
16
+ 0.544394,
17
+ 0.547524,
18
+ 0.522253,
19
+ 0.518454
20
+ ],
21
+ "Level 1": [
22
+ 0.563453,
23
+ 0.553718,
24
+ 0.559457,
25
+ 0.564745,
26
+ 0.538357,
27
+ 0.579393,
28
+ 0.548295,
29
+ 0.573239,
30
+ 0.562891,
31
+ 0.575329
32
+ ]
33
+ },
34
+ "v_measure": 0.536493,
35
+ "v_measure_std": 0.032359,
36
+ "main_score": 0.536493,
37
+ "hf_subset": "default",
38
+ "languages": [
39
+ "eng-Latn"
40
+ ]
41
+ }
42
+ ]
43
+ },
44
+ "evaluation_time": 5.862490892410278,
45
+ "kg_co2_emissions": null
46
+ }
mteb/results/ArguAna.json ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "c22ab2a51041ffd869aaddef7af8d8215647e41a",
3
+ "task_name": "ArguAna",
4
+ "mteb_version": "1.36.1",
5
+ "scores": {
6
+ "test": [
7
+ {
8
+ "ndcg_at_1": 0.25249,
9
+ "ndcg_at_3": 0.38056,
10
+ "ndcg_at_5": 0.43124,
11
+ "ndcg_at_10": 0.48068,
12
+ "ndcg_at_20": 0.51461,
13
+ "ndcg_at_100": 0.53158,
14
+ "ndcg_at_1000": 0.5338,
15
+ "map_at_1": 0.25249,
16
+ "map_at_3": 0.34803,
17
+ "map_at_5": 0.37598,
18
+ "map_at_10": 0.39611,
19
+ "map_at_20": 0.40569,
20
+ "map_at_100": 0.40821,
21
+ "map_at_1000": 0.4083,
22
+ "recall_at_1": 0.25249,
23
+ "recall_at_3": 0.47511,
24
+ "recall_at_5": 0.59886,
25
+ "recall_at_10": 0.7532,
26
+ "recall_at_20": 0.88549,
27
+ "recall_at_100": 0.9744,
28
+ "recall_at_1000": 0.99147,
29
+ "precision_at_1": 0.25249,
30
+ "precision_at_3": 0.15837,
31
+ "precision_at_5": 0.11977,
32
+ "precision_at_10": 0.07532,
33
+ "precision_at_20": 0.04427,
34
+ "precision_at_100": 0.00974,
35
+ "precision_at_1000": 0.00099,
36
+ "mrr_at_1": 0.258179,
37
+ "mrr_at_3": 0.349692,
38
+ "mrr_at_5": 0.377928,
39
+ "mrr_at_10": 0.398238,
40
+ "mrr_at_20": 0.407844,
41
+ "mrr_at_100": 0.410403,
42
+ "mrr_at_1000": 0.410495,
43
+ "nauc_ndcg_at_1_max": -0.026569,
44
+ "nauc_ndcg_at_1_std": -0.024726,
45
+ "nauc_ndcg_at_1_diff1": 0.102597,
46
+ "nauc_ndcg_at_3_max": -0.008151,
47
+ "nauc_ndcg_at_3_std": -0.033642,
48
+ "nauc_ndcg_at_3_diff1": 0.078841,
49
+ "nauc_ndcg_at_5_max": -0.003906,
50
+ "nauc_ndcg_at_5_std": -0.024619,
51
+ "nauc_ndcg_at_5_diff1": 0.07558,
52
+ "nauc_ndcg_at_10_max": 0.010935,
53
+ "nauc_ndcg_at_10_std": -0.018625,
54
+ "nauc_ndcg_at_10_diff1": 0.080503,
55
+ "nauc_ndcg_at_20_max": 0.013164,
56
+ "nauc_ndcg_at_20_std": -0.013407,
57
+ "nauc_ndcg_at_20_diff1": 0.078992,
58
+ "nauc_ndcg_at_100_max": 0.008316,
59
+ "nauc_ndcg_at_100_std": -0.008725,
60
+ "nauc_ndcg_at_100_diff1": 0.085633,
61
+ "nauc_ndcg_at_1000_max": 0.0045,
62
+ "nauc_ndcg_at_1000_std": -0.014357,
63
+ "nauc_ndcg_at_1000_diff1": 0.084438,
64
+ "nauc_map_at_1_max": -0.026569,
65
+ "nauc_map_at_1_std": -0.024726,
66
+ "nauc_map_at_1_diff1": 0.102597,
67
+ "nauc_map_at_3_max": -0.013567,
68
+ "nauc_map_at_3_std": -0.03222,
69
+ "nauc_map_at_3_diff1": 0.083557,
70
+ "nauc_map_at_5_max": -0.01162,
71
+ "nauc_map_at_5_std": -0.027384,
72
+ "nauc_map_at_5_diff1": 0.081184,
73
+ "nauc_map_at_10_max": -0.00615,
74
+ "nauc_map_at_10_std": -0.025394,
75
+ "nauc_map_at_10_diff1": 0.082831,
76
+ "nauc_map_at_20_max": -0.005492,
77
+ "nauc_map_at_20_std": -0.024076,
78
+ "nauc_map_at_20_diff1": 0.08281,
79
+ "nauc_map_at_100_max": -0.006049,
80
+ "nauc_map_at_100_std": -0.02356,
81
+ "nauc_map_at_100_diff1": 0.083933,
82
+ "nauc_map_at_1000_max": -0.006154,
83
+ "nauc_map_at_1000_std": -0.02373,
84
+ "nauc_map_at_1000_diff1": 0.083902,
85
+ "nauc_recall_at_1_max": -0.026569,
86
+ "nauc_recall_at_1_std": -0.024726,
87
+ "nauc_recall_at_1_diff1": 0.102597,
88
+ "nauc_recall_at_3_max": 0.007234,
89
+ "nauc_recall_at_3_std": -0.037315,
90
+ "nauc_recall_at_3_diff1": 0.066138,
91
+ "nauc_recall_at_5_max": 0.020847,
92
+ "nauc_recall_at_5_std": -0.014385,
93
+ "nauc_recall_at_5_diff1": 0.059428,
94
+ "nauc_recall_at_10_max": 0.092417,
95
+ "nauc_recall_at_10_std": 0.016372,
96
+ "nauc_recall_at_10_diff1": 0.076442,
97
+ "nauc_recall_at_20_max": 0.179819,
98
+ "nauc_recall_at_20_std": 0.093827,
99
+ "nauc_recall_at_20_diff1": 0.052288,
100
+ "nauc_recall_at_100_max": 0.463576,
101
+ "nauc_recall_at_100_std": 0.695314,
102
+ "nauc_recall_at_100_diff1": 0.252365,
103
+ "nauc_recall_at_1000_max": 0.473173,
104
+ "nauc_recall_at_1000_std": 0.803564,
105
+ "nauc_recall_at_1000_diff1": 0.30506,
106
+ "nauc_precision_at_1_max": -0.026569,
107
+ "nauc_precision_at_1_std": -0.024726,
108
+ "nauc_precision_at_1_diff1": 0.102597,
109
+ "nauc_precision_at_3_max": 0.007234,
110
+ "nauc_precision_at_3_std": -0.037315,
111
+ "nauc_precision_at_3_diff1": 0.066138,
112
+ "nauc_precision_at_5_max": 0.020847,
113
+ "nauc_precision_at_5_std": -0.014385,
114
+ "nauc_precision_at_5_diff1": 0.059428,
115
+ "nauc_precision_at_10_max": 0.092417,
116
+ "nauc_precision_at_10_std": 0.016372,
117
+ "nauc_precision_at_10_diff1": 0.076442,
118
+ "nauc_precision_at_20_max": 0.179819,
119
+ "nauc_precision_at_20_std": 0.093827,
120
+ "nauc_precision_at_20_diff1": 0.052288,
121
+ "nauc_precision_at_100_max": 0.463576,
122
+ "nauc_precision_at_100_std": 0.695314,
123
+ "nauc_precision_at_100_diff1": 0.252365,
124
+ "nauc_precision_at_1000_max": 0.473173,
125
+ "nauc_precision_at_1000_std": 0.803564,
126
+ "nauc_precision_at_1000_diff1": 0.30506,
127
+ "nauc_mrr_at_1_max": -0.025852,
128
+ "nauc_mrr_at_1_std": -0.027133,
129
+ "nauc_mrr_at_1_diff1": 0.083902,
130
+ "nauc_mrr_at_3_max": -0.023878,
131
+ "nauc_mrr_at_3_std": -0.031916,
132
+ "nauc_mrr_at_3_diff1": 0.06376,
133
+ "nauc_mrr_at_5_max": -0.020079,
134
+ "nauc_mrr_at_5_std": -0.029791,
135
+ "nauc_mrr_at_5_diff1": 0.063531,
136
+ "nauc_mrr_at_10_max": -0.0141,
137
+ "nauc_mrr_at_10_std": -0.027921,
138
+ "nauc_mrr_at_10_diff1": 0.065142,
139
+ "nauc_mrr_at_20_max": -0.0135,
140
+ "nauc_mrr_at_20_std": -0.026331,
141
+ "nauc_mrr_at_20_diff1": 0.064701,
142
+ "nauc_mrr_at_100_max": -0.01393,
143
+ "nauc_mrr_at_100_std": -0.025819,
144
+ "nauc_mrr_at_100_diff1": 0.065875,
145
+ "nauc_mrr_at_1000_max": -0.014037,
146
+ "nauc_mrr_at_1000_std": -0.025989,
147
+ "nauc_mrr_at_1000_diff1": 0.065838,
148
+ "main_score": 0.48068,
149
+ "hf_subset": "default",
150
+ "languages": [
151
+ "eng-Latn"
152
+ ]
153
+ }
154
+ ]
155
+ },
156
+ "evaluation_time": 27.352286100387573,
157
+ "kg_co2_emissions": null
158
+ }
mteb/results/AskUbuntuDupQuestions.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "2000358ca161889fa9c082cb41daa8dcfb161a54",
3
+ "task_name": "AskUbuntuDupQuestions",
4
+ "mteb_version": "1.36.1",
5
+ "scores": {
6
+ "test": [
7
+ {
8
+ "map": 0.565225,
9
+ "mrr": 0.705146,
10
+ "nAUC_map_max": 0.18224,
11
+ "nAUC_map_std": 0.125352,
12
+ "nAUC_map_diff1": 0.140464,
13
+ "nAUC_mrr_max": 0.286197,
14
+ "nAUC_mrr_std": 0.2169,
15
+ "nAUC_mrr_diff1": 0.158021,
16
+ "main_score": 0.565225,
17
+ "hf_subset": "default",
18
+ "languages": [
19
+ "eng-Latn"
20
+ ]
21
+ }
22
+ ]
23
+ },
24
+ "evaluation_time": 2.0422356128692627,
25
+ "kg_co2_emissions": null
26
+ }
mteb/results/BIOSSES.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "d3fb88f8f02e40887cd149695127462bbcf29b4a",
3
+ "task_name": "BIOSSES",
4
+ "mteb_version": "1.36.1",
5
+ "scores": {
6
+ "test": [
7
+ {
8
+ "pearson": 0.866855,
9
+ "spearman": 0.831736,
10
+ "cosine_pearson": 0.866855,
11
+ "cosine_spearman": 0.831736,
12
+ "manhattan_pearson": 0.855442,
13
+ "manhattan_spearman": 0.839501,
14
+ "euclidean_pearson": 0.850403,
15
+ "euclidean_spearman": 0.831736,
16
+ "main_score": 0.831736,
17
+ "hf_subset": "default",
18
+ "languages": [
19
+ "eng-Latn"
20
+ ]
21
+ }
22
+ ]
23
+ },
24
+ "evaluation_time": 0.22259831428527832,
25
+ "kg_co2_emissions": null
26
+ }