First commit
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- 1_Pooling/config.json +10 -0
- config.json +47 -0
- config_sentence_transformers.json +14 -0
- jmteb/jmteb.jsonnet +22 -0
- jmteb/results/Classification/scores_amazon_counterfactual_classification.json +23 -0
- jmteb/results/Classification/scores_amazon_review_classification.json +23 -0
- jmteb/results/Classification/scores_massive_intent_classification.json +23 -0
- jmteb/results/Classification/scores_massive_scenario_classification.json +23 -0
- jmteb/results/Clustering/scores_livedoor_news.json +36 -0
- jmteb/results/Clustering/scores_mewsc16.json +36 -0
- jmteb/results/PairClassification/scores_paws_x_ja.json +41 -0
- jmteb/results/Reranking/scores_esci.json +31 -0
- jmteb/results/Retrieval/scores_jagovfaqs_22k.json +43 -0
- jmteb/results/Retrieval/scores_jaqket.json +43 -0
- jmteb/results/Retrieval/scores_mrtydi.json +43 -0
- jmteb/results/Retrieval/scores_nlp_journal_abs_intro.json +43 -0
- jmteb/results/Retrieval/scores_nlp_journal_title_abs.json +43 -0
- jmteb/results/Retrieval/scores_nlp_journal_title_intro.json +43 -0
- jmteb/results/STS/scores_jsick.json +31 -0
- jmteb/results/STS/scores_jsts.json +31 -0
- jmteb/results/summary.json +62 -0
- jmteb/tasks/amazon_counterfactual_classification.jsonnet +32 -0
- jmteb/tasks/amazon_review_classification.jsonnet +32 -0
- jmteb/tasks/esci.jsonnet +33 -0
- jmteb/tasks/jagovfaqs_22k.jsonnet +33 -0
- jmteb/tasks/jaqket.jsonnet +33 -0
- jmteb/tasks/jsick.jsonnet +25 -0
- jmteb/tasks/jsts.jsonnet +25 -0
- jmteb/tasks/livedoor_news.jsonnet +24 -0
- jmteb/tasks/massive_intent_classification.jsonnet +32 -0
- jmteb/tasks/massive_scenario_classification.jsonnet +32 -0
- jmteb/tasks/mewsc16.jsonnet +24 -0
- jmteb/tasks/mrtydi.jsonnet +34 -0
- jmteb/tasks/nlp_journal_abs_intro.jsonnet +33 -0
- jmteb/tasks/nlp_journal_title_abs.jsonnet +33 -0
- jmteb/tasks/nlp_journal_title_intro.jsonnet +33 -0
- jmteb/tasks/paws_x_ja.jsonnet +25 -0
- model.safetensors +3 -0
- modules.json +20 -0
- mteb/models/__init__.py +10 -0
- mteb/models/default.py +4 -0
- mteb/models/retrieva.py +13 -0
- mteb/models/retrieva_en.py +15 -0
- mteb/mteb_eval.py +49 -0
- mteb/results/AmazonCounterfactualClassification.json +95 -0
- mteb/results/ArXivHierarchicalClusteringP2P.json +46 -0
- mteb/results/ArXivHierarchicalClusteringS2S.json +46 -0
- mteb/results/ArguAna.json +158 -0
- mteb/results/AskUbuntuDupQuestions.json +26 -0
- mteb/results/BIOSSES.json +26 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 512,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
config.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "sbintuitions/modernbert-ja-130m",
|
3 |
+
"architectures": [
|
4 |
+
"ModernBertModel"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 1,
|
9 |
+
"classifier_activation": "gelu",
|
10 |
+
"classifier_bias": false,
|
11 |
+
"classifier_dropout": 0.0,
|
12 |
+
"classifier_pooling": "cls",
|
13 |
+
"cls_token_id": 6,
|
14 |
+
"decoder_bias": true,
|
15 |
+
"deterministic_flash_attn": false,
|
16 |
+
"embedding_dropout": 0.0,
|
17 |
+
"eos_token_id": 2,
|
18 |
+
"global_attn_every_n_layers": 3,
|
19 |
+
"global_rope_theta": 160000.0,
|
20 |
+
"gradient_checkpointing": false,
|
21 |
+
"hidden_activation": "gelu",
|
22 |
+
"hidden_size": 512,
|
23 |
+
"initializer_cutoff_factor": 2.0,
|
24 |
+
"initializer_range": 0.02,
|
25 |
+
"intermediate_size": 2048,
|
26 |
+
"layer_norm_eps": 1e-05,
|
27 |
+
"local_attention": 128,
|
28 |
+
"local_rope_theta": 10000.0,
|
29 |
+
"max_position_embeddings": 8192,
|
30 |
+
"mlp_bias": false,
|
31 |
+
"mlp_dropout": 0.0,
|
32 |
+
"model_type": "modernbert",
|
33 |
+
"norm_bias": false,
|
34 |
+
"norm_eps": 1e-05,
|
35 |
+
"num_attention_heads": 8,
|
36 |
+
"num_hidden_layers": 19,
|
37 |
+
"pad_token_id": 3,
|
38 |
+
"position_embedding_type": "rope",
|
39 |
+
"reference_compile": false,
|
40 |
+
"repad_logits_with_grad": false,
|
41 |
+
"sep_token_id": 4,
|
42 |
+
"sparse_pred_ignore_index": -100,
|
43 |
+
"sparse_prediction": false,
|
44 |
+
"torch_dtype": "bfloat16",
|
45 |
+
"transformers_version": "4.49.0",
|
46 |
+
"vocab_size": 102400
|
47 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.4.1",
|
4 |
+
"transformers": "4.49.0",
|
5 |
+
"pytorch": "2.5.1+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {
|
8 |
+
"Retrieval-query": "関連した文書を探すために次の文を表現して\n",
|
9 |
+
"Retrieval-passage": "次の文章を表現して\n",
|
10 |
+
"default": "同じ意味の文を探すために次の文を表現して\n"
|
11 |
+
},
|
12 |
+
"default_prompt_name": "default",
|
13 |
+
"similarity_fn_name": "cosine"
|
14 |
+
}
|
jmteb/jmteb.jsonnet
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// Classification
|
2 |
+
(import './tasks/amazon_review_classification.jsonnet') +
|
3 |
+
(import './tasks/amazon_counterfactual_classification.jsonnet') +
|
4 |
+
(import './tasks/massive_intent_classification.jsonnet') +
|
5 |
+
(import './tasks/massive_scenario_classification.jsonnet') +
|
6 |
+
// Clustering
|
7 |
+
(import './tasks/livedoor_news.jsonnet') +
|
8 |
+
(import './tasks/mewsc16.jsonnet') +
|
9 |
+
// STS
|
10 |
+
(import './tasks/jsts.jsonnet') +
|
11 |
+
(import './tasks/jsick.jsonnet') +
|
12 |
+
// Pair Classification
|
13 |
+
(import './tasks/paws_x_ja.jsonnet') +
|
14 |
+
// Retrieval
|
15 |
+
(import './tasks/jagovfaqs_22k.jsonnet') +
|
16 |
+
(import './tasks/mrtydi.jsonnet') +
|
17 |
+
(import './tasks/jaqket.jsonnet') +
|
18 |
+
(import './tasks/nlp_journal_title_abs.jsonnet') +
|
19 |
+
(import './tasks/nlp_journal_title_intro.jsonnet') +
|
20 |
+
(import './tasks/nlp_journal_abs_intro.jsonnet') +
|
21 |
+
// Reranking
|
22 |
+
(import './tasks/esci.jsonnet')
|
jmteb/results/Classification/scores_amazon_counterfactual_classification.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "macro_f1",
|
3 |
+
"metric_value": 0.7725172847265871,
|
4 |
+
"details": {
|
5 |
+
"optimal_classifier_name": "logreg",
|
6 |
+
"val_scores": {
|
7 |
+
"knn_cosine_k_2": {
|
8 |
+
"accuracy": 0.9163090128755365,
|
9 |
+
"macro_f1": 0.694795707592322
|
10 |
+
},
|
11 |
+
"logreg": {
|
12 |
+
"accuracy": 0.9291845493562232,
|
13 |
+
"macro_f1": 0.7484418707366147
|
14 |
+
}
|
15 |
+
},
|
16 |
+
"test_scores": {
|
17 |
+
"logreg": {
|
18 |
+
"accuracy": 0.9336188436830836,
|
19 |
+
"macro_f1": 0.7725172847265871
|
20 |
+
}
|
21 |
+
}
|
22 |
+
}
|
23 |
+
}
|
jmteb/results/Classification/scores_amazon_review_classification.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "macro_f1",
|
3 |
+
"metric_value": 0.580199599298548,
|
4 |
+
"details": {
|
5 |
+
"optimal_classifier_name": "logreg",
|
6 |
+
"val_scores": {
|
7 |
+
"knn_cosine_k_2": {
|
8 |
+
"accuracy": 0.4322,
|
9 |
+
"macro_f1": 0.4233916952995692
|
10 |
+
},
|
11 |
+
"logreg": {
|
12 |
+
"accuracy": 0.5912,
|
13 |
+
"macro_f1": 0.5856091084774833
|
14 |
+
}
|
15 |
+
},
|
16 |
+
"test_scores": {
|
17 |
+
"logreg": {
|
18 |
+
"accuracy": 0.5848,
|
19 |
+
"macro_f1": 0.580199599298548
|
20 |
+
}
|
21 |
+
}
|
22 |
+
}
|
23 |
+
}
|
jmteb/results/Classification/scores_massive_intent_classification.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "macro_f1",
|
3 |
+
"metric_value": 0.7965072076950149,
|
4 |
+
"details": {
|
5 |
+
"optimal_classifier_name": "logreg",
|
6 |
+
"val_scores": {
|
7 |
+
"knn_cosine_k_2": {
|
8 |
+
"accuracy": 0.8066896212493851,
|
9 |
+
"macro_f1": 0.7856631206448443
|
10 |
+
},
|
11 |
+
"logreg": {
|
12 |
+
"accuracy": 0.8578455484505657,
|
13 |
+
"macro_f1": 0.8070559236000313
|
14 |
+
}
|
15 |
+
},
|
16 |
+
"test_scores": {
|
17 |
+
"logreg": {
|
18 |
+
"accuracy": 0.859448554135844,
|
19 |
+
"macro_f1": 0.7965072076950149
|
20 |
+
}
|
21 |
+
}
|
22 |
+
}
|
23 |
+
}
|
jmteb/results/Classification/scores_massive_scenario_classification.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "macro_f1",
|
3 |
+
"metric_value": 0.8963544416714002,
|
4 |
+
"details": {
|
5 |
+
"optimal_classifier_name": "logreg",
|
6 |
+
"val_scores": {
|
7 |
+
"knn_cosine_k_2": {
|
8 |
+
"accuracy": 0.8745696015740285,
|
9 |
+
"macro_f1": 0.8702590755528099
|
10 |
+
},
|
11 |
+
"logreg": {
|
12 |
+
"accuracy": 0.9011313330054107,
|
13 |
+
"macro_f1": 0.894702776793441
|
14 |
+
}
|
15 |
+
},
|
16 |
+
"test_scores": {
|
17 |
+
"logreg": {
|
18 |
+
"accuracy": 0.8997982515131137,
|
19 |
+
"macro_f1": 0.8963544416714002
|
20 |
+
}
|
21 |
+
}
|
22 |
+
}
|
23 |
+
}
|
jmteb/results/Clustering/scores_livedoor_news.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "v_measure_score",
|
3 |
+
"metric_value": 0.5063220603646297,
|
4 |
+
"details": {
|
5 |
+
"optimal_clustering_model_name": "MiniBatchKMeans",
|
6 |
+
"val_scores": {
|
7 |
+
"MiniBatchKMeans": {
|
8 |
+
"v_measure_score": 0.5162254920293606,
|
9 |
+
"homogeneity_score": 0.5134942369453169,
|
10 |
+
"completeness_score": 0.5189859573522001
|
11 |
+
},
|
12 |
+
"AgglomerativeClustering": {
|
13 |
+
"v_measure_score": 0.4817740009110648,
|
14 |
+
"homogeneity_score": 0.47151381593215835,
|
15 |
+
"completeness_score": 0.4924906436335508
|
16 |
+
},
|
17 |
+
"BisectingKMeans": {
|
18 |
+
"v_measure_score": 0.4672899779727179,
|
19 |
+
"homogeneity_score": 0.4669032808035401,
|
20 |
+
"completeness_score": 0.4676773162110705
|
21 |
+
},
|
22 |
+
"Birch": {
|
23 |
+
"v_measure_score": 0.48380197476431497,
|
24 |
+
"homogeneity_score": 0.4759913341716145,
|
25 |
+
"completeness_score": 0.4918732245931716
|
26 |
+
}
|
27 |
+
},
|
28 |
+
"test_scores": {
|
29 |
+
"MiniBatchKMeans": {
|
30 |
+
"v_measure_score": 0.5063220603646297,
|
31 |
+
"homogeneity_score": 0.5072769713955575,
|
32 |
+
"completeness_score": 0.5053707376763799
|
33 |
+
}
|
34 |
+
}
|
35 |
+
}
|
36 |
+
}
|
jmteb/results/Clustering/scores_mewsc16.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "v_measure_score",
|
3 |
+
"metric_value": 0.45462727073049025,
|
4 |
+
"details": {
|
5 |
+
"optimal_clustering_model_name": "Birch",
|
6 |
+
"val_scores": {
|
7 |
+
"MiniBatchKMeans": {
|
8 |
+
"v_measure_score": 0.3842466809312565,
|
9 |
+
"homogeneity_score": 0.4226378649485537,
|
10 |
+
"completeness_score": 0.3522493803950809
|
11 |
+
},
|
12 |
+
"AgglomerativeClustering": {
|
13 |
+
"v_measure_score": 0.4377888516765032,
|
14 |
+
"homogeneity_score": 0.4772841182693872,
|
15 |
+
"completeness_score": 0.40433049804468363
|
16 |
+
},
|
17 |
+
"BisectingKMeans": {
|
18 |
+
"v_measure_score": 0.40122932757998875,
|
19 |
+
"homogeneity_score": 0.43755915162610337,
|
20 |
+
"completeness_score": 0.3704698214056897
|
21 |
+
},
|
22 |
+
"Birch": {
|
23 |
+
"v_measure_score": 0.46841918020711176,
|
24 |
+
"homogeneity_score": 0.506935954769718,
|
25 |
+
"completeness_score": 0.4353420774727962
|
26 |
+
}
|
27 |
+
},
|
28 |
+
"test_scores": {
|
29 |
+
"Birch": {
|
30 |
+
"v_measure_score": 0.45462727073049025,
|
31 |
+
"homogeneity_score": 0.4852439372487074,
|
32 |
+
"completeness_score": 0.42764484284900073
|
33 |
+
}
|
34 |
+
}
|
35 |
+
}
|
36 |
+
}
|
jmteb/results/PairClassification/scores_paws_x_ja.json
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "binary_f1",
|
3 |
+
"metric_value": 0.6403061224489796,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "euclidean_distances",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_distances": {
|
8 |
+
"accuracy": 0.5725,
|
9 |
+
"accuracy_threshold": -0.12011593580245972,
|
10 |
+
"binary_f1": 0.5979670522257273,
|
11 |
+
"binary_f1_threshold": 1.0
|
12 |
+
},
|
13 |
+
"manhatten_distances": {
|
14 |
+
"accuracy": 0.6505,
|
15 |
+
"accuracy_threshold": 5.353497505187988,
|
16 |
+
"binary_f1": 0.6209476309226932,
|
17 |
+
"binary_f1_threshold": 10.806973457336426
|
18 |
+
},
|
19 |
+
"euclidean_distances": {
|
20 |
+
"accuracy": 0.653,
|
21 |
+
"accuracy_threshold": 0.30937591195106506,
|
22 |
+
"binary_f1": 0.6221858370855505,
|
23 |
+
"binary_f1_threshold": 0.6454310417175293
|
24 |
+
},
|
25 |
+
"dot_similarities": {
|
26 |
+
"accuracy": 0.654,
|
27 |
+
"accuracy_threshold": 0.9534672498703003,
|
28 |
+
"binary_f1": 0.6213355048859935,
|
29 |
+
"binary_f1_threshold": 0.7887746095657349
|
30 |
+
}
|
31 |
+
},
|
32 |
+
"test_scores": {
|
33 |
+
"euclidean_distances": {
|
34 |
+
"accuracy": 0.6085,
|
35 |
+
"accuracy_threshold": 0.30937591195106506,
|
36 |
+
"binary_f1": 0.6403061224489796,
|
37 |
+
"binary_f1_threshold": 0.6454310417175293
|
38 |
+
}
|
39 |
+
}
|
40 |
+
}
|
41 |
+
}
|
jmteb/results/Reranking/scores_esci.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "ndcg@10",
|
3 |
+
"metric_value": 0.9326535342404364,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "cosine_similarity",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"ndcg@10": 0.9457387910643943,
|
9 |
+
"ndcg@20": 0.9574971659413306,
|
10 |
+
"ndcg@40": 0.9650000580183328
|
11 |
+
},
|
12 |
+
"dot_score": {
|
13 |
+
"ndcg@10": 0.9457186021682247,
|
14 |
+
"ndcg@20": 0.957477691902003,
|
15 |
+
"ndcg@40": 0.9649985588722803
|
16 |
+
},
|
17 |
+
"euclidean_distance": {
|
18 |
+
"ndcg@10": 0.9455185072273512,
|
19 |
+
"ndcg@20": 0.9573786712685046,
|
20 |
+
"ndcg@40": 0.9649006041448088
|
21 |
+
}
|
22 |
+
},
|
23 |
+
"test_scores": {
|
24 |
+
"cosine_similarity": {
|
25 |
+
"ndcg@10": 0.9326535342404364,
|
26 |
+
"ndcg@20": 0.9490780973799793,
|
27 |
+
"ndcg@40": 0.9582022320743613
|
28 |
+
}
|
29 |
+
}
|
30 |
+
}
|
31 |
+
}
|
jmteb/results/Retrieval/scores_jagovfaqs_22k.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "ndcg@10",
|
3 |
+
"metric_value": 0.7037560169204234,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "euclidean_distance",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"accuracy@1": 0.5621526762211173,
|
9 |
+
"accuracy@3": 0.7458321146534074,
|
10 |
+
"accuracy@5": 0.7987715706346885,
|
11 |
+
"accuracy@10": 0.8581456566247441,
|
12 |
+
"ndcg@10": 0.7123343291094127,
|
13 |
+
"mrr@10": 0.6653833386723116
|
14 |
+
},
|
15 |
+
"dot_score": {
|
16 |
+
"accuracy@1": 0.5627376425855514,
|
17 |
+
"accuracy@3": 0.7461245978356245,
|
18 |
+
"accuracy@5": 0.7978941210880375,
|
19 |
+
"accuracy@10": 0.8587306229891781,
|
20 |
+
"ndcg@10": 0.712624535627044,
|
21 |
+
"mrr@10": 0.6656209232254854
|
22 |
+
},
|
23 |
+
"euclidean_distance": {
|
24 |
+
"accuracy@1": 0.5633226089499854,
|
25 |
+
"accuracy@3": 0.7455396314711904,
|
26 |
+
"accuracy@5": 0.7987715706346885,
|
27 |
+
"accuracy@10": 0.8578531734425271,
|
28 |
+
"ndcg@10": 0.7126592570676551,
|
29 |
+
"mrr@10": 0.6659067907166759
|
30 |
+
}
|
31 |
+
},
|
32 |
+
"test_scores": {
|
33 |
+
"euclidean_distance": {
|
34 |
+
"accuracy@1": 0.5675438596491228,
|
35 |
+
"accuracy@3": 0.7292397660818714,
|
36 |
+
"accuracy@5": 0.7839181286549708,
|
37 |
+
"accuracy@10": 0.8403508771929824,
|
38 |
+
"ndcg@10": 0.7037560169204234,
|
39 |
+
"mrr@10": 0.660028311519539
|
40 |
+
}
|
41 |
+
}
|
42 |
+
}
|
43 |
+
}
|
jmteb/results/Retrieval/scores_jaqket.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "ndcg@10",
|
3 |
+
"metric_value": 0.61373516847512,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "cosine_similarity",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"accuracy@1": 0.4562814070351759,
|
9 |
+
"accuracy@3": 0.6572864321608041,
|
10 |
+
"accuracy@5": 0.7045226130653266,
|
11 |
+
"accuracy@10": 0.7628140703517587,
|
12 |
+
"ndcg@10": 0.6130067237476119,
|
13 |
+
"mrr@10": 0.5645469410544787
|
14 |
+
},
|
15 |
+
"dot_score": {
|
16 |
+
"accuracy@1": 0.457286432160804,
|
17 |
+
"accuracy@3": 0.6552763819095477,
|
18 |
+
"accuracy@5": 0.7055276381909548,
|
19 |
+
"accuracy@10": 0.7618090452261307,
|
20 |
+
"ndcg@10": 0.6127513989155836,
|
21 |
+
"mrr@10": 0.5645381670256041
|
22 |
+
},
|
23 |
+
"euclidean_distance": {
|
24 |
+
"accuracy@1": 0.4552763819095477,
|
25 |
+
"accuracy@3": 0.6572864321608041,
|
26 |
+
"accuracy@5": 0.7055276381909548,
|
27 |
+
"accuracy@10": 0.7628140703517587,
|
28 |
+
"ndcg@10": 0.612740626009351,
|
29 |
+
"mrr@10": 0.5641572944085506
|
30 |
+
}
|
31 |
+
},
|
32 |
+
"test_scores": {
|
33 |
+
"cosine_similarity": {
|
34 |
+
"accuracy@1": 0.4493480441323972,
|
35 |
+
"accuracy@3": 0.6288866599799399,
|
36 |
+
"accuracy@5": 0.7021063189568706,
|
37 |
+
"accuracy@10": 0.7903711133400201,
|
38 |
+
"ndcg@10": 0.61373516847512,
|
39 |
+
"mrr@10": 0.5578975020298991
|
40 |
+
}
|
41 |
+
}
|
42 |
+
}
|
43 |
+
}
|
jmteb/results/Retrieval/scores_mrtydi.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "ndcg@10",
|
3 |
+
"metric_value": 0.365136589573548,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "dot_score",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"accuracy@1": 0.22629310344827586,
|
9 |
+
"accuracy@3": 0.4331896551724138,
|
10 |
+
"accuracy@5": 0.5064655172413793,
|
11 |
+
"accuracy@10": 0.59375,
|
12 |
+
"ndcg@10": 0.40613921488749105,
|
13 |
+
"mrr@10": 0.34660774151614676
|
14 |
+
},
|
15 |
+
"dot_score": {
|
16 |
+
"accuracy@1": 0.22952586206896552,
|
17 |
+
"accuracy@3": 0.43211206896551724,
|
18 |
+
"accuracy@5": 0.5064655172413793,
|
19 |
+
"accuracy@10": 0.5948275862068966,
|
20 |
+
"ndcg@10": 0.4072380263572043,
|
21 |
+
"mrr@10": 0.34782686781609207
|
22 |
+
},
|
23 |
+
"euclidean_distance": {
|
24 |
+
"accuracy@1": 0.22629310344827586,
|
25 |
+
"accuracy@3": 0.4353448275862069,
|
26 |
+
"accuracy@5": 0.5075431034482759,
|
27 |
+
"accuracy@10": 0.5959051724137931,
|
28 |
+
"ndcg@10": 0.4071683076743109,
|
29 |
+
"mrr@10": 0.34731544198139036
|
30 |
+
}
|
31 |
+
},
|
32 |
+
"test_scores": {
|
33 |
+
"dot_score": {
|
34 |
+
"accuracy@1": 0.23472222222222222,
|
35 |
+
"accuracy@3": 0.4027777777777778,
|
36 |
+
"accuracy@5": 0.4847222222222222,
|
37 |
+
"accuracy@10": 0.5861111111111111,
|
38 |
+
"ndcg@10": 0.365136589573548,
|
39 |
+
"mrr@10": 0.3406740520282186
|
40 |
+
}
|
41 |
+
}
|
42 |
+
}
|
43 |
+
}
|
jmteb/results/Retrieval/scores_nlp_journal_abs_intro.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "ndcg@10",
|
3 |
+
"metric_value": 0.9312261091012943,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "cosine_similarity",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"accuracy@1": 0.94,
|
9 |
+
"accuracy@3": 0.96,
|
10 |
+
"accuracy@5": 0.97,
|
11 |
+
"accuracy@10": 0.98,
|
12 |
+
"ndcg@10": 0.9589493964497818,
|
13 |
+
"mrr@10": 0.9522619047619049
|
14 |
+
},
|
15 |
+
"dot_score": {
|
16 |
+
"accuracy@1": 0.94,
|
17 |
+
"accuracy@3": 0.96,
|
18 |
+
"accuracy@5": 0.97,
|
19 |
+
"accuracy@10": 0.98,
|
20 |
+
"ndcg@10": 0.9589493964497818,
|
21 |
+
"mrr@10": 0.9522619047619049
|
22 |
+
},
|
23 |
+
"euclidean_distance": {
|
24 |
+
"accuracy@1": 0.94,
|
25 |
+
"accuracy@3": 0.96,
|
26 |
+
"accuracy@5": 0.97,
|
27 |
+
"accuracy@10": 0.98,
|
28 |
+
"ndcg@10": 0.9589493964497818,
|
29 |
+
"mrr@10": 0.9522619047619049
|
30 |
+
}
|
31 |
+
},
|
32 |
+
"test_scores": {
|
33 |
+
"cosine_similarity": {
|
34 |
+
"accuracy@1": 0.8762376237623762,
|
35 |
+
"accuracy@3": 0.9455445544554455,
|
36 |
+
"accuracy@5": 0.9678217821782178,
|
37 |
+
"accuracy@10": 0.9826732673267327,
|
38 |
+
"ndcg@10": 0.9312261091012943,
|
39 |
+
"mrr@10": 0.914452695269527
|
40 |
+
}
|
41 |
+
}
|
42 |
+
}
|
43 |
+
}
|
jmteb/results/Retrieval/scores_nlp_journal_title_abs.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "ndcg@10",
|
3 |
+
"metric_value": 0.9660376057905824,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "cosine_similarity",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"accuracy@1": 0.92,
|
9 |
+
"accuracy@3": 0.98,
|
10 |
+
"accuracy@5": 0.99,
|
11 |
+
"accuracy@10": 1.0,
|
12 |
+
"ndcg@10": 0.964415325130387,
|
13 |
+
"mrr@10": 0.9525000000000001
|
14 |
+
},
|
15 |
+
"dot_score": {
|
16 |
+
"accuracy@1": 0.91,
|
17 |
+
"accuracy@3": 0.98,
|
18 |
+
"accuracy@5": 0.99,
|
19 |
+
"accuracy@10": 1.0,
|
20 |
+
"ndcg@10": 0.9607246226661015,
|
21 |
+
"mrr@10": 0.9475000000000001
|
22 |
+
},
|
23 |
+
"euclidean_distance": {
|
24 |
+
"accuracy@1": 0.92,
|
25 |
+
"accuracy@3": 0.98,
|
26 |
+
"accuracy@5": 0.99,
|
27 |
+
"accuracy@10": 1.0,
|
28 |
+
"ndcg@10": 0.964415325130387,
|
29 |
+
"mrr@10": 0.9525000000000001
|
30 |
+
}
|
31 |
+
},
|
32 |
+
"test_scores": {
|
33 |
+
"cosine_similarity": {
|
34 |
+
"accuracy@1": 0.9331683168316832,
|
35 |
+
"accuracy@3": 0.9727722772277227,
|
36 |
+
"accuracy@5": 0.995049504950495,
|
37 |
+
"accuracy@10": 0.9975247524752475,
|
38 |
+
"ndcg@10": 0.9660376057905824,
|
39 |
+
"mrr@10": 0.9556930693069304
|
40 |
+
}
|
41 |
+
}
|
42 |
+
}
|
43 |
+
}
|
jmteb/results/Retrieval/scores_nlp_journal_title_intro.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "ndcg@10",
|
3 |
+
"metric_value": 0.8239125997597097,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "cosine_similarity",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"accuracy@1": 0.79,
|
9 |
+
"accuracy@3": 0.87,
|
10 |
+
"accuracy@5": 0.92,
|
11 |
+
"accuracy@10": 0.94,
|
12 |
+
"ndcg@10": 0.8625804492635228,
|
13 |
+
"mrr@10": 0.8376666666666667
|
14 |
+
},
|
15 |
+
"dot_score": {
|
16 |
+
"accuracy@1": 0.79,
|
17 |
+
"accuracy@3": 0.87,
|
18 |
+
"accuracy@5": 0.92,
|
19 |
+
"accuracy@10": 0.94,
|
20 |
+
"ndcg@10": 0.8625804492635228,
|
21 |
+
"mrr@10": 0.8376666666666667
|
22 |
+
},
|
23 |
+
"euclidean_distance": {
|
24 |
+
"accuracy@1": 0.79,
|
25 |
+
"accuracy@3": 0.86,
|
26 |
+
"accuracy@5": 0.92,
|
27 |
+
"accuracy@10": 0.94,
|
28 |
+
"ndcg@10": 0.8618872148442569,
|
29 |
+
"mrr@10": 0.8368333333333333
|
30 |
+
}
|
31 |
+
},
|
32 |
+
"test_scores": {
|
33 |
+
"cosine_similarity": {
|
34 |
+
"accuracy@1": 0.7153465346534653,
|
35 |
+
"accuracy@3": 0.8589108910891089,
|
36 |
+
"accuracy@5": 0.8910891089108911,
|
37 |
+
"accuracy@10": 0.9183168316831684,
|
38 |
+
"ndcg@10": 0.8239125997597097,
|
39 |
+
"mrr@10": 0.7929278642149928
|
40 |
+
}
|
41 |
+
}
|
42 |
+
}
|
43 |
+
}
|
jmteb/results/STS/scores_jsick.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "spearman",
|
3 |
+
"metric_value": 0.7228774332572527,
|
4 |
+
"details": {
|
5 |
+
"optimal_similarity_metric": "dot_score",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"pearson": 0.7787902785127275,
|
9 |
+
"spearman": 0.7463203269165151
|
10 |
+
},
|
11 |
+
"manhatten_distance": {
|
12 |
+
"pearson": 0.7696693385531954,
|
13 |
+
"spearman": 0.73658437615329
|
14 |
+
},
|
15 |
+
"euclidean_distance": {
|
16 |
+
"pearson": 0.7696693385531954,
|
17 |
+
"spearman": 0.73658437615329
|
18 |
+
},
|
19 |
+
"dot_score": {
|
20 |
+
"pearson": 0.7788735665737948,
|
21 |
+
"spearman": 0.7464361803709411
|
22 |
+
}
|
23 |
+
},
|
24 |
+
"test_scores": {
|
25 |
+
"dot_score": {
|
26 |
+
"pearson": 0.7579853118271599,
|
27 |
+
"spearman": 0.7228774332572527
|
28 |
+
}
|
29 |
+
}
|
30 |
+
}
|
31 |
+
}
|
jmteb/results/STS/scores_jsts.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "spearman",
|
3 |
+
"metric_value": 0.8334009117886643,
|
4 |
+
"details": {
|
5 |
+
"optimal_similarity_metric": "cosine_similarity",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"pearson": 0.8575300900249121,
|
9 |
+
"spearman": 0.8130300377229739
|
10 |
+
},
|
11 |
+
"manhatten_distance": {
|
12 |
+
"pearson": 0.8516427332926884,
|
13 |
+
"spearman": 0.8129478229587098
|
14 |
+
},
|
15 |
+
"euclidean_distance": {
|
16 |
+
"pearson": 0.8516427332926884,
|
17 |
+
"spearman": 0.8129478229587098
|
18 |
+
},
|
19 |
+
"dot_score": {
|
20 |
+
"pearson": 0.8575076226058185,
|
21 |
+
"spearman": 0.8130035755745093
|
22 |
+
}
|
23 |
+
},
|
24 |
+
"test_scores": {
|
25 |
+
"cosine_similarity": {
|
26 |
+
"pearson": 0.8764356710949792,
|
27 |
+
"spearman": 0.8334009117886643
|
28 |
+
}
|
29 |
+
}
|
30 |
+
}
|
31 |
+
}
|
jmteb/results/summary.json
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"Classification": {
|
3 |
+
"amazon_counterfactual_classification": {
|
4 |
+
"macro_f1": 0.7725172847265871
|
5 |
+
},
|
6 |
+
"amazon_review_classification": {
|
7 |
+
"macro_f1": 0.580199599298548
|
8 |
+
},
|
9 |
+
"massive_intent_classification": {
|
10 |
+
"macro_f1": 0.7965072076950149
|
11 |
+
},
|
12 |
+
"massive_scenario_classification": {
|
13 |
+
"macro_f1": 0.8963544416714002
|
14 |
+
}
|
15 |
+
},
|
16 |
+
"Reranking": {
|
17 |
+
"esci": {
|
18 |
+
"ndcg@10": 0.9326535342404364
|
19 |
+
}
|
20 |
+
},
|
21 |
+
"Retrieval": {
|
22 |
+
"jagovfaqs_22k": {
|
23 |
+
"ndcg@10": 0.7037560169204234
|
24 |
+
},
|
25 |
+
"jaqket": {
|
26 |
+
"ndcg@10": 0.61373516847512
|
27 |
+
},
|
28 |
+
"mrtydi": {
|
29 |
+
"ndcg@10": 0.365136589573548
|
30 |
+
},
|
31 |
+
"nlp_journal_abs_intro": {
|
32 |
+
"ndcg@10": 0.9312261091012943
|
33 |
+
},
|
34 |
+
"nlp_journal_title_abs": {
|
35 |
+
"ndcg@10": 0.9660376057905824
|
36 |
+
},
|
37 |
+
"nlp_journal_title_intro": {
|
38 |
+
"ndcg@10": 0.8239125997597097
|
39 |
+
}
|
40 |
+
},
|
41 |
+
"STS": {
|
42 |
+
"jsick": {
|
43 |
+
"spearman": 0.7228774332572527
|
44 |
+
},
|
45 |
+
"jsts": {
|
46 |
+
"spearman": 0.8334009117886643
|
47 |
+
}
|
48 |
+
},
|
49 |
+
"Clustering": {
|
50 |
+
"livedoor_news": {
|
51 |
+
"v_measure_score": 0.5063220603646297
|
52 |
+
},
|
53 |
+
"mewsc16": {
|
54 |
+
"v_measure_score": 0.45462727073049025
|
55 |
+
}
|
56 |
+
},
|
57 |
+
"PairClassification": {
|
58 |
+
"paws_x_ja": {
|
59 |
+
"binary_f1": 0.6403061224489796
|
60 |
+
}
|
61 |
+
}
|
62 |
+
}
|
jmteb/tasks/amazon_counterfactual_classification.jsonnet
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
amazon_counterfactual_classification: {
|
3 |
+
class_path: 'ClassificationEvaluator',
|
4 |
+
init_args: {
|
5 |
+
train_dataset: {
|
6 |
+
class_path: 'HfClassificationDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'train',
|
10 |
+
name: 'amazon_counterfactual_classification',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
val_dataset: {
|
14 |
+
class_path: 'HfClassificationDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'validation',
|
18 |
+
name: 'amazon_counterfactual_classification',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
test_dataset: {
|
22 |
+
class_path: 'HfClassificationDataset',
|
23 |
+
init_args: {
|
24 |
+
path: 'sbintuitions/JMTEB',
|
25 |
+
split: 'test',
|
26 |
+
name: 'amazon_counterfactual_classification',
|
27 |
+
},
|
28 |
+
},
|
29 |
+
prefix: '同じクラスに属する文を探すために次の文を表現して\n',
|
30 |
+
},
|
31 |
+
},
|
32 |
+
}
|
jmteb/tasks/amazon_review_classification.jsonnet
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
amazon_review_classification: {
|
3 |
+
class_path: 'ClassificationEvaluator',
|
4 |
+
init_args: {
|
5 |
+
train_dataset: {
|
6 |
+
class_path: 'HfClassificationDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'train',
|
10 |
+
name: 'amazon_review_classification',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
val_dataset: {
|
14 |
+
class_path: 'HfClassificationDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'validation',
|
18 |
+
name: 'amazon_review_classification',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
test_dataset: {
|
22 |
+
class_path: 'HfClassificationDataset',
|
23 |
+
init_args: {
|
24 |
+
path: 'sbintuitions/JMTEB',
|
25 |
+
split: 'test',
|
26 |
+
name: 'amazon_review_classification',
|
27 |
+
},
|
28 |
+
},
|
29 |
+
prefix: '同じクラスに属する文を探すために次の文を表現して\n',
|
30 |
+
},
|
31 |
+
},
|
32 |
+
}
|
jmteb/tasks/esci.jsonnet
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
esci: {
|
3 |
+
class_path: 'RerankingEvaluator',
|
4 |
+
init_args: {
|
5 |
+
val_query_dataset: {
|
6 |
+
class_path: 'HfRerankingQueryDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'validation',
|
10 |
+
name: 'esci-query',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
test_query_dataset: {
|
14 |
+
class_path: 'HfRerankingQueryDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'test',
|
18 |
+
name: 'esci-query',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
doc_dataset: {
|
22 |
+
class_path: 'HfRerankingDocDataset',
|
23 |
+
init_args: {
|
24 |
+
path: 'sbintuitions/JMTEB',
|
25 |
+
split: 'corpus',
|
26 |
+
name: 'esci-corpus',
|
27 |
+
},
|
28 |
+
},
|
29 |
+
query_prefix: '関連した文書を探すために次の文を表現して\n',
|
30 |
+
doc_prefix: '次の文章を表現して\n',
|
31 |
+
},
|
32 |
+
},
|
33 |
+
}
|
jmteb/tasks/jagovfaqs_22k.jsonnet
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
jagovfaqs_22k: {
|
3 |
+
class_path: 'RetrievalEvaluator',
|
4 |
+
init_args: {
|
5 |
+
val_query_dataset: {
|
6 |
+
class_path: 'HfRetrievalQueryDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'validation',
|
10 |
+
name: 'jagovfaqs_22k-query',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
test_query_dataset: {
|
14 |
+
class_path: 'HfRetrievalQueryDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'test',
|
18 |
+
name: 'jagovfaqs_22k-query',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
doc_dataset: {
|
22 |
+
class_path: 'HfRetrievalDocDataset',
|
23 |
+
init_args: {
|
24 |
+
path: 'sbintuitions/JMTEB',
|
25 |
+
split: 'corpus',
|
26 |
+
name: 'jagovfaqs_22k-corpus',
|
27 |
+
},
|
28 |
+
},
|
29 |
+
query_prefix: '関連した文書を探すために次の文を表現して\n',
|
30 |
+
doc_prefix: '次の文章を表現して\n',
|
31 |
+
},
|
32 |
+
},
|
33 |
+
}
|
jmteb/tasks/jaqket.jsonnet
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
jaqket: {
|
3 |
+
class_path: 'RetrievalEvaluator',
|
4 |
+
init_args: {
|
5 |
+
val_query_dataset: {
|
6 |
+
class_path: 'HfRetrievalQueryDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'validation',
|
10 |
+
name: 'jaqket-query',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
test_query_dataset: {
|
14 |
+
class_path: 'HfRetrievalQueryDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'test',
|
18 |
+
name: 'jaqket-query',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
doc_dataset: {
|
22 |
+
class_path: 'HfRetrievalDocDataset',
|
23 |
+
init_args: {
|
24 |
+
path: 'sbintuitions/JMTEB',
|
25 |
+
split: 'corpus',
|
26 |
+
name: 'jaqket-corpus',
|
27 |
+
},
|
28 |
+
},
|
29 |
+
query_prefix: '関連した文書を探すために次の文を表現して\n',
|
30 |
+
doc_prefix: '次の文章を表現して\n',
|
31 |
+
},
|
32 |
+
},
|
33 |
+
}
|
jmteb/tasks/jsick.jsonnet
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
jsick: {
|
3 |
+
class_path: 'STSEvaluator',
|
4 |
+
init_args: {
|
5 |
+
val_dataset: {
|
6 |
+
class_path: 'HfSTSDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'validation',
|
10 |
+
name: 'jsick',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
test_dataset: {
|
14 |
+
class_path: 'HfSTSDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'test',
|
18 |
+
name: 'jsick',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
sentence1_prefix: '同じ意味の文を探すために次の文を表現して\n',
|
22 |
+
sentence2_prefix: '同じ意味の文を探すために次の文を表現して\n',
|
23 |
+
},
|
24 |
+
},
|
25 |
+
}
|
jmteb/tasks/jsts.jsonnet
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
jsts: {
|
3 |
+
class_path: 'STSEvaluator',
|
4 |
+
init_args: {
|
5 |
+
val_dataset: {
|
6 |
+
class_path: 'HfSTSDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'train',
|
10 |
+
name: 'jsts',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
test_dataset: {
|
14 |
+
class_path: 'HfSTSDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'test',
|
18 |
+
name: 'jsts',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
sentence1_prefix: '同じ意味の文を探すために次の文を表現して\n',
|
22 |
+
sentence2_prefix: '同じ意味の文を探すために次の文を表現して\n',
|
23 |
+
},
|
24 |
+
},
|
25 |
+
}
|
jmteb/tasks/livedoor_news.jsonnet
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
livedoor_news: {
|
3 |
+
class_path: 'ClusteringEvaluator',
|
4 |
+
init_args: {
|
5 |
+
val_dataset: {
|
6 |
+
class_path: 'HfClusteringDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'validation',
|
10 |
+
name: 'livedoor_news',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
test_dataset: {
|
14 |
+
class_path: 'HfClusteringDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'test',
|
18 |
+
name: 'livedoor_news',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
prefix: '類似した文を探すために次の文を表現して\n',
|
22 |
+
},
|
23 |
+
},
|
24 |
+
}
|
jmteb/tasks/massive_intent_classification.jsonnet
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
massive_intent_classification: {
|
3 |
+
class_path: 'ClassificationEvaluator',
|
4 |
+
init_args: {
|
5 |
+
train_dataset: {
|
6 |
+
class_path: 'HfClassificationDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'train',
|
10 |
+
name: 'massive_intent_classification',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
val_dataset: {
|
14 |
+
class_path: 'HfClassificationDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'validation',
|
18 |
+
name: 'massive_intent_classification',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
test_dataset: {
|
22 |
+
class_path: 'HfClassificationDataset',
|
23 |
+
init_args: {
|
24 |
+
path: 'sbintuitions/JMTEB',
|
25 |
+
split: 'test',
|
26 |
+
name: 'massive_intent_classification',
|
27 |
+
},
|
28 |
+
},
|
29 |
+
prefix: '同じクラスに属する文を探すために次の文を表現して\n',
|
30 |
+
},
|
31 |
+
},
|
32 |
+
}
|
jmteb/tasks/massive_scenario_classification.jsonnet
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
massive_scenario_classification: {
|
3 |
+
class_path: 'ClassificationEvaluator',
|
4 |
+
init_args: {
|
5 |
+
train_dataset: {
|
6 |
+
class_path: 'HfClassificationDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'train',
|
10 |
+
name: 'massive_scenario_classification',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
val_dataset: {
|
14 |
+
class_path: 'HfClassificationDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'validation',
|
18 |
+
name: 'massive_scenario_classification',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
test_dataset: {
|
22 |
+
class_path: 'HfClassificationDataset',
|
23 |
+
init_args: {
|
24 |
+
path: 'sbintuitions/JMTEB',
|
25 |
+
split: 'test',
|
26 |
+
name: 'massive_scenario_classification',
|
27 |
+
},
|
28 |
+
},
|
29 |
+
prefix: '同じクラスに属する文を探すために次の文を表現して\n',
|
30 |
+
},
|
31 |
+
},
|
32 |
+
}
|
jmteb/tasks/mewsc16.jsonnet
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
mewsc16: {
|
3 |
+
class_path: 'ClusteringEvaluator',
|
4 |
+
init_args: {
|
5 |
+
val_dataset: {
|
6 |
+
class_path: 'HfClusteringDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'validation',
|
10 |
+
name: 'mewsc16_ja',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
test_dataset: {
|
14 |
+
class_path: 'HfClusteringDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'test',
|
18 |
+
name: 'mewsc16_ja',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
prefix: '類似した文を探すために次の文を表現して\n',
|
22 |
+
},
|
23 |
+
},
|
24 |
+
}
|
jmteb/tasks/mrtydi.jsonnet
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
mrtydi: {
|
3 |
+
class_path: 'RetrievalEvaluator',
|
4 |
+
init_args: {
|
5 |
+
val_query_dataset: {
|
6 |
+
class_path: 'HfRetrievalQueryDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'validation',
|
10 |
+
name: 'mrtydi-query',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
test_query_dataset: {
|
14 |
+
class_path: 'HfRetrievalQueryDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'test',
|
18 |
+
name: 'mrtydi-query',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
doc_dataset: {
|
22 |
+
class_path: 'HfRetrievalDocDataset',
|
23 |
+
init_args: {
|
24 |
+
path: 'sbintuitions/JMTEB',
|
25 |
+
split: 'corpus',
|
26 |
+
name: 'mrtydi-corpus',
|
27 |
+
},
|
28 |
+
},
|
29 |
+
"doc_chunk_size":10000,
|
30 |
+
query_prefix: '関連した文書を探すために次の文を表現して\n',
|
31 |
+
doc_prefix: '次の文章を表現して\n',
|
32 |
+
},
|
33 |
+
},
|
34 |
+
}
|
jmteb/tasks/nlp_journal_abs_intro.jsonnet
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
nlp_journal_abs_intro: {
|
3 |
+
class_path: 'RetrievalEvaluator',
|
4 |
+
init_args: {
|
5 |
+
val_query_dataset: {
|
6 |
+
class_path: 'HfRetrievalQueryDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'validation',
|
10 |
+
name: 'nlp_journal_abs_intro-query',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
test_query_dataset: {
|
14 |
+
class_path: 'HfRetrievalQueryDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'test',
|
18 |
+
name: 'nlp_journal_abs_intro-query',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
doc_dataset: {
|
22 |
+
class_path: 'HfRetrievalDocDataset',
|
23 |
+
init_args: {
|
24 |
+
path: 'sbintuitions/JMTEB',
|
25 |
+
split: 'corpus',
|
26 |
+
name: 'nlp_journal_abs_intro-corpus',
|
27 |
+
},
|
28 |
+
},
|
29 |
+
query_prefix: '関連した文書を探すために次の文を表現して\n',
|
30 |
+
doc_prefix: '次の文章を表現して\n',
|
31 |
+
},
|
32 |
+
},
|
33 |
+
}
|
jmteb/tasks/nlp_journal_title_abs.jsonnet
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
nlp_journal_title_abs: {
|
3 |
+
class_path: 'RetrievalEvaluator',
|
4 |
+
init_args: {
|
5 |
+
val_query_dataset: {
|
6 |
+
class_path: 'HfRetrievalQueryDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'validation',
|
10 |
+
name: 'nlp_journal_title_abs-query',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
test_query_dataset: {
|
14 |
+
class_path: 'HfRetrievalQueryDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'test',
|
18 |
+
name: 'nlp_journal_title_abs-query',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
doc_dataset: {
|
22 |
+
class_path: 'HfRetrievalDocDataset',
|
23 |
+
init_args: {
|
24 |
+
path: 'sbintuitions/JMTEB',
|
25 |
+
split: 'corpus',
|
26 |
+
name: 'nlp_journal_title_abs-corpus',
|
27 |
+
},
|
28 |
+
},
|
29 |
+
query_prefix: '関連した文書を探すために次の文を表現して\n',
|
30 |
+
doc_prefix: '次の文章を表現して\n',
|
31 |
+
},
|
32 |
+
},
|
33 |
+
}
|
jmteb/tasks/nlp_journal_title_intro.jsonnet
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
nlp_journal_title_intro: {
|
3 |
+
class_path: 'RetrievalEvaluator',
|
4 |
+
init_args: {
|
5 |
+
val_query_dataset: {
|
6 |
+
class_path: 'HfRetrievalQueryDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'validation',
|
10 |
+
name: 'nlp_journal_title_intro-query',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
test_query_dataset: {
|
14 |
+
class_path: 'HfRetrievalQueryDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'test',
|
18 |
+
name: 'nlp_journal_title_intro-query',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
doc_dataset: {
|
22 |
+
class_path: 'HfRetrievalDocDataset',
|
23 |
+
init_args: {
|
24 |
+
path: 'sbintuitions/JMTEB',
|
25 |
+
split: 'corpus',
|
26 |
+
name: 'nlp_journal_title_intro-corpus',
|
27 |
+
},
|
28 |
+
},
|
29 |
+
query_prefix: '関連した文書を探すために次の文を表現して\n',
|
30 |
+
doc_prefix: '次の文章を表現して\n',
|
31 |
+
},
|
32 |
+
},
|
33 |
+
}
|
jmteb/tasks/paws_x_ja.jsonnet
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
paws_x_ja: {
|
3 |
+
class_path: 'PairClassificationEvaluator',
|
4 |
+
init_args: {
|
5 |
+
val_dataset: {
|
6 |
+
class_path: 'HfPairClassificationDataset',
|
7 |
+
init_args: {
|
8 |
+
path: 'sbintuitions/JMTEB',
|
9 |
+
split: 'validation',
|
10 |
+
name: 'paws_x_ja',
|
11 |
+
},
|
12 |
+
},
|
13 |
+
test_dataset: {
|
14 |
+
class_path: 'HfPairClassificationDataset',
|
15 |
+
init_args: {
|
16 |
+
path: 'sbintuitions/JMTEB',
|
17 |
+
split: 'test',
|
18 |
+
name: 'paws_x_ja',
|
19 |
+
},
|
20 |
+
},
|
21 |
+
sentence1_prefix: '同じ意味の文を探すために次の文を表現して\n',
|
22 |
+
sentence2_prefix: '同じ意味の文を探すために次の文を表現して\n',
|
23 |
+
},
|
24 |
+
},
|
25 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d43aafff5dda1ece7a456d98bbc22f404a33ca569c56656fbc8a9b6ab5c8beb4
|
3 |
+
size 264292496
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
mteb/models/__init__.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .default import PROMPT as default_prompt
|
2 |
+
from .retrieva import PROMPT as retrieva_prompt
|
3 |
+
from .retrieva_en import PROMPT as retrieva_en_prompt
|
4 |
+
|
5 |
+
|
6 |
+
PROMPTS = {
|
7 |
+
"default": default_prompt,
|
8 |
+
"retrieva": retrieva_prompt,
|
9 |
+
"retrieva-en": retrieva_en_prompt,
|
10 |
+
}
|
mteb/models/default.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
PROMPT = {
|
2 |
+
"query": "query: ",
|
3 |
+
"passage": "passage: ",
|
4 |
+
}
|
mteb/models/retrieva.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
PROMPT = {
|
2 |
+
"STS": "同じ意味の文を探すために次の文を表現して\n",
|
3 |
+
"Summarization": "次の記事またはタイトルを表現して\n",
|
4 |
+
"BitextMining": "次の文を表現して\n",
|
5 |
+
"Classification": "同じクラスに属する文を探すために次の文を表現して\n",
|
6 |
+
"Clustering": "類似した文を探すために次の文を表現して\n",
|
7 |
+
"Reranking-query": "関連した文書を探すために次の文を表現して\n",
|
8 |
+
"Reranking-passage": "次の文章を表現して\n",
|
9 |
+
"Retrieval-query": "関連した文書を探すために次の文を表現して\n",
|
10 |
+
"Retrieval-passage": "次の文章を表現して\n",
|
11 |
+
"InstructionRetrieval": "",
|
12 |
+
"PairClassification": "同じ意味の文を探すために次の文を表現して\n",
|
13 |
+
}
|
mteb/models/retrieva_en.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
PROMPT = {
|
2 |
+
"STS": "Represent the sentence for retrieving duplicate sentences:\n",
|
3 |
+
"Summarization": "Represent the news article or news title for retrieval:\n",
|
4 |
+
"BitextMining": "Represent the sentence\n",
|
5 |
+
"Classification": "Represent the sentence for retrieving the sentence belonging to the same category:\n",
|
6 |
+
"Clustering": "Represent the sentence to find similar sentences:\n",
|
7 |
+
"Reranking-query": "Represent the question:\n",
|
8 |
+
"Reranking-passage": "Represent the following text:\n",
|
9 |
+
"Retrieval-query": "Represent the question:\n",
|
10 |
+
"Retrieval-passage": "Represent the following text:\n",
|
11 |
+
"InstructionRetrieval": "Retrieve text based on user query:\n",
|
12 |
+
"PairClassification": "Represent the sentence for retrieving duplicate sentences:\n",
|
13 |
+
"MultilabelClassification": "Represent the sentence for retrieving the sentence belonging to the same category:\n",
|
14 |
+
"Speed": "",
|
15 |
+
}
|
mteb/mteb_eval.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Evaluate AMBER models"""
|
2 |
+
|
3 |
+
import argparse
|
4 |
+
|
5 |
+
import mteb
|
6 |
+
|
7 |
+
from models import PROMPTS
|
8 |
+
|
9 |
+
BENCHMARKS = {
|
10 |
+
"en": "MTEB(eng, v2)",
|
11 |
+
"ja": "MTEB(jpn, v1)",
|
12 |
+
}
|
13 |
+
|
14 |
+
|
15 |
+
def get_args() -> argparse.Namespace:
|
16 |
+
parser = argparse.ArgumentParser()
|
17 |
+
parser.add_argument("--model_type", type=str, required=True, help="Model name", choices=PROMPTS.keys())
|
18 |
+
parser.add_argument("--model_name_or_path", type=str, required=True)
|
19 |
+
parser.add_argument("--batch_size", type=int, default=32, help="Batch size")
|
20 |
+
parser.add_argument("--output_dir", type=str, required=True, help="Output directory")
|
21 |
+
parser.add_argument("--benchmark", type=str, required=True, choices=BENCHMARKS.keys())
|
22 |
+
parser.add_argument("--corpus_chunk_size", type=int, default=50000)
|
23 |
+
parser.add_argument("--convert_to_tensor", action="store_true")
|
24 |
+
return parser.parse_args()
|
25 |
+
|
26 |
+
|
27 |
+
def main():
|
28 |
+
args = get_args()
|
29 |
+
prompt = PROMPTS[args.model_type]
|
30 |
+
model = mteb.get_model(args.model_name_or_path, model_prompts=prompt)
|
31 |
+
|
32 |
+
tasks = mteb.get_benchmark(BENCHMARKS[args.benchmark])
|
33 |
+
evaluation = mteb.MTEB(tasks=tasks)
|
34 |
+
|
35 |
+
encode_kwargs = {
|
36 |
+
"batch_size": args.batch_size,
|
37 |
+
"convert_to_tensor": args.convert_to_tensor,
|
38 |
+
}
|
39 |
+
|
40 |
+
evaluation.run(
|
41 |
+
model,
|
42 |
+
output_folder=args.output_dir,
|
43 |
+
encode_kwargs=encode_kwargs,
|
44 |
+
corpus_chunk_size=args.corpus_chunk_size,
|
45 |
+
)
|
46 |
+
|
47 |
+
|
48 |
+
if __name__ == "__main__":
|
49 |
+
main()
|
mteb/results/AmazonCounterfactualClassification.json
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_revision": "e8379541af4e31359cca9fbcf4b00f2671dba205",
|
3 |
+
"task_name": "AmazonCounterfactualClassification",
|
4 |
+
"mteb_version": "1.36.1",
|
5 |
+
"scores": {
|
6 |
+
"test": [
|
7 |
+
{
|
8 |
+
"accuracy": 0.681642,
|
9 |
+
"f1": 0.619811,
|
10 |
+
"f1_weighted": 0.712157,
|
11 |
+
"ap": 0.306541,
|
12 |
+
"ap_weighted": 0.306541,
|
13 |
+
"scores_per_experiment": [
|
14 |
+
{
|
15 |
+
"accuracy": 0.676119,
|
16 |
+
"f1": 0.613878,
|
17 |
+
"f1_weighted": 0.708281,
|
18 |
+
"ap": 0.297508,
|
19 |
+
"ap_weighted": 0.297508
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"accuracy": 0.725373,
|
23 |
+
"f1": 0.651787,
|
24 |
+
"f1_weighted": 0.749265,
|
25 |
+
"ap": 0.324443,
|
26 |
+
"ap_weighted": 0.324443
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"accuracy": 0.598507,
|
30 |
+
"f1": 0.56537,
|
31 |
+
"f1_weighted": 0.638451,
|
32 |
+
"ap": 0.285849,
|
33 |
+
"ap_weighted": 0.285849
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"accuracy": 0.69403,
|
37 |
+
"f1": 0.641244,
|
38 |
+
"f1_weighted": 0.725044,
|
39 |
+
"ap": 0.333082,
|
40 |
+
"ap_weighted": 0.333082
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"accuracy": 0.683582,
|
44 |
+
"f1": 0.632402,
|
45 |
+
"f1_weighted": 0.715928,
|
46 |
+
"ap": 0.32599,
|
47 |
+
"ap_weighted": 0.32599
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"accuracy": 0.677612,
|
51 |
+
"f1": 0.609393,
|
52 |
+
"f1_weighted": 0.708798,
|
53 |
+
"ap": 0.288755,
|
54 |
+
"ap_weighted": 0.288755
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"accuracy": 0.755224,
|
58 |
+
"f1": 0.67807,
|
59 |
+
"f1_weighted": 0.774042,
|
60 |
+
"ap": 0.349254,
|
61 |
+
"ap_weighted": 0.349254
|
62 |
+
},
|
63 |
+
{
|
64 |
+
"accuracy": 0.735821,
|
65 |
+
"f1": 0.658379,
|
66 |
+
"f1_weighted": 0.757427,
|
67 |
+
"ap": 0.328302,
|
68 |
+
"ap_weighted": 0.328302
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"accuracy": 0.665672,
|
72 |
+
"f1": 0.605319,
|
73 |
+
"f1_weighted": 0.699304,
|
74 |
+
"ap": 0.291461,
|
75 |
+
"ap_weighted": 0.291461
|
76 |
+
},
|
77 |
+
{
|
78 |
+
"accuracy": 0.604478,
|
79 |
+
"f1": 0.542268,
|
80 |
+
"f1_weighted": 0.645027,
|
81 |
+
"ap": 0.240767,
|
82 |
+
"ap_weighted": 0.240767
|
83 |
+
}
|
84 |
+
],
|
85 |
+
"main_score": 0.681642,
|
86 |
+
"hf_subset": "en",
|
87 |
+
"languages": [
|
88 |
+
"eng-Latn"
|
89 |
+
]
|
90 |
+
}
|
91 |
+
]
|
92 |
+
},
|
93 |
+
"evaluation_time": 10.654787302017212,
|
94 |
+
"kg_co2_emissions": null
|
95 |
+
}
|
mteb/results/ArXivHierarchicalClusteringP2P.json
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_revision": "0bbdb47bcbe3a90093699aefeed338a0f28a7ee8",
|
3 |
+
"task_name": "ArXivHierarchicalClusteringP2P",
|
4 |
+
"mteb_version": "1.36.1",
|
5 |
+
"scores": {
|
6 |
+
"test": [
|
7 |
+
{
|
8 |
+
"v_measures": {
|
9 |
+
"Level 0": [
|
10 |
+
0.507858,
|
11 |
+
0.514541,
|
12 |
+
0.544659,
|
13 |
+
0.512061,
|
14 |
+
0.542216,
|
15 |
+
0.524017,
|
16 |
+
0.49424,
|
17 |
+
0.536251,
|
18 |
+
0.558261,
|
19 |
+
0.55925
|
20 |
+
],
|
21 |
+
"Level 1": [
|
22 |
+
0.590244,
|
23 |
+
0.610952,
|
24 |
+
0.551394,
|
25 |
+
0.585449,
|
26 |
+
0.58945,
|
27 |
+
0.581477,
|
28 |
+
0.581684,
|
29 |
+
0.570883,
|
30 |
+
0.574588,
|
31 |
+
0.60155
|
32 |
+
]
|
33 |
+
},
|
34 |
+
"v_measure": 0.556551,
|
35 |
+
"v_measure_std": 0.032919,
|
36 |
+
"main_score": 0.556551,
|
37 |
+
"hf_subset": "default",
|
38 |
+
"languages": [
|
39 |
+
"eng-Latn"
|
40 |
+
]
|
41 |
+
}
|
42 |
+
]
|
43 |
+
},
|
44 |
+
"evaluation_time": 6.287527084350586,
|
45 |
+
"kg_co2_emissions": null
|
46 |
+
}
|
mteb/results/ArXivHierarchicalClusteringS2S.json
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_revision": "b73bd54100e5abfa6e3a23dcafb46fe4d2438dc3",
|
3 |
+
"task_name": "ArXivHierarchicalClusteringS2S",
|
4 |
+
"mteb_version": "1.36.1",
|
5 |
+
"scores": {
|
6 |
+
"test": [
|
7 |
+
{
|
8 |
+
"v_measures": {
|
9 |
+
"Level 0": [
|
10 |
+
0.502992,
|
11 |
+
0.485244,
|
12 |
+
0.477917,
|
13 |
+
0.495744,
|
14 |
+
0.477376,
|
15 |
+
0.539086,
|
16 |
+
0.544394,
|
17 |
+
0.547524,
|
18 |
+
0.522253,
|
19 |
+
0.518454
|
20 |
+
],
|
21 |
+
"Level 1": [
|
22 |
+
0.563453,
|
23 |
+
0.553718,
|
24 |
+
0.559457,
|
25 |
+
0.564745,
|
26 |
+
0.538357,
|
27 |
+
0.579393,
|
28 |
+
0.548295,
|
29 |
+
0.573239,
|
30 |
+
0.562891,
|
31 |
+
0.575329
|
32 |
+
]
|
33 |
+
},
|
34 |
+
"v_measure": 0.536493,
|
35 |
+
"v_measure_std": 0.032359,
|
36 |
+
"main_score": 0.536493,
|
37 |
+
"hf_subset": "default",
|
38 |
+
"languages": [
|
39 |
+
"eng-Latn"
|
40 |
+
]
|
41 |
+
}
|
42 |
+
]
|
43 |
+
},
|
44 |
+
"evaluation_time": 5.862490892410278,
|
45 |
+
"kg_co2_emissions": null
|
46 |
+
}
|
mteb/results/ArguAna.json
ADDED
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_revision": "c22ab2a51041ffd869aaddef7af8d8215647e41a",
|
3 |
+
"task_name": "ArguAna",
|
4 |
+
"mteb_version": "1.36.1",
|
5 |
+
"scores": {
|
6 |
+
"test": [
|
7 |
+
{
|
8 |
+
"ndcg_at_1": 0.25249,
|
9 |
+
"ndcg_at_3": 0.38056,
|
10 |
+
"ndcg_at_5": 0.43124,
|
11 |
+
"ndcg_at_10": 0.48068,
|
12 |
+
"ndcg_at_20": 0.51461,
|
13 |
+
"ndcg_at_100": 0.53158,
|
14 |
+
"ndcg_at_1000": 0.5338,
|
15 |
+
"map_at_1": 0.25249,
|
16 |
+
"map_at_3": 0.34803,
|
17 |
+
"map_at_5": 0.37598,
|
18 |
+
"map_at_10": 0.39611,
|
19 |
+
"map_at_20": 0.40569,
|
20 |
+
"map_at_100": 0.40821,
|
21 |
+
"map_at_1000": 0.4083,
|
22 |
+
"recall_at_1": 0.25249,
|
23 |
+
"recall_at_3": 0.47511,
|
24 |
+
"recall_at_5": 0.59886,
|
25 |
+
"recall_at_10": 0.7532,
|
26 |
+
"recall_at_20": 0.88549,
|
27 |
+
"recall_at_100": 0.9744,
|
28 |
+
"recall_at_1000": 0.99147,
|
29 |
+
"precision_at_1": 0.25249,
|
30 |
+
"precision_at_3": 0.15837,
|
31 |
+
"precision_at_5": 0.11977,
|
32 |
+
"precision_at_10": 0.07532,
|
33 |
+
"precision_at_20": 0.04427,
|
34 |
+
"precision_at_100": 0.00974,
|
35 |
+
"precision_at_1000": 0.00099,
|
36 |
+
"mrr_at_1": 0.258179,
|
37 |
+
"mrr_at_3": 0.349692,
|
38 |
+
"mrr_at_5": 0.377928,
|
39 |
+
"mrr_at_10": 0.398238,
|
40 |
+
"mrr_at_20": 0.407844,
|
41 |
+
"mrr_at_100": 0.410403,
|
42 |
+
"mrr_at_1000": 0.410495,
|
43 |
+
"nauc_ndcg_at_1_max": -0.026569,
|
44 |
+
"nauc_ndcg_at_1_std": -0.024726,
|
45 |
+
"nauc_ndcg_at_1_diff1": 0.102597,
|
46 |
+
"nauc_ndcg_at_3_max": -0.008151,
|
47 |
+
"nauc_ndcg_at_3_std": -0.033642,
|
48 |
+
"nauc_ndcg_at_3_diff1": 0.078841,
|
49 |
+
"nauc_ndcg_at_5_max": -0.003906,
|
50 |
+
"nauc_ndcg_at_5_std": -0.024619,
|
51 |
+
"nauc_ndcg_at_5_diff1": 0.07558,
|
52 |
+
"nauc_ndcg_at_10_max": 0.010935,
|
53 |
+
"nauc_ndcg_at_10_std": -0.018625,
|
54 |
+
"nauc_ndcg_at_10_diff1": 0.080503,
|
55 |
+
"nauc_ndcg_at_20_max": 0.013164,
|
56 |
+
"nauc_ndcg_at_20_std": -0.013407,
|
57 |
+
"nauc_ndcg_at_20_diff1": 0.078992,
|
58 |
+
"nauc_ndcg_at_100_max": 0.008316,
|
59 |
+
"nauc_ndcg_at_100_std": -0.008725,
|
60 |
+
"nauc_ndcg_at_100_diff1": 0.085633,
|
61 |
+
"nauc_ndcg_at_1000_max": 0.0045,
|
62 |
+
"nauc_ndcg_at_1000_std": -0.014357,
|
63 |
+
"nauc_ndcg_at_1000_diff1": 0.084438,
|
64 |
+
"nauc_map_at_1_max": -0.026569,
|
65 |
+
"nauc_map_at_1_std": -0.024726,
|
66 |
+
"nauc_map_at_1_diff1": 0.102597,
|
67 |
+
"nauc_map_at_3_max": -0.013567,
|
68 |
+
"nauc_map_at_3_std": -0.03222,
|
69 |
+
"nauc_map_at_3_diff1": 0.083557,
|
70 |
+
"nauc_map_at_5_max": -0.01162,
|
71 |
+
"nauc_map_at_5_std": -0.027384,
|
72 |
+
"nauc_map_at_5_diff1": 0.081184,
|
73 |
+
"nauc_map_at_10_max": -0.00615,
|
74 |
+
"nauc_map_at_10_std": -0.025394,
|
75 |
+
"nauc_map_at_10_diff1": 0.082831,
|
76 |
+
"nauc_map_at_20_max": -0.005492,
|
77 |
+
"nauc_map_at_20_std": -0.024076,
|
78 |
+
"nauc_map_at_20_diff1": 0.08281,
|
79 |
+
"nauc_map_at_100_max": -0.006049,
|
80 |
+
"nauc_map_at_100_std": -0.02356,
|
81 |
+
"nauc_map_at_100_diff1": 0.083933,
|
82 |
+
"nauc_map_at_1000_max": -0.006154,
|
83 |
+
"nauc_map_at_1000_std": -0.02373,
|
84 |
+
"nauc_map_at_1000_diff1": 0.083902,
|
85 |
+
"nauc_recall_at_1_max": -0.026569,
|
86 |
+
"nauc_recall_at_1_std": -0.024726,
|
87 |
+
"nauc_recall_at_1_diff1": 0.102597,
|
88 |
+
"nauc_recall_at_3_max": 0.007234,
|
89 |
+
"nauc_recall_at_3_std": -0.037315,
|
90 |
+
"nauc_recall_at_3_diff1": 0.066138,
|
91 |
+
"nauc_recall_at_5_max": 0.020847,
|
92 |
+
"nauc_recall_at_5_std": -0.014385,
|
93 |
+
"nauc_recall_at_5_diff1": 0.059428,
|
94 |
+
"nauc_recall_at_10_max": 0.092417,
|
95 |
+
"nauc_recall_at_10_std": 0.016372,
|
96 |
+
"nauc_recall_at_10_diff1": 0.076442,
|
97 |
+
"nauc_recall_at_20_max": 0.179819,
|
98 |
+
"nauc_recall_at_20_std": 0.093827,
|
99 |
+
"nauc_recall_at_20_diff1": 0.052288,
|
100 |
+
"nauc_recall_at_100_max": 0.463576,
|
101 |
+
"nauc_recall_at_100_std": 0.695314,
|
102 |
+
"nauc_recall_at_100_diff1": 0.252365,
|
103 |
+
"nauc_recall_at_1000_max": 0.473173,
|
104 |
+
"nauc_recall_at_1000_std": 0.803564,
|
105 |
+
"nauc_recall_at_1000_diff1": 0.30506,
|
106 |
+
"nauc_precision_at_1_max": -0.026569,
|
107 |
+
"nauc_precision_at_1_std": -0.024726,
|
108 |
+
"nauc_precision_at_1_diff1": 0.102597,
|
109 |
+
"nauc_precision_at_3_max": 0.007234,
|
110 |
+
"nauc_precision_at_3_std": -0.037315,
|
111 |
+
"nauc_precision_at_3_diff1": 0.066138,
|
112 |
+
"nauc_precision_at_5_max": 0.020847,
|
113 |
+
"nauc_precision_at_5_std": -0.014385,
|
114 |
+
"nauc_precision_at_5_diff1": 0.059428,
|
115 |
+
"nauc_precision_at_10_max": 0.092417,
|
116 |
+
"nauc_precision_at_10_std": 0.016372,
|
117 |
+
"nauc_precision_at_10_diff1": 0.076442,
|
118 |
+
"nauc_precision_at_20_max": 0.179819,
|
119 |
+
"nauc_precision_at_20_std": 0.093827,
|
120 |
+
"nauc_precision_at_20_diff1": 0.052288,
|
121 |
+
"nauc_precision_at_100_max": 0.463576,
|
122 |
+
"nauc_precision_at_100_std": 0.695314,
|
123 |
+
"nauc_precision_at_100_diff1": 0.252365,
|
124 |
+
"nauc_precision_at_1000_max": 0.473173,
|
125 |
+
"nauc_precision_at_1000_std": 0.803564,
|
126 |
+
"nauc_precision_at_1000_diff1": 0.30506,
|
127 |
+
"nauc_mrr_at_1_max": -0.025852,
|
128 |
+
"nauc_mrr_at_1_std": -0.027133,
|
129 |
+
"nauc_mrr_at_1_diff1": 0.083902,
|
130 |
+
"nauc_mrr_at_3_max": -0.023878,
|
131 |
+
"nauc_mrr_at_3_std": -0.031916,
|
132 |
+
"nauc_mrr_at_3_diff1": 0.06376,
|
133 |
+
"nauc_mrr_at_5_max": -0.020079,
|
134 |
+
"nauc_mrr_at_5_std": -0.029791,
|
135 |
+
"nauc_mrr_at_5_diff1": 0.063531,
|
136 |
+
"nauc_mrr_at_10_max": -0.0141,
|
137 |
+
"nauc_mrr_at_10_std": -0.027921,
|
138 |
+
"nauc_mrr_at_10_diff1": 0.065142,
|
139 |
+
"nauc_mrr_at_20_max": -0.0135,
|
140 |
+
"nauc_mrr_at_20_std": -0.026331,
|
141 |
+
"nauc_mrr_at_20_diff1": 0.064701,
|
142 |
+
"nauc_mrr_at_100_max": -0.01393,
|
143 |
+
"nauc_mrr_at_100_std": -0.025819,
|
144 |
+
"nauc_mrr_at_100_diff1": 0.065875,
|
145 |
+
"nauc_mrr_at_1000_max": -0.014037,
|
146 |
+
"nauc_mrr_at_1000_std": -0.025989,
|
147 |
+
"nauc_mrr_at_1000_diff1": 0.065838,
|
148 |
+
"main_score": 0.48068,
|
149 |
+
"hf_subset": "default",
|
150 |
+
"languages": [
|
151 |
+
"eng-Latn"
|
152 |
+
]
|
153 |
+
}
|
154 |
+
]
|
155 |
+
},
|
156 |
+
"evaluation_time": 27.352286100387573,
|
157 |
+
"kg_co2_emissions": null
|
158 |
+
}
|
mteb/results/AskUbuntuDupQuestions.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_revision": "2000358ca161889fa9c082cb41daa8dcfb161a54",
|
3 |
+
"task_name": "AskUbuntuDupQuestions",
|
4 |
+
"mteb_version": "1.36.1",
|
5 |
+
"scores": {
|
6 |
+
"test": [
|
7 |
+
{
|
8 |
+
"map": 0.565225,
|
9 |
+
"mrr": 0.705146,
|
10 |
+
"nAUC_map_max": 0.18224,
|
11 |
+
"nAUC_map_std": 0.125352,
|
12 |
+
"nAUC_map_diff1": 0.140464,
|
13 |
+
"nAUC_mrr_max": 0.286197,
|
14 |
+
"nAUC_mrr_std": 0.2169,
|
15 |
+
"nAUC_mrr_diff1": 0.158021,
|
16 |
+
"main_score": 0.565225,
|
17 |
+
"hf_subset": "default",
|
18 |
+
"languages": [
|
19 |
+
"eng-Latn"
|
20 |
+
]
|
21 |
+
}
|
22 |
+
]
|
23 |
+
},
|
24 |
+
"evaluation_time": 2.0422356128692627,
|
25 |
+
"kg_co2_emissions": null
|
26 |
+
}
|
mteb/results/BIOSSES.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_revision": "d3fb88f8f02e40887cd149695127462bbcf29b4a",
|
3 |
+
"task_name": "BIOSSES",
|
4 |
+
"mteb_version": "1.36.1",
|
5 |
+
"scores": {
|
6 |
+
"test": [
|
7 |
+
{
|
8 |
+
"pearson": 0.866855,
|
9 |
+
"spearman": 0.831736,
|
10 |
+
"cosine_pearson": 0.866855,
|
11 |
+
"cosine_spearman": 0.831736,
|
12 |
+
"manhattan_pearson": 0.855442,
|
13 |
+
"manhattan_spearman": 0.839501,
|
14 |
+
"euclidean_pearson": 0.850403,
|
15 |
+
"euclidean_spearman": 0.831736,
|
16 |
+
"main_score": 0.831736,
|
17 |
+
"hf_subset": "default",
|
18 |
+
"languages": [
|
19 |
+
"eng-Latn"
|
20 |
+
]
|
21 |
+
}
|
22 |
+
]
|
23 |
+
},
|
24 |
+
"evaluation_time": 0.22259831428527832,
|
25 |
+
"kg_co2_emissions": null
|
26 |
+
}
|