Katsumata420 committed on Mar 7

Commit

7d69aa6

verified ·

1 Parent(s): be13df6

First commit

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

1_Pooling/config.json +10 -0
config.json +47 -0
config_sentence_transformers.json +14 -0
jmteb/jmteb.jsonnet +22 -0
jmteb/results/Classification/scores_amazon_counterfactual_classification.json +23 -0
jmteb/results/Classification/scores_amazon_review_classification.json +23 -0
jmteb/results/Classification/scores_massive_intent_classification.json +23 -0
jmteb/results/Classification/scores_massive_scenario_classification.json +23 -0
jmteb/results/Clustering/scores_livedoor_news.json +36 -0
jmteb/results/Clustering/scores_mewsc16.json +36 -0
jmteb/results/PairClassification/scores_paws_x_ja.json +41 -0
jmteb/results/Reranking/scores_esci.json +31 -0
jmteb/results/Retrieval/scores_jagovfaqs_22k.json +43 -0
jmteb/results/Retrieval/scores_jaqket.json +43 -0
jmteb/results/Retrieval/scores_mrtydi.json +43 -0
jmteb/results/Retrieval/scores_nlp_journal_abs_intro.json +43 -0
jmteb/results/Retrieval/scores_nlp_journal_title_abs.json +43 -0
jmteb/results/Retrieval/scores_nlp_journal_title_intro.json +43 -0
jmteb/results/STS/scores_jsick.json +31 -0
jmteb/results/STS/scores_jsts.json +31 -0
jmteb/results/summary.json +62 -0
jmteb/tasks/amazon_counterfactual_classification.jsonnet +32 -0
jmteb/tasks/amazon_review_classification.jsonnet +32 -0
jmteb/tasks/esci.jsonnet +33 -0
jmteb/tasks/jagovfaqs_22k.jsonnet +33 -0
jmteb/tasks/jaqket.jsonnet +33 -0
jmteb/tasks/jsick.jsonnet +25 -0
jmteb/tasks/jsts.jsonnet +25 -0
jmteb/tasks/livedoor_news.jsonnet +24 -0
jmteb/tasks/massive_intent_classification.jsonnet +32 -0
jmteb/tasks/massive_scenario_classification.jsonnet +32 -0
jmteb/tasks/mewsc16.jsonnet +24 -0
jmteb/tasks/mrtydi.jsonnet +34 -0
jmteb/tasks/nlp_journal_abs_intro.jsonnet +33 -0
jmteb/tasks/nlp_journal_title_abs.jsonnet +33 -0
jmteb/tasks/nlp_journal_title_intro.jsonnet +33 -0
jmteb/tasks/paws_x_ja.jsonnet +25 -0
model.safetensors +3 -0
modules.json +20 -0
mteb/models/__init__.py +10 -0
mteb/models/default.py +4 -0
mteb/models/retrieva.py +13 -0
mteb/models/retrieva_en.py +15 -0
mteb/mteb_eval.py +49 -0
mteb/results/AmazonCounterfactualClassification.json +95 -0
mteb/results/ArXivHierarchicalClusteringP2P.json +46 -0
mteb/results/ArXivHierarchicalClusteringS2S.json +46 -0
mteb/results/ArguAna.json +158 -0
mteb/results/AskUbuntuDupQuestions.json +26 -0
mteb/results/BIOSSES.json +26 -0

1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "word_embedding_dimension": 512,
+  "pooling_mode_cls_token": false,
+  "pooling_mode_mean_tokens": true,
+  "pooling_mode_max_tokens": false,
+  "pooling_mode_mean_sqrt_len_tokens": false,
+  "pooling_mode_weightedmean_tokens": false,
+  "pooling_mode_lasttoken": false,
+  "include_prompt": true
+}

config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "_name_or_path": "sbintuitions/modernbert-ja-130m",
+  "architectures": [
+    "ModernBertModel"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "classifier_activation": "gelu",
+  "classifier_bias": false,
+  "classifier_dropout": 0.0,
+  "classifier_pooling": "cls",
+  "cls_token_id": 6,
+  "decoder_bias": true,
+  "deterministic_flash_attn": false,
+  "embedding_dropout": 0.0,
+  "eos_token_id": 2,
+  "global_attn_every_n_layers": 3,
+  "global_rope_theta": 160000.0,
+  "gradient_checkpointing": false,
+  "hidden_activation": "gelu",
+  "hidden_size": 512,
+  "initializer_cutoff_factor": 2.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_norm_eps": 1e-05,
+  "local_attention": 128,
+  "local_rope_theta": 10000.0,
+  "max_position_embeddings": 8192,
+  "mlp_bias": false,
+  "mlp_dropout": 0.0,
+  "model_type": "modernbert",
+  "norm_bias": false,
+  "norm_eps": 1e-05,
+  "num_attention_heads": 8,
+  "num_hidden_layers": 19,
+  "pad_token_id": 3,
+  "position_embedding_type": "rope",
+  "reference_compile": false,
+  "repad_logits_with_grad": false,
+  "sep_token_id": 4,
+  "sparse_pred_ignore_index": -100,
+  "sparse_prediction": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.49.0",
+  "vocab_size": 102400
+}

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "__version__": {
+    "sentence_transformers": "3.4.1",
+    "transformers": "4.49.0",
+    "pytorch": "2.5.1+cu121"
+  },
+  "prompts": {
+    "Retrieval-query": "関連した文書を探すために次の文を表現して\n",
+    "Retrieval-passage": "次の文章を表現して\n",
+    "default": "同じ意味の文を探すために次の文を表現して\n"
+  },
+  "default_prompt_name": "default",
+  "similarity_fn_name": "cosine"
+}

jmteb/jmteb.jsonnet ADDED Viewed

	@@ -0,0 +1,22 @@

+// Classification
+(import './tasks/amazon_review_classification.jsonnet') +
+(import './tasks/amazon_counterfactual_classification.jsonnet') +
+(import './tasks/massive_intent_classification.jsonnet') +
+(import './tasks/massive_scenario_classification.jsonnet') +
+// Clustering
+(import './tasks/livedoor_news.jsonnet') +
+(import './tasks/mewsc16.jsonnet') +
+// STS
+(import './tasks/jsts.jsonnet') +
+(import './tasks/jsick.jsonnet') +
+// Pair Classification
+(import './tasks/paws_x_ja.jsonnet') +
+// Retrieval
+(import './tasks/jagovfaqs_22k.jsonnet') +
+(import './tasks/mrtydi.jsonnet') +
+(import './tasks/jaqket.jsonnet') +
+(import './tasks/nlp_journal_title_abs.jsonnet') +
+(import './tasks/nlp_journal_title_intro.jsonnet') +
+(import './tasks/nlp_journal_abs_intro.jsonnet') +
+// Reranking
+(import './tasks/esci.jsonnet')

jmteb/results/Classification/scores_amazon_counterfactual_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.7725172847265871,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.9163090128755365,
+                "macro_f1": 0.694795707592322
+            },
+            "logreg": {
+                "accuracy": 0.9291845493562232,
+                "macro_f1": 0.7484418707366147
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.9336188436830836,
+                "macro_f1": 0.7725172847265871
+            }
+        }
+    }
+}

jmteb/results/Classification/scores_amazon_review_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.580199599298548,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.4322,
+                "macro_f1": 0.4233916952995692
+            },
+            "logreg": {
+                "accuracy": 0.5912,
+                "macro_f1": 0.5856091084774833
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.5848,
+                "macro_f1": 0.580199599298548
+            }
+        }
+    }
+}

jmteb/results/Classification/scores_massive_intent_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.7965072076950149,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.8066896212493851,
+                "macro_f1": 0.7856631206448443
+            },
+            "logreg": {
+                "accuracy": 0.8578455484505657,
+                "macro_f1": 0.8070559236000313
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.859448554135844,
+                "macro_f1": 0.7965072076950149
+            }
+        }
+    }
+}

jmteb/results/Classification/scores_massive_scenario_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.8963544416714002,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.8745696015740285,
+                "macro_f1": 0.8702590755528099
+            },
+            "logreg": {
+                "accuracy": 0.9011313330054107,
+                "macro_f1": 0.894702776793441
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.8997982515131137,
+                "macro_f1": 0.8963544416714002
+            }
+        }
+    }
+}

jmteb/results/Clustering/scores_livedoor_news.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+    "metric_name": "v_measure_score",
+    "metric_value": 0.5063220603646297,
+    "details": {
+        "optimal_clustering_model_name": "MiniBatchKMeans",
+        "val_scores": {
+            "MiniBatchKMeans": {
+                "v_measure_score": 0.5162254920293606,
+                "homogeneity_score": 0.5134942369453169,
+                "completeness_score": 0.5189859573522001
+            },
+            "AgglomerativeClustering": {
+                "v_measure_score": 0.4817740009110648,
+                "homogeneity_score": 0.47151381593215835,
+                "completeness_score": 0.4924906436335508
+            },
+            "BisectingKMeans": {
+                "v_measure_score": 0.4672899779727179,
+                "homogeneity_score": 0.4669032808035401,
+                "completeness_score": 0.4676773162110705
+            },
+            "Birch": {
+                "v_measure_score": 0.48380197476431497,
+                "homogeneity_score": 0.4759913341716145,
+                "completeness_score": 0.4918732245931716
+            }
+        },
+        "test_scores": {
+            "MiniBatchKMeans": {
+                "v_measure_score": 0.5063220603646297,
+                "homogeneity_score": 0.5072769713955575,
+                "completeness_score": 0.5053707376763799
+            }
+        }
+    }
+}

jmteb/results/Clustering/scores_mewsc16.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+    "metric_name": "v_measure_score",
+    "metric_value": 0.45462727073049025,
+    "details": {
+        "optimal_clustering_model_name": "Birch",
+        "val_scores": {
+            "MiniBatchKMeans": {
+                "v_measure_score": 0.3842466809312565,
+                "homogeneity_score": 0.4226378649485537,
+                "completeness_score": 0.3522493803950809
+            },
+            "AgglomerativeClustering": {
+                "v_measure_score": 0.4377888516765032,
+                "homogeneity_score": 0.4772841182693872,
+                "completeness_score": 0.40433049804468363
+            },
+            "BisectingKMeans": {
+                "v_measure_score": 0.40122932757998875,
+                "homogeneity_score": 0.43755915162610337,
+                "completeness_score": 0.3704698214056897
+            },
+            "Birch": {
+                "v_measure_score": 0.46841918020711176,
+                "homogeneity_score": 0.506935954769718,
+                "completeness_score": 0.4353420774727962
+            }
+        },
+        "test_scores": {
+            "Birch": {
+                "v_measure_score": 0.45462727073049025,
+                "homogeneity_score": 0.4852439372487074,
+                "completeness_score": 0.42764484284900073
+            }
+        }
+    }
+}

jmteb/results/PairClassification/scores_paws_x_ja.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+    "metric_name": "binary_f1",
+    "metric_value": 0.6403061224489796,
+    "details": {
+        "optimal_distance_metric": "euclidean_distances",
+        "val_scores": {
+            "cosine_distances": {
+                "accuracy": 0.5725,
+                "accuracy_threshold": -0.12011593580245972,
+                "binary_f1": 0.5979670522257273,
+                "binary_f1_threshold": 1.0
+            },
+            "manhatten_distances": {
+                "accuracy": 0.6505,
+                "accuracy_threshold": 5.353497505187988,
+                "binary_f1": 0.6209476309226932,
+                "binary_f1_threshold": 10.806973457336426
+            },
+            "euclidean_distances": {
+                "accuracy": 0.653,
+                "accuracy_threshold": 0.30937591195106506,
+                "binary_f1": 0.6221858370855505,
+                "binary_f1_threshold": 0.6454310417175293
+            },
+            "dot_similarities": {
+                "accuracy": 0.654,
+                "accuracy_threshold": 0.9534672498703003,
+                "binary_f1": 0.6213355048859935,
+                "binary_f1_threshold": 0.7887746095657349
+            }
+        },
+        "test_scores": {
+            "euclidean_distances": {
+                "accuracy": 0.6085,
+                "accuracy_threshold": 0.30937591195106506,
+                "binary_f1": 0.6403061224489796,
+                "binary_f1_threshold": 0.6454310417175293
+            }
+        }
+    }
+}

jmteb/results/Reranking/scores_esci.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9326535342404364,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "ndcg@10": 0.9457387910643943,
+                "ndcg@20": 0.9574971659413306,
+                "ndcg@40": 0.9650000580183328
+            },
+            "dot_score": {
+                "ndcg@10": 0.9457186021682247,
+                "ndcg@20": 0.957477691902003,
+                "ndcg@40": 0.9649985588722803
+            },
+            "euclidean_distance": {
+                "ndcg@10": 0.9455185072273512,
+                "ndcg@20": 0.9573786712685046,
+                "ndcg@40": 0.9649006041448088
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "ndcg@10": 0.9326535342404364,
+                "ndcg@20": 0.9490780973799793,
+                "ndcg@40": 0.9582022320743613
+            }
+        }
+    }
+}

jmteb/results/Retrieval/scores_jagovfaqs_22k.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.7037560169204234,
+    "details": {
+        "optimal_distance_metric": "euclidean_distance",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.5621526762211173,
+                "accuracy@3": 0.7458321146534074,
+                "accuracy@5": 0.7987715706346885,
+                "accuracy@10": 0.8581456566247441,
+                "ndcg@10": 0.7123343291094127,
+                "mrr@10": 0.6653833386723116
+            },
+            "dot_score": {
+                "accuracy@1": 0.5627376425855514,
+                "accuracy@3": 0.7461245978356245,
+                "accuracy@5": 0.7978941210880375,
+                "accuracy@10": 0.8587306229891781,
+                "ndcg@10": 0.712624535627044,
+                "mrr@10": 0.6656209232254854
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.5633226089499854,
+                "accuracy@3": 0.7455396314711904,
+                "accuracy@5": 0.7987715706346885,
+                "accuracy@10": 0.8578531734425271,
+                "ndcg@10": 0.7126592570676551,
+                "mrr@10": 0.6659067907166759
+            }
+        },
+        "test_scores": {
+            "euclidean_distance": {
+                "accuracy@1": 0.5675438596491228,
+                "accuracy@3": 0.7292397660818714,
+                "accuracy@5": 0.7839181286549708,
+                "accuracy@10": 0.8403508771929824,
+                "ndcg@10": 0.7037560169204234,
+                "mrr@10": 0.660028311519539
+            }
+        }
+    }
+}

jmteb/results/Retrieval/scores_jaqket.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.61373516847512,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.4562814070351759,
+                "accuracy@3": 0.6572864321608041,
+                "accuracy@5": 0.7045226130653266,
+                "accuracy@10": 0.7628140703517587,
+                "ndcg@10": 0.6130067237476119,
+                "mrr@10": 0.5645469410544787
+            },
+            "dot_score": {
+                "accuracy@1": 0.457286432160804,
+                "accuracy@3": 0.6552763819095477,
+                "accuracy@5": 0.7055276381909548,
+                "accuracy@10": 0.7618090452261307,
+                "ndcg@10": 0.6127513989155836,
+                "mrr@10": 0.5645381670256041
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.4552763819095477,
+                "accuracy@3": 0.6572864321608041,
+                "accuracy@5": 0.7055276381909548,
+                "accuracy@10": 0.7628140703517587,
+                "ndcg@10": 0.612740626009351,
+                "mrr@10": 0.5641572944085506
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.4493480441323972,
+                "accuracy@3": 0.6288866599799399,
+                "accuracy@5": 0.7021063189568706,
+                "accuracy@10": 0.7903711133400201,
+                "ndcg@10": 0.61373516847512,
+                "mrr@10": 0.5578975020298991
+            }
+        }
+    }
+}

jmteb/results/Retrieval/scores_mrtydi.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.365136589573548,
+    "details": {
+        "optimal_distance_metric": "dot_score",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.22629310344827586,
+                "accuracy@3": 0.4331896551724138,
+                "accuracy@5": 0.5064655172413793,
+                "accuracy@10": 0.59375,
+                "ndcg@10": 0.40613921488749105,
+                "mrr@10": 0.34660774151614676
+            },
+            "dot_score": {
+                "accuracy@1": 0.22952586206896552,
+                "accuracy@3": 0.43211206896551724,
+                "accuracy@5": 0.5064655172413793,
+                "accuracy@10": 0.5948275862068966,
+                "ndcg@10": 0.4072380263572043,
+                "mrr@10": 0.34782686781609207
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.22629310344827586,
+                "accuracy@3": 0.4353448275862069,
+                "accuracy@5": 0.5075431034482759,
+                "accuracy@10": 0.5959051724137931,
+                "ndcg@10": 0.4071683076743109,
+                "mrr@10": 0.34731544198139036
+            }
+        },
+        "test_scores": {
+            "dot_score": {
+                "accuracy@1": 0.23472222222222222,
+                "accuracy@3": 0.4027777777777778,
+                "accuracy@5": 0.4847222222222222,
+                "accuracy@10": 0.5861111111111111,
+                "ndcg@10": 0.365136589573548,
+                "mrr@10": 0.3406740520282186
+            }
+        }
+    }
+}

jmteb/results/Retrieval/scores_nlp_journal_abs_intro.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9312261091012943,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.94,
+                "accuracy@3": 0.96,
+                "accuracy@5": 0.97,
+                "accuracy@10": 0.98,
+                "ndcg@10": 0.9589493964497818,
+                "mrr@10": 0.9522619047619049
+            },
+            "dot_score": {
+                "accuracy@1": 0.94,
+                "accuracy@3": 0.96,
+                "accuracy@5": 0.97,
+                "accuracy@10": 0.98,
+                "ndcg@10": 0.9589493964497818,
+                "mrr@10": 0.9522619047619049
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.94,
+                "accuracy@3": 0.96,
+                "accuracy@5": 0.97,
+                "accuracy@10": 0.98,
+                "ndcg@10": 0.9589493964497818,
+                "mrr@10": 0.9522619047619049
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.8762376237623762,
+                "accuracy@3": 0.9455445544554455,
+                "accuracy@5": 0.9678217821782178,
+                "accuracy@10": 0.9826732673267327,
+                "ndcg@10": 0.9312261091012943,
+                "mrr@10": 0.914452695269527
+            }
+        }
+    }
+}

jmteb/results/Retrieval/scores_nlp_journal_title_abs.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9660376057905824,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.92,
+                "accuracy@3": 0.98,
+                "accuracy@5": 0.99,
+                "accuracy@10": 1.0,
+                "ndcg@10": 0.964415325130387,
+                "mrr@10": 0.9525000000000001
+            },
+            "dot_score": {
+                "accuracy@1": 0.91,
+                "accuracy@3": 0.98,
+                "accuracy@5": 0.99,
+                "accuracy@10": 1.0,
+                "ndcg@10": 0.9607246226661015,
+                "mrr@10": 0.9475000000000001
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.92,
+                "accuracy@3": 0.98,
+                "accuracy@5": 0.99,
+                "accuracy@10": 1.0,
+                "ndcg@10": 0.964415325130387,
+                "mrr@10": 0.9525000000000001
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.9331683168316832,
+                "accuracy@3": 0.9727722772277227,
+                "accuracy@5": 0.995049504950495,
+                "accuracy@10": 0.9975247524752475,
+                "ndcg@10": 0.9660376057905824,
+                "mrr@10": 0.9556930693069304
+            }
+        }
+    }
+}

jmteb/results/Retrieval/scores_nlp_journal_title_intro.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.8239125997597097,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.79,
+                "accuracy@3": 0.87,
+                "accuracy@5": 0.92,
+                "accuracy@10": 0.94,
+                "ndcg@10": 0.8625804492635228,
+                "mrr@10": 0.8376666666666667
+            },
+            "dot_score": {
+                "accuracy@1": 0.79,
+                "accuracy@3": 0.87,
+                "accuracy@5": 0.92,
+                "accuracy@10": 0.94,
+                "ndcg@10": 0.8625804492635228,
+                "mrr@10": 0.8376666666666667
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.79,
+                "accuracy@3": 0.86,
+                "accuracy@5": 0.92,
+                "accuracy@10": 0.94,
+                "ndcg@10": 0.8618872148442569,
+                "mrr@10": 0.8368333333333333
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.7153465346534653,
+                "accuracy@3": 0.8589108910891089,
+                "accuracy@5": 0.8910891089108911,
+                "accuracy@10": 0.9183168316831684,
+                "ndcg@10": 0.8239125997597097,
+                "mrr@10": 0.7929278642149928
+            }
+        }
+    }
+}

jmteb/results/STS/scores_jsick.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "metric_name": "spearman",
+    "metric_value": 0.7228774332572527,
+    "details": {
+        "optimal_similarity_metric": "dot_score",
+        "val_scores": {
+            "cosine_similarity": {
+                "pearson": 0.7787902785127275,
+                "spearman": 0.7463203269165151
+            },
+            "manhatten_distance": {
+                "pearson": 0.7696693385531954,
+                "spearman": 0.73658437615329
+            },
+            "euclidean_distance": {
+                "pearson": 0.7696693385531954,
+                "spearman": 0.73658437615329
+            },
+            "dot_score": {
+                "pearson": 0.7788735665737948,
+                "spearman": 0.7464361803709411
+            }
+        },
+        "test_scores": {
+            "dot_score": {
+                "pearson": 0.7579853118271599,
+                "spearman": 0.7228774332572527
+            }
+        }
+    }
+}

jmteb/results/STS/scores_jsts.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "metric_name": "spearman",
+    "metric_value": 0.8334009117886643,
+    "details": {
+        "optimal_similarity_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "pearson": 0.8575300900249121,
+                "spearman": 0.8130300377229739
+            },
+            "manhatten_distance": {
+                "pearson": 0.8516427332926884,
+                "spearman": 0.8129478229587098
+            },
+            "euclidean_distance": {
+                "pearson": 0.8516427332926884,
+                "spearman": 0.8129478229587098
+            },
+            "dot_score": {
+                "pearson": 0.8575076226058185,
+                "spearman": 0.8130035755745093
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "pearson": 0.8764356710949792,
+                "spearman": 0.8334009117886643
+            }
+        }
+    }
+}

jmteb/results/summary.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+    "Classification": {
+        "amazon_counterfactual_classification": {
+            "macro_f1": 0.7725172847265871
+        },
+        "amazon_review_classification": {
+            "macro_f1": 0.580199599298548
+        },
+        "massive_intent_classification": {
+            "macro_f1": 0.7965072076950149
+        },
+        "massive_scenario_classification": {
+            "macro_f1": 0.8963544416714002
+        }
+    },
+    "Reranking": {
+        "esci": {
+            "ndcg@10": 0.9326535342404364
+        }
+    },
+    "Retrieval": {
+        "jagovfaqs_22k": {
+            "ndcg@10": 0.7037560169204234
+        },
+        "jaqket": {
+            "ndcg@10": 0.61373516847512
+        },
+        "mrtydi": {
+            "ndcg@10": 0.365136589573548
+        },
+        "nlp_journal_abs_intro": {
+            "ndcg@10": 0.9312261091012943
+        },
+        "nlp_journal_title_abs": {
+            "ndcg@10": 0.9660376057905824
+        },
+        "nlp_journal_title_intro": {
+            "ndcg@10": 0.8239125997597097
+        }
+    },
+    "STS": {
+        "jsick": {
+            "spearman": 0.7228774332572527
+        },
+        "jsts": {
+            "spearman": 0.8334009117886643
+        }
+    },
+    "Clustering": {
+        "livedoor_news": {
+            "v_measure_score": 0.5063220603646297
+        },
+        "mewsc16": {
+            "v_measure_score": 0.45462727073049025
+        }
+    },
+    "PairClassification": {
+        "paws_x_ja": {
+            "binary_f1": 0.6403061224489796
+        }
+    }
+}

jmteb/tasks/amazon_counterfactual_classification.jsonnet ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  amazon_counterfactual_classification: {
+    class_path: 'ClassificationEvaluator',
+    init_args: {
+      train_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'train',
+          name: 'amazon_counterfactual_classification',
+        },
+      },
+      val_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'amazon_counterfactual_classification',
+        },
+      },
+      test_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'amazon_counterfactual_classification',
+        },
+      },
+      prefix: '同じクラスに属する文を探すために次の文を表現して\n',
+    },
+  },
+}

jmteb/tasks/amazon_review_classification.jsonnet ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  amazon_review_classification: {
+    class_path: 'ClassificationEvaluator',
+    init_args: {
+      train_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'train',
+          name: 'amazon_review_classification',
+        },
+      },
+      val_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'amazon_review_classification',
+        },
+      },
+      test_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'amazon_review_classification',
+        },
+      },
+      prefix: '同じクラスに属する文を探すために次の文を表現して\n',
+    },
+  },
+}

jmteb/tasks/esci.jsonnet ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  esci: {
+    class_path: 'RerankingEvaluator',
+    init_args: {
+      val_query_dataset: {
+        class_path: 'HfRerankingQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'esci-query',
+        },
+      },
+      test_query_dataset: {
+        class_path: 'HfRerankingQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'esci-query',
+        },
+      },
+      doc_dataset: {
+        class_path: 'HfRerankingDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'esci-corpus',
+        },
+      },
+      query_prefix: '関連した文書を探すために次の文を表現して\n',
+      doc_prefix: '次の文章を表現して\n',
+    },
+  },
+}

jmteb/tasks/jagovfaqs_22k.jsonnet ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  jagovfaqs_22k: {
+    class_path: 'RetrievalEvaluator',
+    init_args: {
+      val_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'jagovfaqs_22k-query',
+        },
+      },
+      test_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'jagovfaqs_22k-query',
+        },
+      },
+      doc_dataset: {
+        class_path: 'HfRetrievalDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'jagovfaqs_22k-corpus',
+        },
+      },
+      query_prefix: '関連した文書を探すために次の文を表現して\n',
+      doc_prefix: '次の文章を表現して\n',
+    },
+  },
+}

jmteb/tasks/jaqket.jsonnet ADDED Viewed

	@@ -0,0 +1,33 @@

+{  // Task definition for 'jaqket': retrieval over datasets loaded from path 'sbintuitions/JMTEB'.
+  jaqket: {
+    class_path: 'RetrievalEvaluator',  // evaluator class used for this task
+    init_args: {
+      val_query_dataset: {  // queries read from the 'validation' split
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'jaqket-query',
+        },
+      },
+      test_query_dataset: {  // same query config as above, but the 'test' split
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'jaqket-query',
+        },
+      },
+      doc_dataset: {  // documents come from the separate '-corpus' config, split 'corpus'
+        class_path: 'HfRetrievalDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'jaqket-corpus',
+        },
+      },
+      query_prefix: '関連した文書を探すために次の文を表現して\n',  // query prefix; Japanese for "Represent the following sentence in order to find related documents"
+      doc_prefix: '次の文章を表現して\n',  // document prefix; Japanese for "Represent the following text"
+    },
+  },
+}

jmteb/tasks/jsick.jsonnet ADDED Viewed

	@@ -0,0 +1,25 @@

+{  // Task definition for 'jsick': an STS task with validation and test data from path 'sbintuitions/JMTEB'.
+  jsick: {
+    class_path: 'STSEvaluator',  // evaluator class used for this task
+    init_args: {
+      val_dataset: {  // sentence pairs read from the 'validation' split
+        class_path: 'HfSTSDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'jsick',
+        },
+      },
+      test_dataset: {  // same dataset config, 'test' split
+        class_path: 'HfSTSDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'jsick',
+        },
+      },
+      sentence1_prefix: '同じ意味の文を探すために次の文を表現して\n',  // Japanese for "Represent the following sentence in order to find sentences with the same meaning"
+      sentence2_prefix: '同じ意味の文を探すために次の文を表現して\n',  // identical string to sentence1_prefix
+    },
+  },
+}

jmteb/tasks/jsts.jsonnet ADDED Viewed

	@@ -0,0 +1,25 @@

+{  // Task definition for 'jsts': an STS task with data from path 'sbintuitions/JMTEB'.
+  jsts: {
+    class_path: 'STSEvaluator',  // evaluator class used for this task
+    init_args: {
+      val_dataset: {  // NOTE(review): validation data is read from the 'train' split here, whereas jsick.jsonnet uses 'validation' — confirm this is intended
+        class_path: 'HfSTSDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'train',
+          name: 'jsts',
+        },
+      },
+      test_dataset: {  // same dataset config, 'test' split
+        class_path: 'HfSTSDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'jsts',
+        },
+      },
+      sentence1_prefix: '同じ意味の文を探すために次の文を表現して\n',  // Japanese for "Represent the following sentence in order to find sentences with the same meaning"
+      sentence2_prefix: '同じ意味の文を探すために次の文を表現して\n',  // identical string to sentence1_prefix
+    },
+  },
+}

jmteb/tasks/livedoor_news.jsonnet ADDED Viewed

	@@ -0,0 +1,24 @@

+{  // Task definition for 'livedoor_news': a clustering task with data from path 'sbintuitions/JMTEB'.
+  livedoor_news: {
+    class_path: 'ClusteringEvaluator',  // evaluator class used for this task
+    init_args: {
+      val_dataset: {  // items read from the 'validation' split
+        class_path: 'HfClusteringDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'livedoor_news',
+        },
+      },
+      test_dataset: {  // same dataset config, 'test' split
+        class_path: 'HfClusteringDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'livedoor_news',
+        },
+      },
+      prefix: '類似した文を探すために次の文を表現して\n',  // single text prefix; Japanese for "Represent the following sentence in order to find similar sentences"
+    },
+  },
+}

jmteb/tasks/massive_intent_classification.jsonnet ADDED Viewed

	@@ -0,0 +1,32 @@

+{  // Task definition for 'massive_intent_classification': classification with train/validation/test data from path 'sbintuitions/JMTEB'.
+  massive_intent_classification: {
+    class_path: 'ClassificationEvaluator',  // evaluator class used for this task
+    init_args: {
+      train_dataset: {  // classification tasks additionally declare a 'train' split
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'train',
+          name: 'massive_intent_classification',
+        },
+      },
+      val_dataset: {  // same dataset config, 'validation' split
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'massive_intent_classification',
+        },
+      },
+      test_dataset: {  // same dataset config, 'test' split
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'massive_intent_classification',
+        },
+      },
+      prefix: '同じクラスに属する文を探すために次の文を表現して\n',  // single text prefix; Japanese for "Represent the following sentence in order to find sentences belonging to the same class"
+    },
+  },
+}

jmteb/tasks/massive_scenario_classification.jsonnet ADDED Viewed

	@@ -0,0 +1,32 @@

+{  // Task definition for 'massive_scenario_classification': classification with train/validation/test data from path 'sbintuitions/JMTEB'.
+  massive_scenario_classification: {
+    class_path: 'ClassificationEvaluator',  // evaluator class used for this task
+    init_args: {
+      train_dataset: {  // classification tasks additionally declare a 'train' split
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'train',
+          name: 'massive_scenario_classification',
+        },
+      },
+      val_dataset: {  // same dataset config, 'validation' split
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'massive_scenario_classification',
+        },
+      },
+      test_dataset: {  // same dataset config, 'test' split
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'massive_scenario_classification',
+        },
+      },
+      prefix: '同じクラスに属する文を探すために次の文を表現して\n',  // single text prefix; Japanese for "Represent the following sentence in order to find sentences belonging to the same class"
+    },
+  },
+}

jmteb/tasks/mewsc16.jsonnet ADDED Viewed

	@@ -0,0 +1,24 @@

+{  // Task definition for 'mewsc16': a clustering task with data from path 'sbintuitions/JMTEB'.
+  mewsc16: {
+    class_path: 'ClusteringEvaluator',  // evaluator class used for this task
+    init_args: {
+      val_dataset: {  // items read from the 'validation' split
+        class_path: 'HfClusteringDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'mewsc16_ja',  // dataset name carries a '_ja' suffix that the task key above does not
+        },
+      },
+      test_dataset: {  // same dataset config, 'test' split
+        class_path: 'HfClusteringDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'mewsc16_ja',
+        },
+      },
+      prefix: '類似した文を探すために次の文を表現して\n',  // single text prefix; Japanese for "Represent the following sentence in order to find similar sentences"
+    },
+  },
+}

jmteb/tasks/mrtydi.jsonnet ADDED Viewed

	@@ -0,0 +1,34 @@

+{  // Task definition for 'mrtydi': retrieval over datasets loaded from path 'sbintuitions/JMTEB'.
+  mrtydi: {
+    class_path: 'RetrievalEvaluator',  // evaluator class used for this task
+    init_args: {
+      val_query_dataset: {  // queries read from the 'validation' split
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'mrtydi-query',
+        },
+      },
+      test_query_dataset: {  // same query config as above, but the 'test' split
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'mrtydi-query',
+        },
+      },
+      doc_dataset: {  // documents come from the separate '-corpus' config, split 'corpus'
+        class_path: 'HfRetrievalDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'mrtydi-corpus',
+        },
+      },
+      doc_chunk_size: 10000,  // only retrieval task here that sets this; presumably the number of corpus documents handled per chunk — confirm against RetrievalEvaluator
+      query_prefix: '関連した文書を探すために次の文を表現して\n',  // query prefix; Japanese for "Represent the following sentence in order to find related documents"
+      doc_prefix: '次の文章を表現して\n',  // document prefix; Japanese for "Represent the following text"
+    },
+  },
+}

jmteb/tasks/nlp_journal_abs_intro.jsonnet ADDED Viewed

	@@ -0,0 +1,33 @@

+{  // Task definition for 'nlp_journal_abs_intro': retrieval over datasets loaded from path 'sbintuitions/JMTEB'.
+  nlp_journal_abs_intro: {
+    class_path: 'RetrievalEvaluator',  // evaluator class used for this task
+    init_args: {
+      val_query_dataset: {  // queries read from the 'validation' split
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'nlp_journal_abs_intro-query',
+        },
+      },
+      test_query_dataset: {  // same query config as above, but the 'test' split
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'nlp_journal_abs_intro-query',
+        },
+      },
+      doc_dataset: {  // documents come from the separate '-corpus' config, split 'corpus'
+        class_path: 'HfRetrievalDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'nlp_journal_abs_intro-corpus',
+        },
+      },
+      query_prefix: '関連した文書を探すために次の文を表現して\n',  // query prefix; Japanese for "Represent the following sentence in order to find related documents"
+      doc_prefix: '次の文章を表現して\n',  // document prefix; Japanese for "Represent the following text"
+    },
+  },
+}

jmteb/tasks/nlp_journal_title_abs.jsonnet ADDED Viewed

	@@ -0,0 +1,33 @@

+{  // Task definition for 'nlp_journal_title_abs': retrieval over datasets loaded from path 'sbintuitions/JMTEB'.
+  nlp_journal_title_abs: {
+    class_path: 'RetrievalEvaluator',  // evaluator class used for this task
+    init_args: {
+      val_query_dataset: {  // queries read from the 'validation' split
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'nlp_journal_title_abs-query',
+        },
+      },
+      test_query_dataset: {  // same query config as above, but the 'test' split
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'nlp_journal_title_abs-query',
+        },
+      },
+      doc_dataset: {  // documents come from the separate '-corpus' config, split 'corpus'
+        class_path: 'HfRetrievalDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'nlp_journal_title_abs-corpus',
+        },
+      },
+      query_prefix: '関連した文書を探すために次の文を表現して\n',  // query prefix; Japanese for "Represent the following sentence in order to find related documents"
+      doc_prefix: '次の文章を表現して\n',  // document prefix; Japanese for "Represent the following text"
+    },
+  },
+}

jmteb/tasks/nlp_journal_title_intro.jsonnet ADDED Viewed

	@@ -0,0 +1,33 @@

+{  // Task definition for 'nlp_journal_title_intro': retrieval over datasets loaded from path 'sbintuitions/JMTEB'.
+  nlp_journal_title_intro: {
+    class_path: 'RetrievalEvaluator',  // evaluator class used for this task
+    init_args: {
+      val_query_dataset: {  // queries read from the 'validation' split
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'nlp_journal_title_intro-query',
+        },
+      },
+      test_query_dataset: {  // same query config as above, but the 'test' split
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'nlp_journal_title_intro-query',
+        },
+      },
+      doc_dataset: {  // documents come from the separate '-corpus' config, split 'corpus'
+        class_path: 'HfRetrievalDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'nlp_journal_title_intro-corpus',
+        },
+      },
+      query_prefix: '関連した文書を探すために次の文を表現して\n',  // query prefix; Japanese for "Represent the following sentence in order to find related documents"
+      doc_prefix: '次の文章を表現して\n',  // document prefix; Japanese for "Represent the following text"
+    },
+  },
+}

jmteb/tasks/paws_x_ja.jsonnet ADDED Viewed

	@@ -0,0 +1,25 @@

+{  // Task definition for 'paws_x_ja': a pair-classification task with data from path 'sbintuitions/JMTEB'.
+  paws_x_ja: {
+    class_path: 'PairClassificationEvaluator',  // evaluator class used for this task
+    init_args: {
+      val_dataset: {  // sentence pairs read from the 'validation' split
+        class_path: 'HfPairClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'paws_x_ja',
+        },
+      },
+      test_dataset: {  // same dataset config, 'test' split
+        class_path: 'HfPairClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'paws_x_ja',
+        },
+      },
+      sentence1_prefix: '同じ意味の文を探すために次の文を表現して\n',  // Japanese for "Represent the following sentence in order to find sentences with the same meaning"
+      sentence2_prefix: '同じ意味の文を探すために次の文を表現して\n',  // identical string to sentence1_prefix
+    },
+  },
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d43aafff5dda1ece7a456d98bbc22f404a33ca569c56656fbc8a9b6ab5c8beb4
+size 264292496

modules.json ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
+  }
+]

mteb/models/__init__.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from .default import PROMPT as default_prompt
+from .retrieva import PROMPT as retrieva_prompt
+from .retrieva_en import PROMPT as retrieva_en_prompt
+PROMPTS = {  # registry: prompt-set name -> the PROMPT dict imported from the matching sibling module
+    "default": default_prompt,  # from .default
+    "retrieva": retrieva_prompt,  # from .retrieva
+    "retrieva-en": retrieva_en_prompt,  # from .retrieva_en; note the hyphenated key vs. the underscored module name
+}

mteb/models/default.py ADDED Viewed

	@@ -0,0 +1,4 @@

+PROMPT = {  # prompt set registered under the name "default" in models/__init__.py
+    "query": "query: ",  # prefix string for the "query" entry
+    "passage": "passage: ",  # prefix string for the "passage" entry
+}

mteb/models/retrieva.py ADDED Viewed

	@@ -0,0 +1,13 @@

+PROMPT = {  # Japanese prompt set registered as "retrieva"; NOTE(review): has no "MultilabelClassification" or "Speed" keys, unlike retrieva_en.PROMPT — confirm that is intentional
+    "STS": "同じ意味の文を探すために次の文を表現して\n",  # "Represent the following sentence in order to find sentences with the same meaning"
+    "Summarization": "次の記事またはタイトルを表現して\n",  # "Represent the following article or title"
+    "BitextMining": "次の文を表現して\n",  # "Represent the following sentence"
+    "Classification": "同じクラスに属する文を探すために次の文を表現して\n",  # "Represent the following sentence in order to find sentences belonging to the same class"
+    "Clustering": "類似した文を探すために次の文を表現して\n",  # "Represent the following sentence in order to find similar sentences"
+    "Reranking-query": "関連した文書を探すために次の文を表現して\n",  # "Represent the following sentence in order to find related documents"
+    "Reranking-passage": "次の文章を表現して\n",  # "Represent the following text"
+    "Retrieval-query": "関連した文書を探すために次の文を表現して\n",  # same string as "Reranking-query"
+    "Retrieval-passage": "次の文章を表現して\n",  # same string as "Reranking-passage"
+    "InstructionRetrieval": "",  # deliberately empty string: no prompt text for this key
+    "PairClassification": "同じ意味の文を探すために次の文を表現して\n",  # same string as "STS"
+}

mteb/models/retrieva_en.py ADDED Viewed

	@@ -0,0 +1,15 @@

+PROMPT = {  # English prompt set registered as "retrieva-en" in models/__init__.py
+    "STS": "Represent the sentence for retrieving duplicate sentences:\n",
+    "Summarization": "Represent the news article or news title for retrieval:\n",
+    "BitextMining": "Represent the sentence\n",  # the only non-empty entry without a trailing colon
+    "Classification": "Represent the sentence for retrieving the sentence belonging to the same category:\n",
+    "Clustering": "Represent the sentence to find similar sentences:\n",
+    "Reranking-query": "Represent the question:\n",
+    "Reranking-passage": "Represent the following text:\n",
+    "Retrieval-query": "Represent the question:\n",  # same string as "Reranking-query"
+    "Retrieval-passage": "Represent the following text:\n",  # same string as "Reranking-passage"
+    "InstructionRetrieval": "Retrieve text based on user query:\n",
+    "PairClassification": "Represent the sentence for retrieving duplicate sentences:\n",  # same string as "STS"
+    "MultilabelClassification": "Represent the sentence for retrieving the sentence belonging to the same category:\n",  # same string as "Classification"
+    "Speed": "",  # empty string: no prompt text for this key
+}

mteb/mteb_eval.py ADDED Viewed

	@@ -0,0 +1,49 @@

+"""Evaluate AMBER models"""
+import argparse
+import mteb
+from models import PROMPTS
+BENCHMARKS = {  # --benchmark choice -> benchmark name handed to mteb.get_benchmark() in main()
+    "en": "MTEB(eng, v2)",
+    "ja": "MTEB(jpn, v1)",
+}
+def get_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model_type", type=str, required=True, help="Model name", choices=PROMPTS.keys())
+    parser.add_argument("--model_name_or_path", type=str, required=True)
+    parser.add_argument("--batch_size", type=int, default=32, help="Batch size")
+    parser.add_argument("--output_dir", type=str, required=True, help="Output directory")
+    parser.add_argument("--benchmark", type=str, required=True, choices=BENCHMARKS.keys())
+    parser.add_argument("--corpus_chunk_size", type=int, default=50000)
+    parser.add_argument("--convert_to_tensor", action="store_true")
+    return parser.parse_args()
+def main():
+    args = get_args()
+    prompt = PROMPTS[args.model_type]
+    model = mteb.get_model(args.model_name_or_path, model_prompts=prompt)
+    tasks = mteb.get_benchmark(BENCHMARKS[args.benchmark])
+    evaluation = mteb.MTEB(tasks=tasks)
+    encode_kwargs = {
+        "batch_size": args.batch_size,
+        "convert_to_tensor": args.convert_to_tensor,
+    }
+    evaluation.run(
+        model,
+        output_folder=args.output_dir,
+        encode_kwargs=encode_kwargs,
+        corpus_chunk_size=args.corpus_chunk_size,
+    )
+if __name__ == "__main__":  # run the evaluation only when this file is executed as a script
+    main()

mteb/results/AmazonCounterfactualClassification.json ADDED Viewed

	@@ -0,0 +1,95 @@

+{
+  "dataset_revision": "e8379541af4e31359cca9fbcf4b00f2671dba205",
+  "task_name": "AmazonCounterfactualClassification",
+  "mteb_version": "1.36.1",
+  "scores": {
+    "test": [
+      {
+        "accuracy": 0.681642,
+        "f1": 0.619811,
+        "f1_weighted": 0.712157,
+        "ap": 0.306541,
+        "ap_weighted": 0.306541,
+        "scores_per_experiment": [
+          {
+            "accuracy": 0.676119,
+            "f1": 0.613878,
+            "f1_weighted": 0.708281,
+            "ap": 0.297508,
+            "ap_weighted": 0.297508
+          },
+          {
+            "accuracy": 0.725373,
+            "f1": 0.651787,
+            "f1_weighted": 0.749265,
+            "ap": 0.324443,
+            "ap_weighted": 0.324443
+          },
+          {
+            "accuracy": 0.598507,
+            "f1": 0.56537,
+            "f1_weighted": 0.638451,
+            "ap": 0.285849,
+            "ap_weighted": 0.285849
+          },
+          {
+            "accuracy": 0.69403,
+            "f1": 0.641244,
+            "f1_weighted": 0.725044,
+            "ap": 0.333082,
+            "ap_weighted": 0.333082
+          },
+          {
+            "accuracy": 0.683582,
+            "f1": 0.632402,
+            "f1_weighted": 0.715928,
+            "ap": 0.32599,
+            "ap_weighted": 0.32599
+          },
+          {
+            "accuracy": 0.677612,
+            "f1": 0.609393,
+            "f1_weighted": 0.708798,
+            "ap": 0.288755,
+            "ap_weighted": 0.288755
+          },
+          {
+            "accuracy": 0.755224,
+            "f1": 0.67807,
+            "f1_weighted": 0.774042,
+            "ap": 0.349254,
+            "ap_weighted": 0.349254
+          },
+          {
+            "accuracy": 0.735821,
+            "f1": 0.658379,
+            "f1_weighted": 0.757427,
+            "ap": 0.328302,
+            "ap_weighted": 0.328302
+          },
+          {
+            "accuracy": 0.665672,
+            "f1": 0.605319,
+            "f1_weighted": 0.699304,
+            "ap": 0.291461,
+            "ap_weighted": 0.291461
+          },
+          {
+            "accuracy": 0.604478,
+            "f1": 0.542268,
+            "f1_weighted": 0.645027,
+            "ap": 0.240767,
+            "ap_weighted": 0.240767
+          }
+        ],
+        "main_score": 0.681642,
+        "hf_subset": "en",
+        "languages": [
+          "eng-Latn"
+        ]
+      }
+    ]
+  },
+  "evaluation_time": 10.654787302017212,
+  "kg_co2_emissions": null
+}

mteb/results/ArXivHierarchicalClusteringP2P.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "dataset_revision": "0bbdb47bcbe3a90093699aefeed338a0f28a7ee8",
+  "task_name": "ArXivHierarchicalClusteringP2P",
+  "mteb_version": "1.36.1",
+  "scores": {
+    "test": [
+      {
+        "v_measures": {
+          "Level 0": [
+            0.507858,
+            0.514541,
+            0.544659,
+            0.512061,
+            0.542216,
+            0.524017,
+            0.49424,
+            0.536251,
+            0.558261,
+            0.55925
+          ],
+          "Level 1": [
+            0.590244,
+            0.610952,
+            0.551394,
+            0.585449,
+            0.58945,
+            0.581477,
+            0.581684,
+            0.570883,
+            0.574588,
+            0.60155
+          ]
+        },
+        "v_measure": 0.556551,
+        "v_measure_std": 0.032919,
+        "main_score": 0.556551,
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ]
+      }
+    ]
+  },
+  "evaluation_time": 6.287527084350586,
+  "kg_co2_emissions": null
+}

mteb/results/ArXivHierarchicalClusteringS2S.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "dataset_revision": "b73bd54100e5abfa6e3a23dcafb46fe4d2438dc3",
+  "task_name": "ArXivHierarchicalClusteringS2S",
+  "mteb_version": "1.36.1",
+  "scores": {
+    "test": [
+      {
+        "v_measures": {
+          "Level 0": [
+            0.502992,
+            0.485244,
+            0.477917,
+            0.495744,
+            0.477376,
+            0.539086,
+            0.544394,
+            0.547524,
+            0.522253,
+            0.518454
+          ],
+          "Level 1": [
+            0.563453,
+            0.553718,
+            0.559457,
+            0.564745,
+            0.538357,
+            0.579393,
+            0.548295,
+            0.573239,
+            0.562891,
+            0.575329
+          ]
+        },
+        "v_measure": 0.536493,
+        "v_measure_std": 0.032359,
+        "main_score": 0.536493,
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ]
+      }
+    ]
+  },
+  "evaluation_time": 5.862490892410278,
+  "kg_co2_emissions": null
+}

mteb/results/ArguAna.json ADDED Viewed

	@@ -0,0 +1,158 @@

+{
+  "dataset_revision": "c22ab2a51041ffd869aaddef7af8d8215647e41a",
+  "task_name": "ArguAna",
+  "mteb_version": "1.36.1",
+  "scores": {
+    "test": [
+      {
+        "ndcg_at_1": 0.25249,
+        "ndcg_at_3": 0.38056,
+        "ndcg_at_5": 0.43124,
+        "ndcg_at_10": 0.48068,
+        "ndcg_at_20": 0.51461,
+        "ndcg_at_100": 0.53158,
+        "ndcg_at_1000": 0.5338,
+        "map_at_1": 0.25249,
+        "map_at_3": 0.34803,
+        "map_at_5": 0.37598,
+        "map_at_10": 0.39611,
+        "map_at_20": 0.40569,
+        "map_at_100": 0.40821,
+        "map_at_1000": 0.4083,
+        "recall_at_1": 0.25249,
+        "recall_at_3": 0.47511,
+        "recall_at_5": 0.59886,
+        "recall_at_10": 0.7532,
+        "recall_at_20": 0.88549,
+        "recall_at_100": 0.9744,
+        "recall_at_1000": 0.99147,
+        "precision_at_1": 0.25249,
+        "precision_at_3": 0.15837,
+        "precision_at_5": 0.11977,
+        "precision_at_10": 0.07532,
+        "precision_at_20": 0.04427,
+        "precision_at_100": 0.00974,
+        "precision_at_1000": 0.00099,
+        "mrr_at_1": 0.258179,
+        "mrr_at_3": 0.349692,
+        "mrr_at_5": 0.377928,
+        "mrr_at_10": 0.398238,
+        "mrr_at_20": 0.407844,
+        "mrr_at_100": 0.410403,
+        "mrr_at_1000": 0.410495,
+        "nauc_ndcg_at_1_max": -0.026569,
+        "nauc_ndcg_at_1_std": -0.024726,
+        "nauc_ndcg_at_1_diff1": 0.102597,
+        "nauc_ndcg_at_3_max": -0.008151,
+        "nauc_ndcg_at_3_std": -0.033642,
+        "nauc_ndcg_at_3_diff1": 0.078841,
+        "nauc_ndcg_at_5_max": -0.003906,
+        "nauc_ndcg_at_5_std": -0.024619,
+        "nauc_ndcg_at_5_diff1": 0.07558,
+        "nauc_ndcg_at_10_max": 0.010935,
+        "nauc_ndcg_at_10_std": -0.018625,
+        "nauc_ndcg_at_10_diff1": 0.080503,
+        "nauc_ndcg_at_20_max": 0.013164,
+        "nauc_ndcg_at_20_std": -0.013407,
+        "nauc_ndcg_at_20_diff1": 0.078992,
+        "nauc_ndcg_at_100_max": 0.008316,
+        "nauc_ndcg_at_100_std": -0.008725,
+        "nauc_ndcg_at_100_diff1": 0.085633,
+        "nauc_ndcg_at_1000_max": 0.0045,
+        "nauc_ndcg_at_1000_std": -0.014357,
+        "nauc_ndcg_at_1000_diff1": 0.084438,
+        "nauc_map_at_1_max": -0.026569,
+        "nauc_map_at_1_std": -0.024726,
+        "nauc_map_at_1_diff1": 0.102597,
+        "nauc_map_at_3_max": -0.013567,
+        "nauc_map_at_3_std": -0.03222,
+        "nauc_map_at_3_diff1": 0.083557,
+        "nauc_map_at_5_max": -0.01162,
+        "nauc_map_at_5_std": -0.027384,
+        "nauc_map_at_5_diff1": 0.081184,
+        "nauc_map_at_10_max": -0.00615,
+        "nauc_map_at_10_std": -0.025394,
+        "nauc_map_at_10_diff1": 0.082831,
+        "nauc_map_at_20_max": -0.005492,
+        "nauc_map_at_20_std": -0.024076,
+        "nauc_map_at_20_diff1": 0.08281,
+        "nauc_map_at_100_max": -0.006049,
+        "nauc_map_at_100_std": -0.02356,
+        "nauc_map_at_100_diff1": 0.083933,
+        "nauc_map_at_1000_max": -0.006154,
+        "nauc_map_at_1000_std": -0.02373,
+        "nauc_map_at_1000_diff1": 0.083902,
+        "nauc_recall_at_1_max": -0.026569,
+        "nauc_recall_at_1_std": -0.024726,
+        "nauc_recall_at_1_diff1": 0.102597,
+        "nauc_recall_at_3_max": 0.007234,
+        "nauc_recall_at_3_std": -0.037315,
+        "nauc_recall_at_3_diff1": 0.066138,
+        "nauc_recall_at_5_max": 0.020847,
+        "nauc_recall_at_5_std": -0.014385,
+        "nauc_recall_at_5_diff1": 0.059428,
+        "nauc_recall_at_10_max": 0.092417,
+        "nauc_recall_at_10_std": 0.016372,
+        "nauc_recall_at_10_diff1": 0.076442,
+        "nauc_recall_at_20_max": 0.179819,
+        "nauc_recall_at_20_std": 0.093827,
+        "nauc_recall_at_20_diff1": 0.052288,
+        "nauc_recall_at_100_max": 0.463576,
+        "nauc_recall_at_100_std": 0.695314,
+        "nauc_recall_at_100_diff1": 0.252365,
+        "nauc_recall_at_1000_max": 0.473173,
+        "nauc_recall_at_1000_std": 0.803564,
+        "nauc_recall_at_1000_diff1": 0.30506,
+        "nauc_precision_at_1_max": -0.026569,
+        "nauc_precision_at_1_std": -0.024726,
+        "nauc_precision_at_1_diff1": 0.102597,
+        "nauc_precision_at_3_max": 0.007234,
+        "nauc_precision_at_3_std": -0.037315,
+        "nauc_precision_at_3_diff1": 0.066138,
+        "nauc_precision_at_5_max": 0.020847,
+        "nauc_precision_at_5_std": -0.014385,
+        "nauc_precision_at_5_diff1": 0.059428,
+        "nauc_precision_at_10_max": 0.092417,
+        "nauc_precision_at_10_std": 0.016372,
+        "nauc_precision_at_10_diff1": 0.076442,
+        "nauc_precision_at_20_max": 0.179819,
+        "nauc_precision_at_20_std": 0.093827,
+        "nauc_precision_at_20_diff1": 0.052288,
+        "nauc_precision_at_100_max": 0.463576,
+        "nauc_precision_at_100_std": 0.695314,
+        "nauc_precision_at_100_diff1": 0.252365,
+        "nauc_precision_at_1000_max": 0.473173,
+        "nauc_precision_at_1000_std": 0.803564,
+        "nauc_precision_at_1000_diff1": 0.30506,
+        "nauc_mrr_at_1_max": -0.025852,
+        "nauc_mrr_at_1_std": -0.027133,
+        "nauc_mrr_at_1_diff1": 0.083902,
+        "nauc_mrr_at_3_max": -0.023878,
+        "nauc_mrr_at_3_std": -0.031916,
+        "nauc_mrr_at_3_diff1": 0.06376,
+        "nauc_mrr_at_5_max": -0.020079,
+        "nauc_mrr_at_5_std": -0.029791,
+        "nauc_mrr_at_5_diff1": 0.063531,
+        "nauc_mrr_at_10_max": -0.0141,
+        "nauc_mrr_at_10_std": -0.027921,
+        "nauc_mrr_at_10_diff1": 0.065142,
+        "nauc_mrr_at_20_max": -0.0135,
+        "nauc_mrr_at_20_std": -0.026331,
+        "nauc_mrr_at_20_diff1": 0.064701,
+        "nauc_mrr_at_100_max": -0.01393,
+        "nauc_mrr_at_100_std": -0.025819,
+        "nauc_mrr_at_100_diff1": 0.065875,
+        "nauc_mrr_at_1000_max": -0.014037,
+        "nauc_mrr_at_1000_std": -0.025989,
+        "nauc_mrr_at_1000_diff1": 0.065838,
+        "main_score": 0.48068,
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ]
+      }
+    ]
+  },
+  "evaluation_time": 27.352286100387573,
+  "kg_co2_emissions": null
+}

mteb/results/AskUbuntuDupQuestions.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "dataset_revision": "2000358ca161889fa9c082cb41daa8dcfb161a54",
+  "task_name": "AskUbuntuDupQuestions",
+  "mteb_version": "1.36.1",
+  "scores": {
+    "test": [
+      {
+        "map": 0.565225,
+        "mrr": 0.705146,
+        "nAUC_map_max": 0.18224,
+        "nAUC_map_std": 0.125352,
+        "nAUC_map_diff1": 0.140464,
+        "nAUC_mrr_max": 0.286197,
+        "nAUC_mrr_std": 0.2169,
+        "nAUC_mrr_diff1": 0.158021,
+        "main_score": 0.565225,
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ]
+      }
+    ]
+  },
+  "evaluation_time": 2.0422356128692627,
+  "kg_co2_emissions": null
+}

mteb/results/BIOSSES.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "dataset_revision": "d3fb88f8f02e40887cd149695127462bbcf29b4a",
+  "task_name": "BIOSSES",
+  "mteb_version": "1.36.1",
+  "scores": {
+    "test": [
+      {
+        "pearson": 0.866855,
+        "spearman": 0.831736,
+        "cosine_pearson": 0.866855,
+        "cosine_spearman": 0.831736,
+        "manhattan_pearson": 0.855442,
+        "manhattan_spearman": 0.839501,
+        "euclidean_pearson": 0.850403,
+        "euclidean_spearman": 0.831736,
+        "main_score": 0.831736,
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ]
+      }
+    ]
+  },
+  "evaluation_time": 0.22259831428527832,
+  "kg_co2_emissions": null
+}