File size: 2,011 Bytes
8b7a945
5808d8f
8b7a945
 
 
 
3d59d51
8b7a945
3d59d51
 
 
 
5808d8f
 
3d59d51
e8879cc
9c49811
 
 
 
 
 
3d59d51
 
 
f30cbcc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from src.populate import get_leaderboard_df
from src.leaderboard.read_evals import get_raw_eval_results
from pathlib import Path

# Path of this test module; the tests below locate their fixtures relative to
# it, under ``cur_fp.parents[1] / "toydata"``.
cur_fp = Path(__file__)


def test_get_leaderboard_df():
    """Build the 'qa' leaderboard from the toy fixtures and check its rows.

    The raw results are read from ``toydata/test_results`` and
    ``toydata/test_requests`` and turned into a dataframe with
    ``get_leaderboard_df`` for the ``ndcg_at_1`` metric.  The test expects:

    * exactly two rows, both with retrieval model ``bge-m3``;
    * row ``0`` reranked by ``bge-reranker-v2-m3`` and row ``1`` by
      ``NoReranker``, with row ``0`` having the strictly higher average;
    * no missing values in the average and benchmark columns.
    """
    toydata_dir = cur_fp.parents[1] / "toydata"
    results_path = toydata_dir / "test_results"
    requests_path = toydata_dir / "test_requests"

    benchmark_cols = ['wiki_en', 'wiki_zh']
    cols = ['Retrieval Model', 'Reranking Model', 'Average ⬆️', 'wiki_en', 'wiki_zh']
    # Columns that must be fully populated in the resulting dataframe.
    score_cols = ['Average ⬆️', 'wiki_en', 'wiki_zh']

    raw_data = get_raw_eval_results(results_path, requests_path)
    df = get_leaderboard_df(raw_data, cols, benchmark_cols, 'qa', 'ndcg_at_1')

    n_rows = df.shape[0]
    assert n_rows == 2

    # NOTE(review): ``series[0]`` / ``series[1]`` are lookups by index key, not
    # explicit positions -- presumably the dataframe has a default 0..n-1 index.

    # The toy results contain a single embedding model.
    retrieval_models = df["Retrieval Model"]
    for row_key in (0, 1):
        assert retrieval_models[row_key] == "bge-m3"

    # The toy results contain two reranking models.
    reranking_models = df["Reranking Model"]
    assert reranking_models[0] == "bge-reranker-v2-m3"
    assert reranking_models[1] == "NoReranker"

    averages = df["Average ⬆️"]
    assert averages[0] > averages[1]

    has_missing_scores = df[score_cols].isnull().values.any()
    assert not has_missing_scores


def test_get_leaderboard_df_long_doc():
    """Build the 'long_doc' leaderboard from the toy fixtures and check its rows.

    Uses the same ``toydata/test_results`` and ``toydata/test_requests``
    fixtures, restricted to the ``law_en_lex_files_500k_600k`` benchmark
    column and the ``ndcg_at_1`` metric.  The test expects:

    * exactly two rows, both with retrieval model ``bge-m3``;
    * row ``0`` reranked by ``bge-reranker-v2-m3`` and row ``1`` by
      ``NoReranker``, with row ``0`` having the strictly higher average;
    * no missing values in the average and benchmark columns.
    """
    fixtures_root = cur_fp.parents[1] / "toydata"
    results_path = fixtures_root / "test_results"
    requests_path = fixtures_root / "test_requests"

    benchmark_cols = ['law_en_lex_files_500k_600k']
    cols = ['Retrieval Model', 'Reranking Model', 'Average ⬆️', 'law_en_lex_files_500k_600k']
    # Columns that must be fully populated in the resulting dataframe.
    required_cols = ['Average ⬆️', 'law_en_lex_files_500k_600k']

    raw_data = get_raw_eval_results(results_path, requests_path)
    df = get_leaderboard_df(raw_data, cols, benchmark_cols, 'long_doc', 'ndcg_at_1')

    row_count = df.shape[0]
    assert row_count == 2

    # NOTE(review): ``series[0]`` / ``series[1]`` are lookups by index key, not
    # explicit positions -- presumably the dataframe has a default 0..n-1 index.

    # The toy results contain a single embedding model.
    retrievers = df["Retrieval Model"]
    for key in (0, 1):
        assert retrievers[key] == "bge-m3"

    # The toy results contain two reranking models.
    rerankers = df["Reranking Model"]
    assert rerankers[0] == "bge-reranker-v2-m3"
    assert rerankers[1] == "NoReranker"

    avg_scores = df["Average ⬆️"]
    assert avg_scores[0] > avg_scores[1]

    any_missing = df[required_cols].isnull().values.any()
    assert not any_missing