import gradio as gr
import json
import pandas as pd
from urllib.request import urlopen
from urllib.error import URLError
from datetime import datetime, timedelta

# Constants
CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
    title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
    author={OpenCompass Contributors},
    howpublished = {\url{https://github.com/open-compass/opencompass}},
    year={2023}
}"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# Development environment
# DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/research-rank/research-data.REALTIME."
# DATA_URL_BASE = "./s1test"
# Production environment
DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/assets/research-rank/research-data.REALTIME."
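# Illustrative example of a fully resolved URL (the date suffix is appended by
# find_latest_data_url below; the specific date here is made up):
#   http://opencompass.oss-cn-shanghai.aliyuncs.com/assets/research-rank/research-data.REALTIME.20240101.json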


def find_latest_data_url():
    """Find the latest available data URL by trying dates going back up to a year."""
    today = datetime.now()
    for i in range(365):
        date = today - timedelta(days=i)
        date_str = date.strftime("%Y%m%d")
        url = f"{DATA_URL_BASE}{date_str}.json"
        try:
            urlopen(url).close()
            return url, date_str
        except URLError:
            continue
    return None, None


def get_latest_data():
    """Get latest data URL and update time"""
    data_url, update_time = find_latest_data_url()
    if not data_url:
        raise Exception("Could not find valid data URL")
    formatted_update_time = datetime.strptime(update_time, "%Y%m%d").strftime("%Y-%m-%d")
    return data_url, formatted_update_time


def get_leaderboard_title(update_time):
    return f"# Supported Datasets List (Last Updated: {update_time})"


MAIN_DESCRIPTION = """## The List of Datasets Supported by OpenCompass
- All configurations and datasets can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
"""




def load_data(data_url):
    """Download and parse the JSON data file from the given URL."""
    with urlopen(data_url) as response:
        data = json.load(response)
    return data
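# Sketch of the JSON shape load_data is assumed to return, inferred from the keys
# used in build_main_table below (the entry ID and values are illustrative):
# {
#     "some_dataset_id": {
#         "name": "Some Dataset",
#         "category": "med",
#         "article": "https://example.com/paper"
#     }
# }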


def build_main_table(data):
    """Build the display DataFrame with one row per dataset."""
    df = pd.DataFrame(data).transpose()
    columns = {
        'name': 'Name', 'category': 'Category', 'article': 'Article Address',
    }
    df = df[list(columns.keys())].rename(columns=columns)
    return df


DATA_CATEGORY = ['med', 'law', 'code']

def filter_table1(df, data_category):
    """Keep only rows whose Category is in the selected categories (no filter if empty)."""
    filtered_df = df.copy()
    if data_category:
        filtered_df = filtered_df[filtered_df['Category'].isin(data_category)]
    return filtered_df
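# Illustrative usage (assumes a DataFrame produced by build_main_table):
#   filter_table1(df, ['med', 'law'])  -> rows whose Category is 'med' or 'law'
#   filter_table1(df, [])              -> the table unchanged (no filter applied)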


def calculate_column_widths(df):
    """Estimate a pixel width for each column from its header and longest cell."""
    column_widths = []
    for column in df.columns:
        header_length = len(str(column))
        max_content_length = df[column].astype(str).map(len).max()
        width = max(header_length * 10, max_content_length * 8) + 20
        width = max(160, min(400, width))
        column_widths.append(width)
    return column_widths
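# Worked example (illustrative): for a column with a 4-character header and a
# longest cell of 30 characters, the width is max(4 * 10, 30 * 8) + 20 = 260 px,
# which already falls inside the [160, 400] clamp, so 260 px is used.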


class DataState:
    """Holds the most recently loaded table so the filter callbacks can reuse it."""

    def __init__(self):
        self.current_df = None


data_state = DataState()


def create_interface():
    """Build the Gradio Blocks interface for browsing the supported dataset list."""
    empty_df = pd.DataFrame(columns=[
        'Name', 'Category', 'Article Address'
    ])

    def load_table():
        """Fetch the latest data and return (page title, filtered and sorted table)."""
        data_url, update_time = get_latest_data()
        data = load_data(data_url)
        new_df = build_main_table(data)
        data_state.current_df = new_df
        filtered_df = filter_table1(new_df, DATA_CATEGORY)
        return get_leaderboard_title(update_time), filtered_df.sort_values("Name", ascending=True)

    def load_initial_data():
        try:
            return load_table()
        except Exception as e:
            print(f"Error loading initial data: {e}")
            return "# Supported Datasets List (Error loading data)", empty_df

    def refresh_data():
        try:
            return load_table()
        except Exception as e:
            print(f"Error refreshing data: {e}")
            return None, None

    def update_table(category):
        if data_state.current_df is None:
            return empty_df
        filtered_df = filter_table1(data_state.current_df, category)
        return filtered_df.sort_values("Name", ascending=True)

    initial_title, initial_data = load_initial_data()

    with gr.Blocks() as demo:
        title_comp = gr.Markdown(initial_title)

        with gr.Tabs() as tabs:
            with gr.TabItem("Dataset List", elem_id='main'):
                gr.Markdown(MAIN_DESCRIPTION)

                with gr.Row():
                    with gr.Column():
                        category_filter = gr.CheckboxGroup(
                            choices=DATA_CATEGORY,
                            value=DATA_CATEGORY,
                            label='Category',
                            interactive=True,
                        )

                with gr.Column():
                    table = gr.DataFrame(
                        value=initial_data,
                        interactive=False,
                        wrap=False,
                        column_widths=calculate_column_widths(initial_data),
                    )

                refresh_button = gr.Button("Refresh Data")

                refresh_button.click(
                    fn=refresh_data,
                    outputs=[title_comp, table],
                )

                category_filter.change(
                    fn=update_table,
                    inputs=[category_filter],
                    outputs=table,
                )


        with gr.Row():
            with gr.Accordion("Citation", open=False):
                citation_button = gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    elem_id='citation-button',
                    lines=6,  # show more lines of the citation
                    max_lines=8,  # cap the textbox height
                    show_copy_button=True  # add a copy button for convenience
                )

    return demo


if __name__ == '__main__':
    demo = create_interface()
    demo.queue()
    demo.launch(server_name='0.0.0.0')
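
# To run locally (a sketch; the script name "app.py" is an assumption):
#   python app.py
# Gradio binds to all interfaces (0.0.0.0) and serves on its default port, 7860.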