import gradio as gr
import json
import pandas as pd
from urllib.request import urlopen
from urllib.error import URLError
from datetime import datetime, timedelta
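
# Gradio app that lists the datasets supported by OpenCompass. The table is
# built from a dated JSON dump (see DATA_URL_BASE and find_latest_data_url below).
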
CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
    title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
    author={OpenCompass Contributors},
    howpublished = {\url{https://github.com/open-compass/opencompass}},
    year={2023}
}"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

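# The full data URL is DATA_URL_BASE plus a YYYYMMDD date suffix and ".json";
# find_latest_data_url() probes recent dates to locate the newest dump.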
DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/assets/research-rank/research-data.REALTIME."


def find_latest_data_url():
    """Find the latest available data URL by trying dates, newest first."""
    today = datetime.now()
    for i in range(365):
        # Walk backwards one day at a time until a dump exists for that date.
        date = today - timedelta(days=i)
        date_str = date.strftime("%Y%m%d")
        url = f"{DATA_URL_BASE}{date_str}.json"
        try:
            urlopen(url)
            return url, date_str
        except URLError:
            continue
    return None, None


def get_latest_data():
    """Get the latest data URL and its update time."""
    data_url, update_time = find_latest_data_url()
    if not data_url:
        raise Exception("Could not find a valid data URL")
    formatted_update_time = datetime.strptime(update_time, "%Y%m%d").strftime("%Y-%m-%d")
    return data_url, formatted_update_time


def get_leaderboard_title(update_time):
    return f"# Supported Datasets List (Last Updated: {update_time})"


MAIN_DESCRIPTION = """## The List of Datasets Supported by OpenCompass
- All configurations and datasets can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
"""


def load_data(data_url):
    """Fetch and parse the JSON data from the given URL."""
    with urlopen(data_url) as response:
        data = json.load(response)
    return data


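# The remote JSON is assumed to map dataset identifiers to records that contain
# at least 'name', 'category', and 'article' fields, which build_main_table()
# transposes into a DataFrame. Illustrative (hypothetical) shape:
#
#   {
#       "some_dataset_id": {"name": "...", "category": "med", "article": "https://..."},
#       ...
#   }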
def build_main_table(data):
    df = pd.DataFrame(data).transpose()
    columns = {
        'name': 'Name', 'category': 'Category', 'article': 'Article Address',
    }
    df = df[list(columns.keys())].rename(columns=columns)
    return df


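# Category values that can be toggled in the UI filter.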
DATA_CATEGORY = ['med', 'law', 'code']


def filter_table1(df, data_category):
    filtered_df = df.copy()
    if data_category:
        mask = pd.Series(False, index=filtered_df.index)
        for category in data_category:
            mask |= filtered_df['Category'] == category
        filtered_df = filtered_df[mask]
    return filtered_df


def calculate_column_widths(df):
    """Pick a display width for each column from its header and cell lengths."""
    column_widths = []
    for column in df.columns:
        header_length = len(str(column))
        # An empty frame has no cells, so fall back to the header length alone.
        max_content_length = df[column].astype(str).map(len).max() if len(df) else 0
        width = max(header_length * 10, max_content_length * 8) + 20
        width = max(160, min(400, width))
        column_widths.append(width)
    return column_widths


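# Holds the most recently loaded table so the category filter callback can
# re-filter it without refetching the data.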
class DataState:
    def __init__(self):
        self.current_df = None


data_state = DataState()


def create_interface():
    empty_df = pd.DataFrame(columns=[
        'Name', 'Category', 'Article Address'
    ])

    def load_initial_data():
        try:
            data_url, update_time = get_latest_data()
            data = load_data(data_url)
            new_df = build_main_table(data)
            data_state.current_df = new_df
            filtered_df = filter_table1(new_df, DATA_CATEGORY)
            return get_leaderboard_title(update_time), filtered_df.sort_values("Name", ascending=True)
        except Exception as e:
            print(f"Error loading initial data: {e}")
            return "# Supported Datasets List (Error loading data)", empty_df

    def refresh_data():
        try:
            data_url, update_time = get_latest_data()
            data = load_data(data_url)
            new_df = build_main_table(data)
            data_state.current_df = new_df
            filtered_df = filter_table1(new_df, DATA_CATEGORY)
            return get_leaderboard_title(update_time), filtered_df.sort_values("Name", ascending=True)
        except Exception as e:
            print(f"Error refreshing data: {e}")
            return None, None

    def update_table(category):
        if data_state.current_df is None:
            return empty_df
        filtered_df = filter_table1(data_state.current_df, category)
        return filtered_df.sort_values("Name", ascending=True)

    initial_title, initial_data = load_initial_data()

    with gr.Blocks() as demo:
        title_comp = gr.Markdown(initial_title)

        with gr.Tabs() as tabs:
            with gr.TabItem("Dataset List", elem_id='main'):
                gr.Markdown(MAIN_DESCRIPTION)

                with gr.Row():
                    with gr.Column():
                        category_filter = gr.CheckboxGroup(
                            choices=DATA_CATEGORY,
                            value=DATA_CATEGORY,
                            label='Category',
                            interactive=True,
                        )

                    with gr.Column():
                        table = gr.DataFrame(
                            value=initial_data,
                            interactive=False,
                            wrap=False,
                            column_widths=calculate_column_widths(initial_data),
                        )

                refresh_button = gr.Button("Refresh Data")

                def refresh_and_update():
                    title, data = refresh_data()
                    return title, data

                refresh_button.click(
                    fn=refresh_and_update,
                    outputs=[title_comp, table],
                )

                category_filter.change(
                    fn=update_table,
                    inputs=[category_filter],
                    outputs=table,
                )

        with gr.Row():
            with gr.Accordion("Citation", open=False):
                citation_button = gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    elem_id='citation-button',
                    lines=6,
                    max_lines=8,
                    show_copy_button=True
                )

    return demo


if __name__ == '__main__':
    demo = create_interface()
    demo.queue()
    demo.launch(server_name='0.0.0.0')