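"""Gradio Space for exploring LightEval tasks: a searchable leaderboard built from the
tasks table, plus an inspector showing each task's metadata, prompt function source,
and a few dataset examples."""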
import ast
import json
import urllib.request

import gradio as gr
import pandas as pd
from datasets import load_dataset
from gradio_leaderboard import Leaderboard, SelectColumns, SearchColumns, ColumnFilter

# Load the LightEval tasks table and expand tasks that belong to several suites into one row per suite
df = pd.read_json("https://raw.githubusercontent.com/huggingface/lighteval/main/src/lighteval/tasks/tasks_table.jsonl",
                  lines=True).explode("suite").reset_index(drop=True)

# Fetch the source of lighteval's prompt formatting module
with urllib.request.urlopen(
        "https://raw.githubusercontent.com/huggingface/lighteval/main/src/lighteval/tasks/tasks_prompt_formatting.py") as f:
    tasks_prompt_functions_raw = f.read().decode('utf-8')

# Parse the module and index the source of every function by name
tree = ast.parse(tasks_prompt_functions_raw)
tasks_prompt_functions = {}
for node in ast.walk(tree):
    if isinstance(node, ast.FunctionDef):
        function_name = node.name
        # Get the source code for the function node
        function_code = ast.get_source_segment(tasks_prompt_functions_raw, node)
        tasks_prompt_functions[function_name] = function_code


def load_task_metadata(task_id):
    """Return the selected task's metadata, a markdown header linking to its HF dataset, and its prompt function source."""
    task_row = df.iloc[task_id]
    return (task_row.to_dict(),
            f"""Examples from the HF repository ([{task_row['hf_repo']}](https://huggingface.co/datasets/{task_row['hf_repo']}))""",
            tasks_prompt_functions.get(task_row["prompt_function"]))


def load_task_examples(task_id):
    """Stream the first 20 examples from the task's evaluation split(s) and return them as a DataFrame."""
    task_row = df.iloc[task_id]
    dataset = load_dataset(task_row["hf_repo"], task_row["hf_subset"], split="+".join(task_row["evaluation_splits"]),
                           trust_remote_code=task_row["trust_dataset"], streaming=True)
    sample_data = next(dataset.iter(20))
    # dictionary of lists to list of dictionaries, serializing nested values so they render in the dataframe
    return pd.DataFrame(
        dict(zip(sample_data, (v if not isinstance(v, (dict, list)) else json.dumps(v) for v in t)))
        for t in zip(*sample_data.values()))


with gr.Blocks() as demo:
    gr.Markdown("""
    # LightEval Tasks Explorer
    """)
    with gr.Tabs() as tabs:
        with gr.TabItem("🗃️ Tasks List"):
            Leaderboard(
                value=df,
                select_columns=SelectColumns(
                    default_selection=["name", "suite", "prompt_function", "hf_repo", "hf_subset", "evaluation_splits",
                                       "metric"],
                    cant_deselect=["name", "suite"],
                    label="Columns to display"),
                search_columns=SearchColumns(primary_column="name",
                                             secondary_columns=["suite", "prompt_function", "hf_repo", "metric"],
                                             placeholder="Search for a task by name, suite, prompt_function, hf_repo or "
                                                         "metric. To search by suite, for example, type 'suite:<query>'. Separate queries by \";\"",
                                             label="Search"),
                filter_columns=[
                    ColumnFilter("suite", type="dropdown", label="Select suite"),
                    # ColumnFilter("prompt_function", type="dropdown", label="Select prompt_function"),
                    # ColumnFilter("metric", type="dropdown", label="Select metric")
                ],
                wrap=True
            )
        with gr.TabItem("🔎 Task Inspector"):
            # Dropdown labels are "suite|name"; values are row indices into df
            task_inspector_selector = gr.Dropdown(
                choices=sorted(zip((df['suite'] + '|' + df['name']).tolist(), range(len(df)))),
                label="Task",
                info="Select a task"
            )
            with gr.Row():
                with gr.Column():
                    task_metadata = gr.Json(label="Task definition")
                with gr.Column():
                    task_prompt_function = gr.Code(label="Task prompt function", language="python", interactive=False)
            task_dataset_header = gr.Markdown("Examples from the HF repository")
            task_dataset = gr.Dataframe(wrap=True)

    # Wire the dropdown to the loaders: metadata/prompt function and dataset examples update on selection
    gr.on(triggers=[task_inspector_selector.change], inputs=[task_inspector_selector],
          outputs=[task_metadata, task_dataset_header, task_prompt_function], fn=load_task_metadata)
    gr.on(triggers=[task_inspector_selector.change], inputs=[task_inspector_selector], outputs=[task_dataset],
          fn=load_task_examples)

if __name__ == "__main__":
    demo.launch()