"""FastHTML app for manually curating examples from a Hugging Face dataset.

At import time the module logs in to the Hugging Face Hub, loads the
``griffin/iclr2025_data_scores`` train split into memory, and opens (creating
if necessary) a local SQLite table of annotation decisions. The web UI shows
one not-yet-evaluated example at a time with "Good"/"Bad" buttons; each
decision is stored locally and the full annotation table is pushed to a
private Hub dataset.
"""

import logging
import os
import random
from collections import Counter

from datasets import Dataset, load_dataset
from fasthtml.common import *
from fastlite import database
from huggingface_hub import create_repo, login

logger = logging.getLogger(__name__)

# Read once so every Hub call uses the same credentials and target repo.
HF_TOKEN = os.environ.get("HF_TOKEN")
HF_REPO_ID = "rbiswasfc/iclr-eval-examples"

# Fields of a dataset example that are shown to the annotator, in display order.
KEEP_KEYS = (
    "example_id",
    "question_type",
    "question",
    "rationale",
    "answer",
    "log_ll",
    "oracle_log_ll",
    "oracle_advantage",
    "prediction",
    "prediction_oracle",
    "accuracy",
    "accuracy_oracle",
    "accuracy_status",
)

login(token=HF_TOKEN)

# Each example gets a stable "example_id" equal to its position in the split.
fact_dataset = load_dataset("griffin/iclr2025_data_scores", split="train").to_list()
fact_dataset = [{"example_id": i, **example} for i, example in enumerate(fact_dataset)]

db = database("data/examples.db")
examples = db.t.examples
if examples not in db.t:
    examples.create(
        id=int,
        example_id=int,
        question_type=str,
        question=str,
        answer=str,
        decision=str,
        pk="id",
    )

question_types = sorted(set(ex["question_type"] for ex in fact_dataset))


def get_stats():
    """Return per-question-type counts of total and curated examples.

    Returns:
        A dict mapping each question type to
        ``{"total": <examples in the dataset>, "curated": <rows in the DB>}``.
    """
    total_examples = Counter(ex["question_type"] for ex in fact_dataset)
    curated_examples = Counter(row["question_type"] for row in examples())
    return {
        qt: {"total": total_examples[qt], "curated": curated_examples[qt]}
        for qt in question_types
    }


def get_example(selected_type=None):
    """Pick a random example that has not been evaluated yet.

    Args:
        selected_type: If truthy, only examples with this ``question_type``
            are considered; otherwise all question types are eligible.

    Returns:
        A dict restricted to ``KEEP_KEYS`` (keys missing from the example are
        skipped), or ``None`` when no unevaluated example matches.
    """
    evaluated_ids = set(row["example_id"] for row in examples())
    available_examples = [
        ex for ex in fact_dataset if ex["example_id"] not in evaluated_ids
    ]
    if selected_type:
        available_examples = [
            ex for ex in available_examples if ex["question_type"] == selected_type
        ]
    if not available_examples:
        return None

    example = random.choice(available_examples)
    return {k: example[k] for k in KEEP_KEYS if k in example}


def upload_to_hf():
    """Push every stored annotation to the private Hub dataset ``HF_REPO_ID``.

    The repo is created on first use (``exist_ok=True``); the whole table is
    uploaded each time, so a later call supersedes an earlier one.
    """
    create_repo(
        HF_REPO_ID,
        token=HF_TOKEN,
        private=True,
        repo_type="dataset",
        exist_ok=True,
    )
    annotations = examples()
    hf_ds = Dataset.from_list(annotations)
    hf_ds.push_to_hub(HF_REPO_ID, token=HF_TOKEN)


style = Style("""
    body {
        background-color: #1e1e1e;
        color: #d4d4d4;
        font-family: Arial, sans-serif;
    }
    h1, h2, h3 {
        color: #61dafb;
    }
    .example-container {
        margin-top: 20px;
    }
    .example-table {
        border-collapse: collapse;
        width: 100%;
    }
    .example-table th, .example-table td {
        border: 1px solid #3a3a3a;
        padding: 8px;
        text-align: left;
    }
    .example-table th {
        background-color: #2a2a2a;
        color: #61dafb;
    }
    .example-table td {
        color: #d4d4d4;
    }
    #evaluation-form {
        margin-top: 20px;
    }
    #evaluation-form button {
        margin-right: 10px;
        background-color: #0e639c;
        color: white;
        border: none;
        padding: 10px 20px;
        cursor: pointer;
    }
    #evaluation-form button:hover {
        background-color: #1177bb;
    }
    select {
        background-color: #2a2a2a;
        color: #d4d4d4;
        border: 1px solid #3a3a3a;
        padding: 5px;
    }
    a {
        color: #61dafb;
        text-decoration: none;
    }
    a:hover {
        text-decoration: underline;
    }
""")

app = FastHTML(hdrs=(style,))
rt = app.route


def render_stats(stats):
    """Render the curation statistics as a table with one row per question type.

    Args:
        stats: Mapping in the shape returned by ``get_stats``.
    """
    rows = []
    for qt in question_types:
        curated = stats[qt]["curated"]
        total = stats[qt]["total"]
        rows.append(
            Tr(
                Td(qt),
                Td(f"{curated} ({curated/total:.1%})"),
                Td(total),
            )
        )
    return Table(
        Tr(Th("Question Type"), Th("Curated"), Th("Total")),
        *rows,
        cls="stats-table",
    )


def render_example(example):
    """Render an example dict as a two-column key/value table."""
    return Div(
        H3("Example Details"),
        Table(
            *[Tr(Th(key), Td(str(value))) for key, value in example.items()],
            cls="example-table",
        ),
        id="example-details",
    )


def _decision_button(label, decision):
    """Build a button that posts ``decision`` to ``/evaluate`` via htmx."""
    return Button(
        label,
        name="decision",
        value=decision,
        hx_post="/evaluate",
        hx_target="#example-container",
        # The response carries its own #example-container wrapper, so the
        # target must be replaced rather than filled; the default innerHTML
        # swap would nest a second element with the same id inside it.
        hx_swap="outerHTML",
    )


def _render_example_container(example):
    """Render the example details together with the Good/Bad evaluation form.

    Used both for the initial page and for the fragment returned after each
    evaluation so the two stay identical.
    """
    return Div(
        render_example(example),
        Form(
            _decision_button("Good Example", "good"),
            _decision_button("Bad Example", "bad"),
            Hidden(name="example_id", value=str(example["example_id"])),
            # NOTE(review): this is the example's own type, and /evaluate uses
            # it to choose the next example, so after the first evaluation the
            # session stays within that type even if no filter was selected.
            # Confirm this is intended.
            Hidden(name="question_type", value=example["question_type"]),
            id="evaluation-form",
        ),
        id="example-container",
        # The stylesheet targets the .example-container class.
        cls="example-container",
    )


def _render_all_evaluated(stats):
    """Render the message shown when no unevaluated example is left."""
    return Div(
        H2("All examples of this type have been evaluated!"),
        render_stats(stats),
    )


@rt("/")
def get(question_type: str = None):
    """Main page: question-type filter plus one example awaiting evaluation."""
    stats = get_stats()
    example = get_example(question_type)

    dropdown = Select(
        # Choosing the placeholder submits an empty string, not None, so test
        # for falsiness to keep it shown as the current selection.
        Option("Question Types", value="", selected=not question_type),
        *[Option(qt, value=qt, selected=qt == question_type) for qt in question_types],
        name="question_type",
        hx_get="/",
        hx_target="body",
        hx_push_url="true",
    )

    if example is None:
        content = _render_all_evaluated(stats)
    else:
        content = Div(H2("Example"), _render_example_container(example))

    view_stats_link = A("Curation Stats", href="/stats", cls="view-stats-link")

    return Titled(
        "Example Curation",
        H2("Question Type Selection"),
        dropdown,
        content,
        view_stats_link,
    )


@rt("/stats")
def get():
    """Statistics page: curated vs. total counts per question type."""
    stats = get_stats()
    stats_table = render_stats(stats)

    return Titled(
        "Curation Statistics",
        Div(
            stats_table,
            A("Back to Curation", href="/", cls="back-link"),
            cls="container",
        ),
    )


@rt("/evaluate")
def post(decision: str, example_id: str, question_type: str):
    """Store a Good/Bad decision and return the next example as an HTML fragment.

    Args:
        decision: Value of the clicked button (``"good"`` or ``"bad"`` from the UI).
        example_id: ``example_id`` of the evaluated example, as submitted by the form.
        question_type: Question type submitted by the form; stored with the
            decision and used to pick the next example.
    """
    example_id = int(example_id)
    example = next((ex for ex in fact_dataset if ex["example_id"] == example_id), None)

    if example:
        stored = examples()
        # A double-click or a retried request would otherwise record the same
        # example twice.
        already_evaluated = any(row["example_id"] == example_id for row in stored)
        if not already_evaluated:
            examples.insert(
                {
                    # max+1 rather than count+1: stays unique even if rows
                    # were deleted from the table.
                    "id": max((row["id"] for row in stored), default=0) + 1,
                    "example_id": example_id,
                    "question_type": question_type,
                    "question": example["question"],
                    "answer": example["answer"],
                    "decision": decision,
                }
            )
            try:
                upload_to_hf()
            except Exception:
                # The decision is already saved locally and every upload
                # pushes the full table, so the next successful upload
                # catches up; don't fail the annotator's request.
                logger.exception("Failed to upload annotations to %s", HF_REPO_ID)

    new_example = get_example(question_type)

    if new_example is None:
        return _render_all_evaluated(get_stats())
    return _render_example_container(new_example)


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))