Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| from pathlib import Path | |
| from typing import Dict, List, Tuple | |
| import os | |
| import base64 | |
# Directory the spreadsheets are read from and the exported CSV is written to.
PROCESSED_DATA_DIR = Path(".")

# Embed logo as a base64 data URI to avoid Gradio toolbar interactions
logo_path = "rowsquared-logo-large.png"
logo_b64 = base64.b64encode(Path(logo_path).read_bytes()).decode("utf-8")
# ----------------------------
# Data loading & preprocessing
# ----------------------------
# Load the ISCO reference sheet, then keep only the four label columns,
# dropping rows with a missing label and repeated label paths.
_ISCO_LABEL_COLUMNS = ["major_label", "sub_major_label", "minor_label", "unit_label"]
df_isco = pd.read_excel(
    PROCESSED_DATA_DIR / "isco_imperfect.xlsx",
    converters={"major": str, "sub_major": str, "minor": str, "unit": str},
)
df_isco = df_isco[_ISCO_LABEL_COLUMNS].dropna().drop_duplicates().reset_index(drop=True)
# Build nested hierarchy dict: {major: {sub: {minor: [units]}}}
hierarchy: Dict[str, Dict[str, Dict[str, List[str]]]] = {}
for row in df_isco.itertuples(index=False):
    by_sub = hierarchy.setdefault(row.major_label, {})
    by_minor = by_sub.setdefault(row.sub_major_label, {})
    by_minor.setdefault(row.minor_label, []).append(row.unit_label)

# Make every leaf list of unit labels unique and sorted.
for by_sub in hierarchy.values():
    for by_minor in by_sub.values():
        # Re-assigning existing keys does not resize the dict, so this is
        # safe while iterating over its items.
        for minor_label, unit_labels in by_minor.items():
            by_minor[minor_label] = sorted(set(unit_labels))
# Fast helpers for children
def majors() -> List[str]:
    """Return every major-level label in the hierarchy, sorted."""
    return sorted(hierarchy)
def submajors(maj: str) -> List[str]:
    """Return the sorted sub-major labels under ``maj``.

    An unknown ``maj`` yields an empty list.
    """
    children = hierarchy.get(maj)
    if children is None:
        return []
    return sorted(children)
def minors(maj: str, sub: str) -> List[str]:
    """Return the sorted minor labels under ``maj`` / ``sub``.

    An unknown ``maj`` or ``sub`` yields an empty list.
    """
    by_sub = hierarchy.get(maj, {})
    by_minor = by_sub.get(sub, {})
    return sorted(by_minor)
def units(maj: str, sub: str, mn: str) -> List[str]:
    """Return the unit labels stored under ``maj`` / ``sub`` / ``mn``.

    The list held in ``hierarchy`` is returned as-is (not a copy); an unknown
    path at any level yields a new empty list.
    """
    try:
        return hierarchy[maj][sub][mn]
    except KeyError:
        return []
# ----------------------------
# Records to annotate
# ----------------------------
# One row per record to annotate; the label columns and the `annotated` flag
# are created here when the spreadsheet does not already provide them.
records = pd.read_excel(PROCESSED_DATA_DIR / "isco_predictions.xlsx")

for col in ["major_label", "sub_major_label", "minor_label", "unit_label"]:
    if col not in records:
        records[col] = ""
    # Use object dtype so string labels can always be assigned later, even
    # when an existing column was read with a numeric dtype (e.g. all blank).
    records[col] = records[col].astype(object)

if "annotated" in records:
    # Blank cells are read as NaN, which is truthy; normalise to real booleans
    # so a missing flag is treated as "not annotated".
    records["annotated"] = [bool(v) and not pd.isna(v) for v in records["annotated"]]
else:
    records["annotated"] = False

# The handlers address rows by position, so make the index 0..n-1.
records.reset_index(drop=True, inplace=True)
# -----------------------------------
# Core logic: clamp & state management
# -----------------------------------
def _first_if_invalid(value, choices):
    """Return ``value`` if it is in ``choices``, else the first choice ("" when there are none)."""
    if value in choices:
        return value
    return choices[0] if choices else ""


def clamp_path(
    maj: str, sub: str, mn: str, un: str
) -> Tuple[str, str, str, str, List[str], List[str], List[str], List[str]]:
    """Coerce a (major, sub-major, minor, unit) path into a valid one.

    Each level is kept when it is a valid choice under the already-resolved
    parent levels; otherwise it falls back to the first available choice, or
    to "" when that level has no choices.

    Returns:
        The four resolved labels followed by the four lists of choices that
        were valid at each level, in the same order.
    """
    maj_choices = majors()
    maj = _first_if_invalid(maj, maj_choices)

    # A level whose parent resolved to "" has no choices at all.
    sub_choices = submajors(maj) if maj else []
    sub = _first_if_invalid(sub, sub_choices)

    mn_choices = minors(maj, sub) if sub else []
    mn = _first_if_invalid(mn, mn_choices)

    un_choices = units(maj, sub, mn) if mn else []
    un = _first_if_invalid(un, un_choices)

    return maj, sub, mn, un, maj_choices, sub_choices, mn_choices, un_choices
def save_record(i: int, maj: str, sub: str, mn: str, un: str) -> None:
    """Store the four labels on row ``i`` of ``records`` and flag it as annotated."""
    label_columns = ["major_label", "sub_major_label", "minor_label", "unit_label"]
    records.loc[i, label_columns] = [maj, sub, mn, un]
    records.loc[i, "annotated"] = True
def status_text(i: int) -> str:
    """Return the Markdown status line for row ``i`` of ``records``.

    The text depends only on the truthiness of the row's ``annotated`` value.
    """
    # NOTE(review): the glyphs in front of these labels were mis-decoded
    # ("β"); ✅ / ❌ are the presumed intended emoji — confirm.
    label = "✅ Annotated" if records.loc[i, "annotated"] else "❌ Not Annotated"
    return f"**Status**: {label}"
def load_record(i: int):
    """Build the UI payload for row ``i`` of ``records``.

    The stored labels are clamped to a valid hierarchy path. If clamping
    changed any of them, the corrected labels are written back, but the
    ``annotated`` flag is left untouched: merely displaying a record must not
    mark it as annotated.

    Returns:
        A 6-tuple: the record Markdown, the status Markdown, and one
        ``gr.update`` (choices + value) for each of the major, sub-major,
        minor and unit radios.
    """
    rec = records.loc[i]
    stored = (
        rec["major_label"],
        rec["sub_major_label"],
        rec["minor_label"],
        rec["unit_label"],
    )
    maj, sub, mn, un, maj_c, sub_c, mn_c, un_c = clamp_path(*stored)

    # Persist clamped values back (only if changed), without touching `annotated`.
    if (maj, sub, mn, un) != stored:
        records.loc[
            i, ["major_label", "sub_major_label", "minor_label", "unit_label"]
        ] = [maj, sub, mn, un]

    record_md = f"## Occupation: {rec['occupation_title_main']}\n## Industry: {rec['industry_title_main']}"
    return (
        record_md,
        status_text(i),
        gr.update(choices=maj_c, value=maj),
        gr.update(choices=sub_c, value=sub),
        gr.update(choices=mn_c, value=mn),
        gr.update(choices=un_c, value=un),
    )
# ---------------------
# Event handler helpers
# ---------------------
def on_major_change(new_major: str, i: int):
    """Handle a new major selection for row ``i``.

    Every lower level is reset to the first available child ("" when there is
    none), the full path is saved via ``save_record``, and updates for the
    four radios plus the status text are returned.
    """
    sub_c = submajors(new_major)
    sub = next(iter(sub_c), "")

    mn_c = minors(new_major, sub) if sub else []
    mn = next(iter(mn_c), "")

    un_c = units(new_major, sub, mn) if mn else []
    un = next(iter(un_c), "")

    save_record(i, new_major, sub, mn, un)

    major_update = gr.update(choices=majors(), value=new_major)
    sub_update = gr.update(choices=sub_c, value=sub)
    minor_update = gr.update(choices=mn_c, value=mn)
    unit_update = gr.update(choices=un_c, value=un)
    return major_update, sub_update, minor_update, unit_update, status_text(i)
def on_sub_change(new_sub: str, i: int, major: str):
    """Handle a new sub-major selection for row ``i``.

    Minor and unit are reset to the first available child ("" when there is
    none); the sub-major, minor and unit labels are stored and the row is
    flagged as annotated. Returns updates for the sub-major, minor and unit
    radios plus the status text.
    """
    mn_c = minors(major, new_sub)
    mn = next(iter(mn_c), "")

    un_c = units(major, new_sub, mn) if mn else []
    un = next(iter(un_c), "")

    changed_columns = ["sub_major_label", "minor_label", "unit_label"]
    records.loc[i, changed_columns] = [new_sub, mn, un]
    records.loc[i, "annotated"] = True

    sub_update = gr.update(choices=submajors(major), value=new_sub)
    minor_update = gr.update(choices=mn_c, value=mn)
    unit_update = gr.update(choices=un_c, value=un)
    return sub_update, minor_update, unit_update, status_text(i)
def on_minor_change(new_minor: str, i: int, major: str, sub: str):
    """Handle a new minor selection for row ``i``.

    The unit is reset to the first available child ("" when there is none);
    the minor and unit labels are stored and the row is flagged as annotated.
    Returns updates for the minor and unit radios plus the status text.
    """
    un_c = units(major, sub, new_minor)
    un = next(iter(un_c), "")

    records.loc[i, ["minor_label", "unit_label"]] = [new_minor, un]
    records.loc[i, "annotated"] = True

    minor_update = gr.update(choices=minors(major, sub), value=new_minor)
    unit_update = gr.update(choices=un_c, value=un)
    return minor_update, unit_update, status_text(i)
def on_unit_change(new_unit: str, i: int, major: str, sub: str, mn: str):
    """Handle a new unit selection for row ``i``.

    A unit that is not valid under ``major`` / ``sub`` / ``mn`` is replaced by
    the first valid one ("" when there is none). The unit label is stored and
    the row is flagged as annotated. Returns an update for the unit radio and
    the status text.
    """
    un_c = units(major, sub, mn)
    if new_unit in un_c:
        unit = new_unit
    else:
        unit = next(iter(un_c), "")

    records.loc[i, "unit_label"] = unit
    records.loc[i, "annotated"] = True
    return gr.update(choices=un_c, value=unit), status_text(i)
def go_next(i: int) -> int:
    """Return the index after ``i``, wrapping from the last row back to 0."""
    total = len(records)
    return (i + 1) % total
def go_prev(i: int) -> int:
    """Return the index before ``i``, wrapping from 0 to the last row."""
    total = len(records)
    return (i - 1) % total
# ---- NAVIGATION: save + move + reload in ONE callback ----
def save_and_jump(i: int, direction: str):
    """Save row ``i``, move to a neighbouring row and return its UI payload.

    The labels currently stored for row ``i`` are clamped to a valid path and
    saved via ``save_record``. ``direction == "next"`` moves forward; any other
    value moves backward.

    Returns:
        The new index followed by everything ``load_record`` returns for it.
    """
    # Final safety net: clamp and persist whatever is currently stored
    current = records.loc[i]
    clamped = clamp_path(
        current["major_label"],
        current["sub_major_label"],
        current["minor_label"],
        current["unit_label"],
    )
    # Only the first four items are labels; the rest are the choices lists.
    save_record(i, *clamped[:4])

    if direction == "next":
        target = go_next(i)
    else:
        target = go_prev(i)
    return (target, *load_record(target))
def download_annotations() -> str:
    """Write ``records`` to ``annotated_output.csv`` (without the index) and return its path as a string."""
    out_file = PROCESSED_DATA_DIR / "annotated_output.csv"
    records.to_csv(out_file, index=False)
    return str(out_file)
# --------------
# Build the UI
# --------------
def build_gradio_app():
    """Assemble the annotation UI and wire its events.

    Layout: logo, title, the current record with its status, Previous/Next
    buttons, one radio per ISCO level, and a download button with a file
    output that is hidden until a download is requested.

    Returns:
        The ``gr.Blocks`` app (not launched).
    """
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column(scale=1):
                # Static logo, non-interactive
                gr.HTML(
                    f'<img src="data:image/png;base64,{logo_b64}" width="200" style="pointer-events:none; user-select:none; display:block;" />'
                )
        with gr.Row():
            gr.Markdown("# ISCO Annotation", elem_id="isco-title")
        # Centre the title and hide the footer / API / share links.
        gr.HTML("""
        <style>
        #isco-title {
            text-align: center;
            width: 100%;
            margin: 0.5em 0;
        }
        footer { display: none !important; }
        .gradio-container .api-link, .gradio-container .share-link { display: none !important; }
        </style>
        """)

        # Index of the record currently shown.
        idx_state = gr.State(0)

        with gr.Group():
            record_md = gr.Markdown()
            status_md = gr.Markdown()

        # NOTE(review): the glyphs on the three button labels were mis-decoded
        # in the original; ⬅ / ➡ / 📥 are the presumed intended ones — confirm.
        with gr.Row():
            prev_btn = gr.Button("⬅ Previous")
            next_btn = gr.Button("➡ Next")

        with gr.Row():
            with gr.Column():
                major_radio = gr.Radio(label="Level 1: Major", choices=[], interactive=True)
            with gr.Column():
                sub_radio = gr.Radio(label="Level 2: Sub-major", choices=[], interactive=True)
            with gr.Column():
                minor_radio = gr.Radio(label="Level 3: Minor", choices=[], interactive=True)
            with gr.Column():
                unit_radio = gr.Radio(label="Level 4: Unit", choices=[], interactive=True)

        download_btn = gr.Button("📥 Download Annotations")
        download_file = gr.File(label="Annotated CSV", visible=False)

        # Components refreshed whenever a record is (re)loaded.
        record_outputs = [
            idx_state, record_md, status_md,
            major_radio, sub_radio, minor_radio, unit_radio,
        ]

        # Initial load
        demo.load(lambda: (0,) + load_record(0), outputs=record_outputs)

        next_btn.click(
            lambda i: save_and_jump(i, "next"),
            inputs=[idx_state],
            outputs=record_outputs,
        )
        prev_btn.click(
            lambda i: save_and_jump(i, "prev"),
            inputs=[idx_state],
            outputs=record_outputs,
        )

        # Selection handlers (also update status).
        # `.input` fires only on user interaction. `.change` would also fire
        # when a record load or a parent handler sets the radio values, which
        # would re-run these handlers for the freshly loaded record, reset its
        # lower levels to the first child and mark it as annotated. Each
        # handler already updates every level below it, so no cascade is needed.
        major_radio.input(
            on_major_change,
            inputs=[major_radio, idx_state],
            outputs=[major_radio, sub_radio, minor_radio, unit_radio, status_md],
        )
        sub_radio.input(
            on_sub_change,
            inputs=[sub_radio, idx_state, major_radio],
            outputs=[sub_radio, minor_radio, unit_radio, status_md],
        )
        minor_radio.input(
            on_minor_change,
            inputs=[minor_radio, idx_state, major_radio, sub_radio],
            outputs=[minor_radio, unit_radio, status_md],
        )
        unit_radio.input(
            on_unit_change,
            inputs=[unit_radio, idx_state, major_radio, sub_radio, minor_radio],
            outputs=[unit_radio, status_md],
        )

        # Download: write the CSV, then reveal the file component.
        download_btn.click(download_annotations, outputs=[download_file]).then(
            lambda: gr.update(visible=True), None, [download_file]
        )
    return demo
if __name__ == "__main__":
    demo = build_gradio_app()

    # NOTE(review): when APP_USER / APP_PASS are unset, the credentials fall
    # back to a pair of empty strings — confirm this is intended.
    credentials = (os.getenv("APP_USER", ""), os.getenv("APP_PASS", ""))
    port = int(os.getenv("PORT", 7860))

    demo.queue().launch(
        show_api=False,
        ssr_mode=False,  # disable experimental SSR
        auth=credentials,
        server_name="0.0.0.0",  # optional, but explicit
        server_port=port,
    )