# gtani's picture
# Update app.py
# d363c2f verified
# raw
# history blame
# 10.6 kB
import gradio as gr
import pandas as pd
from pathlib import Path
from typing import Dict, List, Tuple
import os
import base64
# Directory that the Excel inputs are read from and the CSV export is written to.
PROCESSED_DATA_DIR = Path(".")

# Read the logo once and keep it base64-encoded so the UI can inline it as a
# data URI (avoids Gradio toolbar interactions on the image).
logo_path = "rowsquared-logo-large.png"
logo_b64 = base64.b64encode(Path(logo_path).read_bytes()).decode("utf-8")
# ----------------------------
# Data loading & preprocessing
# ----------------------------
# Load the ISCO sheet; the four code columns are read through the `str`
# converter.
df_isco = pd.read_excel(
    PROCESSED_DATA_DIR / "isco_imperfect.xlsx",
    converters=dict.fromkeys(("major", "sub_major", "minor", "unit"), str),
)
# Keep only the label columns, one row per distinct complete path.
df_isco = df_isco[["major_label", "sub_major_label", "minor_label", "unit_label"]]
df_isco = df_isco.dropna().drop_duplicates().reset_index(drop=True)
# Nested lookup of the label tree: {major: {sub_major: {minor: [unit, ...]}}}
hierarchy: Dict[str, Dict[str, Dict[str, List[str]]]] = {}
for _, row in df_isco.iterrows():
    sub_level = hierarchy.setdefault(row.major_label, {})
    minor_level = sub_level.setdefault(row.sub_major_label, {})
    unit_list = minor_level.setdefault(row.minor_label, [])
    unit_list.append(row.unit_label)

# Second pass: drop duplicate unit labels in every leaf list, then sort it.
for sub_level in hierarchy.values():
    for minor_level in sub_level.values():
        for minor_name, unit_list in minor_level.items():
            # Rebinding an existing key does not resize the dict, so this is
            # safe while iterating over it.
            minor_level[minor_name] = sorted(dict.fromkeys(unit_list))
# Fast helpers for children
def majors() -> List[str]:
    """Return all top-level (major) labels of ``hierarchy`` in sorted order."""
    return sorted(hierarchy)
def submajors(maj: str) -> List[str]:
    """Return the sorted sub-major labels under ``maj``.

    An unknown ``maj`` yields an empty list.
    """
    children = hierarchy.get(maj, {})
    return sorted(children)
def minors(maj: str, sub: str) -> List[str]:
    """Return the sorted minor labels under the ``maj`` / ``sub`` path.

    If either level is not present in ``hierarchy`` the result is empty.
    """
    by_sub = hierarchy.get(maj, {})
    by_minor = by_sub.get(sub, {})
    return sorted(by_minor)
def units(maj: str, sub: str, mn: str) -> List[str]:
    """Return the unit labels stored under ``maj`` / ``sub`` / ``mn``.

    The list kept in ``hierarchy`` is returned as-is (not copied and not
    re-sorted here); a path that is not present yields a fresh empty list.
    """
    minor_map = hierarchy.get(maj, {}).get(sub, {})
    if mn in minor_map:
        return minor_map[mn]
    return []
# ----------------------------
# Records to annotate
# ----------------------------
# Table of records to annotate; shared and mutated in place by the handlers.
records = pd.read_excel(PROCESSED_DATA_DIR / "isco_predictions.xlsx").copy()

# Make sure the four label columns and the `annotated` flag exist, then
# re-assign each as its own copy so later writes never hit a view.
for col, default in (
    ("major_label", ""),
    ("sub_major_label", ""),
    ("minor_label", ""),
    ("unit_label", ""),
    ("annotated", False),
):
    if col not in records.columns:
        records[col] = default
    records[col] = records[col].copy()

# Positional 0..n-1 index, which the handlers use as the record number.
records = records.reset_index(drop=True)
# -----------------------------------
# Core logic: clamp & state management
# -----------------------------------
def clamp_path(maj: str, sub: str, mn: str, un: str
               ) -> Tuple[str, str, str, str, List[str], List[str], List[str], List[str]]:
    """Coerce a (major, sub-major, minor, unit) selection onto the hierarchy.

    Levels are checked top-down. A value that is among the choices available
    at its level is kept; otherwise it is replaced by the first choice, or by
    ``""`` when the level has no choices. A level whose parent ended up falsy
    gets an empty choice list.

    Returns:
        ``(maj, sub, mn, un, maj_choices, sub_choices, mn_choices, un_choices)``.
    """

    def _keep_or_first(current, options):
        # Valid values pass through untouched; anything else falls back.
        if current in options:
            return current
        return options[0] if options else ""

    maj_choices = majors()
    maj = _keep_or_first(maj, maj_choices)

    sub_choices = submajors(maj) if maj else []
    sub = _keep_or_first(sub, sub_choices)

    mn_choices = minors(maj, sub) if sub else []
    mn = _keep_or_first(mn, mn_choices)

    un_choices = units(maj, sub, mn) if mn else []
    un = _keep_or_first(un, un_choices)

    return maj, sub, mn, un, maj_choices, sub_choices, mn_choices, un_choices
def save_record(i: int, maj: str, sub: str, mn: str, un: str) -> None:
    """Write the four labels into row ``i`` of ``records`` and flag it annotated."""
    label_columns = ["major_label", "sub_major_label", "minor_label", "unit_label"]
    new_labels = [maj, sub, mn, un]
    records.loc[i, label_columns] = new_labels
    records.loc[i, "annotated"] = True
def status_text(i: int) -> str:
    """Return the Markdown status line for record ``i``.

    The text reflects the ``annotated`` flag stored in ``records`` for that
    row.
    """
    # The status emoji had been mis-decoded into mojibake; these are the
    # intended characters (U+2705 check mark, U+274C cross mark).
    if records.loc[i, "annotated"]:
        label = "✅ Annotated"
    else:
        label = "❌ Not Annotated"
    return f"**Status**: {label}"
def load_record(i: int):
    """Prepare everything the UI needs to display record ``i``.

    The stored labels are clamped onto the hierarchy with ``clamp_path`` and
    the clamped values are written back to ``records``. The ``annotated``
    flag is deliberately left alone: displaying a record must not count as
    annotating it, otherwise the status could never read "Not Annotated".

    Returns:
        A 6-tuple: the record Markdown (built from the
        ``occupation_title_main`` and ``industry_title_main`` columns), the
        status Markdown, and one ``gr.update`` (choices + value) for each of
        the major, sub-major, minor and unit radios.
    """
    rec = records.loc[i]
    maj, sub, mn, un, maj_c, sub_c, mn_c, un_c = clamp_path(
        rec["major_label"], rec["sub_major_label"], rec["minor_label"], rec["unit_label"]
    )

    # Persist the clamped labels only. Going through save_record() here would
    # also set annotated=True for a record the user has merely looked at.
    label_columns = ["major_label", "sub_major_label", "minor_label", "unit_label"]
    records.loc[i, label_columns] = [maj, sub, mn, un]

    record_md = f"## Occupation: {rec['occupation_title_main']}\n## Industry: {rec['industry_title_main']}"
    return (
        record_md,
        status_text(i),
        gr.update(choices=maj_c, value=maj),
        gr.update(choices=sub_c, value=sub),
        gr.update(choices=mn_c, value=mn),
        gr.update(choices=un_c, value=un),
    )
# ---------------------
# Event handler helpers
# ---------------------
def on_major_change(new_major: str, i: int):
    """Handle a new major selection for record ``i``.

    Every level below the major is reset to its first available choice (or
    ``""`` when there is none), the resulting path is stored through
    ``save_record``, and updates for all four radios plus the status text
    are returned.
    """
    sub_options = submajors(new_major)
    chosen_sub = sub_options[0] if sub_options else ""

    if chosen_sub:
        minor_options = minors(new_major, chosen_sub)
    else:
        minor_options = []
    chosen_minor = minor_options[0] if minor_options else ""

    if chosen_minor:
        unit_options = units(new_major, chosen_sub, chosen_minor)
    else:
        unit_options = []
    chosen_unit = unit_options[0] if unit_options else ""

    save_record(i, new_major, chosen_sub, chosen_minor, chosen_unit)

    major_update = gr.update(choices=majors(), value=new_major)
    sub_update = gr.update(choices=sub_options, value=chosen_sub)
    minor_update = gr.update(choices=minor_options, value=chosen_minor)
    unit_update = gr.update(choices=unit_options, value=chosen_unit)
    return major_update, sub_update, minor_update, unit_update, status_text(i)
def on_sub_change(new_sub: str, i: int, major: str):
    """Handle a new sub-major selection for record ``i``.

    Minor and unit are reset to the first choice under the new sub-major (or
    ``""``), the three lower labels are written to ``records``, and the row
    is flagged as annotated. Returns updates for the sub-major, minor and
    unit radios followed by the status text.
    """
    minor_options = minors(major, new_sub)
    chosen_minor = minor_options[0] if minor_options else ""

    if chosen_minor:
        unit_options = units(major, new_sub, chosen_minor)
    else:
        unit_options = []
    chosen_unit = unit_options[0] if unit_options else ""

    touched_columns = ["sub_major_label", "minor_label", "unit_label"]
    records.loc[i, touched_columns] = [new_sub, chosen_minor, chosen_unit]
    records.loc[i, "annotated"] = True

    sub_update = gr.update(choices=submajors(major), value=new_sub)
    minor_update = gr.update(choices=minor_options, value=chosen_minor)
    unit_update = gr.update(choices=unit_options, value=chosen_unit)
    return sub_update, minor_update, unit_update, status_text(i)
def on_minor_change(new_minor: str, i: int, major: str, sub: str):
    """Handle a new minor selection for record ``i``.

    The unit is reset to the first choice under the new minor (or ``""``),
    both labels are written to ``records``, and the row is flagged as
    annotated. Returns updates for the minor and unit radios followed by the
    status text.
    """
    unit_options = units(major, sub, new_minor)
    chosen_unit = unit_options[0] if unit_options else ""

    records.loc[i, ["minor_label", "unit_label"]] = [new_minor, chosen_unit]
    records.loc[i, "annotated"] = True

    minor_update = gr.update(choices=minors(major, sub), value=new_minor)
    unit_update = gr.update(choices=unit_options, value=chosen_unit)
    return minor_update, unit_update, status_text(i)
def on_unit_change(new_unit: str, i: int, major: str, sub: str, mn: str):
    """Handle a new unit selection for record ``i``.

    A unit that is not among the choices for the current path is replaced by
    the first choice (or ``""``). The unit is stored in ``records`` and the
    row is flagged as annotated. Returns the unit radio update and the
    status text.
    """
    allowed = units(major, sub, mn)
    if new_unit in allowed:
        selected = new_unit
    else:
        selected = allowed[0] if allowed else ""

    records.loc[i, "unit_label"] = selected
    records.loc[i, "annotated"] = True
    return gr.update(choices=allowed, value=selected), status_text(i)
def go_next(i: int) -> int:
    """Return the index following ``i``, wrapping around past the last record."""
    total = len(records)
    return (i + 1) % total
def go_prev(i: int) -> int:
    """Return the index preceding ``i``, wrapping around before the first record."""
    total = len(records)
    return (i - 1) % total
# ---- NAVIGATION: save + move + reload in ONE callback ----
def save_and_jump(i: int, direction: str):
    """Save record ``i``, move one step, and load the destination record.

    The labels currently stored for ``i`` are clamped and written back via
    ``save_record`` before moving. ``direction == "next"`` moves forward;
    any other value moves backward.

    Returns:
        The new index followed by everything ``load_record`` returns for it.
    """
    current = records.loc[i]
    # Safety net: re-validate whatever is stored before leaving the record.
    maj, sub, mn, un, *_ = clamp_path(
        current["major_label"],
        current["sub_major_label"],
        current["minor_label"],
        current["unit_label"],
    )
    save_record(i, maj, sub, mn, un)

    if direction == "next":
        new_i = go_next(i)
    else:
        new_i = go_prev(i)
    return (new_i, *load_record(new_i))
def download_annotations() -> str:
    """Write ``records`` to ``annotated_output.csv`` and return that file's path.

    The file is created under ``PROCESSED_DATA_DIR`` without the index column.
    """
    destination = PROCESSED_DATA_DIR.joinpath("annotated_output.csv")
    records.to_csv(destination, index=False)
    return str(destination)
# --------------
# Build the UI
# --------------
# Page-level CSS injected through gr.HTML: centres the title and hides the
# Gradio footer and the API / share links.
_PAGE_STYLE_HTML = """
<style>
#isco-title {
text-align: center;
width: 100%;
margin: 0.5em 0;
}
footer { display: none !important; }
.gradio-container .api-link, .gradio-container .share-link { display: none !important; }
</style>
"""


def build_gradio_app():
    """Assemble the annotation UI and wire up its events.

    Layout: logo, title, record and status text, Previous/Next buttons, one
    radio per hierarchy level, and a download button with an initially hidden
    file output. The current record index is held in a ``gr.State``.

    Returns:
        The constructed ``gr.Blocks`` app (not launched).
    """
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column(scale=1):
                # Static logo, non-interactive
                gr.HTML(
                    f'<img src="data:image/png;base64,{logo_b64}" width="200" style="pointer-events:none; user-select:none; display:block;" />'
                )
        with gr.Row():
            gr.Markdown("# ISCO Annotation", elem_id="isco-title")
        gr.HTML(_PAGE_STYLE_HTML)

        idx_state = gr.State(0)

        with gr.Group():
            record_md = gr.Markdown()
            status_md = gr.Markdown()

        # Button captions use the intended emoji; they had been mis-decoded
        # into mojibake.
        with gr.Row():
            prev_btn = gr.Button("⬅ Previous")
            next_btn = gr.Button("✅ Next")

        with gr.Row():
            with gr.Column():
                major_radio = gr.Radio(label="Level 1: Major", choices=[], interactive=True)
            with gr.Column():
                sub_radio = gr.Radio(label="Level 2: Sub-major", choices=[], interactive=True)
            with gr.Column():
                minor_radio = gr.Radio(label="Level 3: Minor", choices=[], interactive=True)
            with gr.Column():
                unit_radio = gr.Radio(label="Level 4: Unit", choices=[], interactive=True)

        download_btn = gr.Button("📥 Download Annotations")
        download_file = gr.File(label="Annotated CSV", visible=False)

        # Components refreshed whenever a record is (re)loaded.
        record_outputs = [
            idx_state, record_md, status_md,
            major_radio, sub_radio, minor_radio, unit_radio,
        ]

        # Initial load
        demo.load(lambda: (0,) + load_record(0), outputs=record_outputs)

        # Navigation: save + move + reload in one callback each.
        next_btn.click(
            lambda i: save_and_jump(i, "next"),
            inputs=[idx_state],
            outputs=record_outputs,
        )
        prev_btn.click(
            lambda i: save_and_jump(i, "prev"),
            inputs=[idx_state],
            outputs=record_outputs,
        )

        # Selection handlers (also update status). They are bound to `.input`
        # rather than `.change`: `.change` also fires when a callback sets the
        # radio's value, so loading a record would re-run these handlers,
        # resetting the lower levels to their first choice and marking the
        # record as annotated. `.input` fires on user interaction only, and
        # each handler already cascades to the levels below it.
        major_radio.input(
            on_major_change,
            inputs=[major_radio, idx_state],
            outputs=[major_radio, sub_radio, minor_radio, unit_radio, status_md],
        )
        sub_radio.input(
            on_sub_change,
            inputs=[sub_radio, idx_state, major_radio],
            outputs=[sub_radio, minor_radio, unit_radio, status_md],
        )
        minor_radio.input(
            on_minor_change,
            inputs=[minor_radio, idx_state, major_radio, sub_radio],
            outputs=[minor_radio, unit_radio, status_md],
        )
        unit_radio.input(
            on_unit_change,
            inputs=[unit_radio, idx_state, major_radio, sub_radio, minor_radio],
            outputs=[unit_radio, status_md],
        )

        # Download: write the CSV, then reveal the file component.
        download_btn.click(download_annotations, outputs=[download_file]).then(
            lambda: gr.update(visible=True), None, [download_file]
        )
    return demo
if __name__ == "__main__":
    # NOTE(review): when APP_USER / APP_PASS are not set, the credential pair
    # passed to `auth` is two empty strings — confirm that is intended.
    credentials = (os.getenv("APP_USER", ""), os.getenv("APP_PASS", ""))
    port = int(os.getenv("PORT", 7860))

    demo = build_gradio_app()
    demo.queue().launch(
        show_api=False,
        ssr_mode=False,  # disable experimental SSR
        auth=credentials,
        server_name="0.0.0.0",  # optional, but explicit
        server_port=port,
    )