Spaces:

giobin
/

MAIA_human_assessment

Sleeping

File size: 5,377 Bytes

d9cbdf1
138d0d5
 
 
d9cbdf1
138d0d5
 
57bf5d5
138d0d5
 
ccaeded
 
 
138d0d5
 
 
 
 
 
 
 
 
 
ff62d04
138d0d5
 
d88bb9d
138d0d5
 
ccaeded
138d0d5
 
 
 
 
 
 
 
 
 
 
 
 
ccaeded
 
 
 
138d0d5
 
 
ccaeded
138d0d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d88bb9d
 
138d0d5
 
 
 
ccaeded
 
 
 
 
138d0d5
 
 
 
 
 
 
ccaeded
138d0d5
ccaeded
138d0d5
 
 
ccaeded
 
138d0d5
ccaeded
138d0d5
 
 
 
 
 
 
 
 
 
 
 
 
 
60f83a0
19ec3d7
ccaeded
19ec3d7
60f83a0
 
 
ccaeded
 
 
60f83a0
ccaeded
19ec3d7
138d0d5
 
19ec3d7
 
 
 
ccaeded
 
19ec3d7
ccaeded

import streamlit as st
import pandas as pd
import os
from huggingface_hub import HfApi, hf_hub_download

HF_REPO = "giobin/MAIA_human_assessment_annotations"
CSV_FILENAME = "user_selections.csv"

def assign_samples(csv_path, samples_per_group=5):
    """Build the per-annotator sample assignment from the question pool CSV.

    The CSV is read with ``pd.read_csv`` and must provide the ``pool_pos``
    and ``question_category`` columns. Rows whose ``question_category`` ends
    in ``"_B"`` are discarded; for each pool position 1, 2 and 3 the first
    ``samples_per_group`` remaining rows form one group. Annotators mapped
    to the same group share the same DataFrame object.

    Args:
        csv_path: Path (or anything else ``pd.read_csv`` accepts) of the CSV.
        samples_per_group: Maximum number of rows kept per pool position.

    Returns:
        dict mapping annotator name to the DataFrame of rows assigned to them.
    """
    df = pd.read_csv(csv_path)

    # na=False: a missing category counts as "not a _B question". Without it
    # the mask would contain NaN and the `~` negation below would fail.
    keep = ~df["question_category"].str.endswith("_B", na=False)

    groups = {
        pos: df[(df["pool_pos"] == pos) & keep].head(samples_per_group)
        for pos in (1, 2, 3)
    }

    # Annotator name -> pool position whose group they annotate.
    annotator_pool = {
        "Bernardo": 1,
        "Alessandro": 1,
        "Alessio": 1,
        "Lenci": 2,
        "Lucia": 2,
        "Davide": 2,
        "Giovanni": 3,
        "Raffaella": 3,
    }
    return {name: groups[pos] for name, pos in annotator_pool.items()}

def load_existing_annotations():
    """Fetch the annotations already stored on the Hugging Face Hub.

    Downloads ``CSV_FILENAME`` from the ``HF_REPO`` dataset repository, using
    the ``HF_TOKEN`` Streamlit secret, and parses it with pandas.

    Returns:
        The stored annotations as a DataFrame. If the download or the parsing
        fails for any reason, an empty DataFrame with the columns
        ``username`` and ``id`` is returned instead.
    """
    try:
        file_path = hf_hub_download(
            HF_REPO,
            CSV_FILENAME,
            repo_type="dataset",
            token=st.secrets["HF_TOKEN"],
        )
        return pd.read_csv(file_path)
    except Exception as e:
        # Deliberately best-effort (e.g. nothing may have been uploaded yet),
        # but log the reason so real failures are not silently hidden.
        # NOTE: callers cannot tell this fallback apart from "no annotations".
        print(f"Error loading annotations from HF: {e}")
        return pd.DataFrame(columns=["username", "id"])

# Load the per-annotator assignments and the annotations stored so far.
csv_file = "static/mc.csv"
assignments = assign_samples(csv_file)
existing_annotations = load_existing_annotations()

# The name selector offers exactly the annotators that have an assignment.
valid_users = list(assignments)

# Seed the session state on the first run; keys already present are kept.
_session_defaults = {
    "username": None,
    "index": 0,
    "results": [],
    "selected_answer": None,
    "not_enough_info": False,
}
for _key, _default in _session_defaults.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

def update_name():
    """Adopt the name chosen in the selector and restart at the first sample.

    Copies the ``selected_user`` session value into ``username`` and resets
    ``index`` to 0.
    """
    state = st.session_state
    state.username = state.selected_user
    state.index = 0

# Until an annotator has been chosen, show only the name-selection form.
if st.session_state.username is None:
    with st.form("user_form"):
        st.write("### Select Your Name")
        # key="selected_user" stores the choice in session state, which is
        # where update_name reads it from.
        selected_user = st.selectbox("Choose your name:", valid_users, key="selected_user")
        # update_name is registered as the submit callback.
        submit_button = st.form_submit_button("Start", on_click=update_name)
    # Halt this script run so nothing below the form is rendered.
    st.stop()

# Samples assigned to the current annotator, minus those they already labelled.
full_dataset = assignments[st.session_state.username].reset_index(drop=True)

_by_current_user = existing_annotations["username"] == st.session_state.username
user_labeled_ids = existing_annotations.loc[_by_current_user, "id"].tolist()

_still_pending = ~full_dataset["id"].isin(user_labeled_ids)
dataset = full_dataset[_still_pending].reset_index(drop=True)

# Nothing left to annotate: show the completion message and end this run.
if dataset.empty:
    st.write("### Great! You have completed your assignment. 🎉")
    st.stop()

def push_to_hf_hub(csv_path):
    """Upload a local CSV file to the annotations dataset repository.

    Ensures the ``HF_REPO`` dataset repository exists, then uploads
    ``csv_path`` to it under the name ``CSV_FILENAME``. Authentication uses
    the ``HF_TOKEN`` Streamlit secret. Any error is caught and printed; the
    function never raises for a failed upload and returns ``None``.

    Args:
        csv_path: Path of the local CSV file to upload.
    """
    api = HfApi()
    try:
        hub_token = st.secrets["HF_TOKEN"]
        # Arguments shared by both Hub calls.
        target = {"repo_id": HF_REPO, "repo_type": "dataset", "token": hub_token}
        api.create_repo(exist_ok=True, **target)
        api.upload_file(
            path_or_fileobj=csv_path,
            path_in_repo=CSV_FILENAME,
            **target,
        )
    except Exception as err:
        print(f"Error pushing to HF: {err}")
    else:
        print(f"Dataset updated: https://huggingface.co/datasets/{HF_REPO}")

def save_choice():
    """Record the answer for the current sample and advance to the next one.

    Reads the module-level ``dataset`` and ``existing_annotations`` and the
    Streamlit session state. If no answer has been selected, a warning is
    shown and nothing is recorded. Otherwise the current sample and the
    selection are appended to ``st.session_state.results``, the index is
    advanced and the per-sample selection state is reset.

    After the last sample, the session results are merged with the existing
    annotations (the newest row wins per ``username``/``id`` pair), written
    to ``CSV_FILENAME`` locally, pushed to the Hub, and the script run is
    stopped. Before that point, a rerun is requested so that the page shows
    the next sample.
    """
    if st.session_state.selected_answer is None:
        st.warning("Please select an answer before proceeding.")
        return

    sample = dataset.iloc[st.session_state.index]
    st.session_state.results.append({
        "username": st.session_state.username,
        "id": sample["id"],
        "video_id": sample["video_id"],
        "answer1": sample["answer1"],
        "answer2": sample["answer2"],
        "selected_answer": st.session_state.selected_answer,
        "target": sample["target"],
        "not_enough_info": st.session_state.not_enough_info
    })

    st.session_state.index += 1
    st.session_state.selected_answer = None
    st.session_state.not_enough_info = False

    if st.session_state.index < len(dataset):
        # This function runs after the page for the previous index has been
        # rendered; without a rerun the old sample would stay on screen.
        # Fall back to the older API name on Streamlit versions without
        # st.rerun.
        rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
        if rerun is not None:
            rerun()
        return

    st.write("### Great! You have completed your assignment. 🎉")
    result_df = pd.DataFrame(st.session_state.results)

    if not existing_annotations.empty:
        # keep="last" lets this session's rows override earlier ones.
        result_df = pd.concat([existing_annotations, result_df]).drop_duplicates(subset=["username", "id"], keep="last")

    # Same file name locally and in the Hub repository.
    result_df.to_csv(CSV_FILENAME, index=False)
    push_to_hf_hub(CSV_FILENAME)
    st.stop()

# save_choice leaves the index one past the end after the last sample. If the
# script reruns while `dataset` is still non-empty (e.g. the upload was not
# persisted), indexing below would raise IndexError, so stop here instead.
if st.session_state.index >= len(dataset):
    st.write("### Great! You have completed your assignment. 🎉")
    st.stop()

# Sample currently shown to the annotator.
sample = dataset.iloc[st.session_state.index]

st.markdown("<h1 style='text-align: center; font-size: 50px;'>MAIA Sample</h1>", unsafe_allow_html=True)
st.markdown(f"<h3 style='text-align: center;'>User: {st.session_state.username}</h3>", unsafe_allow_html=True)
st.write("\n\n")

st.markdown("""
### Instructions:
- Look at the video thumbnail, do not play it!
- Select one of the two answers.
- If you think the frame does not provide enough information, select the checkbox.
- Click 'Next' to proceed.
""")
st.write("---")

st.video(sample["video_url"])

st.markdown("<h4 style='text-align: center; margin-top: 20px;'>Scegli la descrizione corretta tra A e B</h4>", unsafe_allow_html=True)

# Answer buttons: 0 is stored for answer A, 1 for answer B.
col1, col2 = st.columns(2)
with col1:
    if st.button(f"A: {sample.get('answer1', 'No answer1 available')}"):
        st.session_state.selected_answer = 0
with col2:
    if st.button(f"B: {sample.get('answer2', 'No answer2 available')}"):
        st.session_state.selected_answer = 1

st.markdown("<div style='text-align: center;'>", unsafe_allow_html=True)
# The key is tied to the sample id so every sample gets a fresh, unchecked
# box; an unkeyed checkbox keeps its checked state when the next sample is
# shown and the stale flag would be recorded for that sample too.
st.session_state.not_enough_info = st.checkbox(
    "The frame does not provide enough information to answer the question.",
    key=f"not_enough_info_{sample['id']}",
)
st.markdown("</div>", unsafe_allow_html=True)
st.write("\n")

if st.button("Next"):
    save_choice()