"""Streamlit app for collecting human annotations on MAIA video samples.

Each annotator picks their name, is shown the samples assigned to them one at
a time, and chooses between two candidate answers. The selections are stored
as a CSV file in a Hugging Face dataset repository.
"""

import streamlit as st
import pandas as pd
import os
from huggingface_hub import HfApi, hf_hub_download

HF_REPO = "giobin/MAIA_human_assessment_annotations"
CSV_FILENAME = "user_selections.csv"

def assign_samples(csv_path, samples_per_group=50):
    """Split the sample pool into per-annotator assignments.

    Reads the CSV at ``csv_path``, ignores every row whose
    ``question_category`` ends with ``"_B"`` and, for each ``pool_pos`` value
    (1, 2 and 3), keeps the first ``samples_per_group`` remaining rows.

    Args:
        csv_path: Location of the pool CSV (anything ``pandas.read_csv``
            accepts). It must provide the ``pool_pos`` and
            ``question_category`` columns.
        samples_per_group: Maximum number of rows kept for each pool position.

    Returns:
        A dict mapping annotator name to the DataFrame assigned to them.
        Annotators of the same group share one DataFrame object.
    """
    df = pd.read_csv(csv_path)

    # Computed once for all groups. na=False makes rows without a category
    # count as "not _B"; otherwise the mask holds NaN and `~` cannot invert it.
    not_b_category = ~df["question_category"].str.endswith("_B", na=False)

    annotators_by_pool_pos = {
        1: ("Bernardo", "Alessandro", "Alessio"),
        2: ("Lenci", "Lucia", "Davide"),
        3: ("Giovanni", "Raffaella"),
    }

    assignments = {}
    for pool_pos, annotators in annotators_by_pool_pos.items():
        group = df[(df["pool_pos"] == pool_pos) & not_b_category].head(samples_per_group)
        for annotator in annotators:
            assignments[annotator] = group
    return assignments

def load_existing_annotations():
    """Load the annotations already stored in the HF dataset repo.

    Downloads ``CSV_FILENAME`` from the ``HF_REPO`` dataset, authenticating
    with the ``HF_TOKEN`` Streamlit secret, and parses it with pandas.

    Returns:
        The stored annotations as a DataFrame. If the download or the parsing
        fails for any reason, an empty DataFrame with only the ``username``
        and ``id`` columns is returned instead.
    """
    try:
        file_path = hf_hub_download(HF_REPO, CSV_FILENAME, repo_type="dataset", token=st.secrets["HF_TOKEN"])
        return pd.read_csv(file_path)
    except Exception as e:
        # The fallback is deliberate (the file may not exist yet), but the
        # cause is reported rather than hidden, as push_to_hf_hub does for
        # upload errors, so auth or network problems are visible in the logs.
        print(f"Error loading existing annotations from HF: {e}")
        return pd.DataFrame(columns=["username", "id"])

# Build the per-annotator assignments and fetch what was annotated so far
csv_file = "static/mc.csv"
assignments = assign_samples(csv_file)
existing_annotations = load_existing_annotations()

# Only names that have an assignment can be selected
valid_users = [*assignments]

# Seed the session state; keys that already exist keep their current value
for _state_key, _initial_value in (("username", None), ("index", 0), ("results", [])):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

def update_name():
    """Store the name picked in the form and go back to the first sample."""
    state = st.session_state
    state["username"] = state["selected_user"]
    # Progress restarts from position 0 whenever a name is submitted
    state["index"] = 0

# Until a name has been chosen, show only the selection form and halt the run
if st.session_state.username is None:
    with st.form("user_form"):
        st.write("### Select Your Name")
        # The chosen value is read back through its session-state key by
        # update_name, so the widgets' return values are not needed here.
        st.selectbox("Choose your name:", valid_users, key="selected_user")
        st.form_submit_button("Start", on_click=update_name)
    st.stop()

# Get assigned dataset and remove already labeled samples
full_dataset = assignments[st.session_state.username].reset_index(drop=True)
user_labeled_ids = existing_annotations[existing_annotations["username"] == st.session_state.username]["id"].tolist()
dataset = full_dataset[~full_dataset["id"].isin(user_labeled_ids)].reset_index(drop=True)

# If all samples are labeled, stop execution
if dataset.empty:
    st.write("### Great! You have completed your assignment. 🎉")
    st.stop()

def push_to_hf_hub(csv_path):
    """Upload the annotation CSV to the HF dataset repo.

    Makes sure the ``HF_REPO`` dataset repository exists, then uploads the
    local file as ``CSV_FILENAME``, authenticating with the ``HF_TOKEN``
    Streamlit secret.

    Args:
        csv_path: Path of the local CSV file to upload.

    Returns:
        True when the upload succeeded, False otherwise. Failures are
        reported but never raised.
    """
    api = HfApi()
    try:
        token = st.secrets["HF_TOKEN"]
        api.create_repo(HF_REPO, repo_type="dataset", exist_ok=True, token=token)
        api.upload_file(path_or_fileobj=csv_path, path_in_repo=CSV_FILENAME, repo_id=HF_REPO, repo_type="dataset", token=token)
    except Exception as e:
        print(f"Error pushing to HF: {e}")
        # The server log alone is invisible to the annotator, who would
        # otherwise believe the answers were stored.
        st.error("Your answers could not be uploaded. Please report this before closing the page.")
        return False
    print(f"Dataset updated: https://huggingface.co/datasets/{HF_REPO}")
    return True

def save_choice(choice_index):
    """Record the answer chosen for the current sample and move to the next.

    Appends one row to ``st.session_state.results``, advances
    ``st.session_state.index`` and clears the "not enough information"
    checkbox. Once the last remaining sample has been answered, the session's
    results are merged with the previously stored annotations, written to a
    local CSV file, uploaded with ``push_to_hf_hub`` and the run is stopped.

    Args:
        choice_index: Value stored as ``selected_answer``; the answer buttons
            pass 0 for answer A and 1 for answer B.
    """
    state = st.session_state
    sample = dataset.iloc[state.index]
    state.results.append({
        "username": state.username,
        "id": sample["id"],
        "video_id": sample["video_id"],
        "answer1": sample["answer1"],
        "answer2": sample["answer2"],
        "selected_answer": choice_index,
        "target": sample["target"],
        # Read the checkbox through its own widget key rather than through a
        # module global that is only assigned further down the script.
        "not_enough_info": bool(state.get("checkbox", False)),
    })

    state.index += 1
    state.checkbox = False  # reset the checkbox for the next sample

    if state.index < len(dataset):
        return

    # All remaining samples done: persist everything collected in this session
    st.write("### Great! You have completed your assignment. 🎉")
    result_df = pd.DataFrame(state.results)
    csv_path = "user_selections.csv"

    if not existing_annotations.empty:
        # keep="last" lets this session's rows win over stored rows with the
        # same (username, id) pair.
        result_df = pd.concat([existing_annotations, result_df]).drop_duplicates(subset=["username", "id"], keep="last")

    result_df.to_csv(csv_path, index=False)
    push_to_hf_hub(csv_path)
    st.stop()

# Guard against a position past the end of the remaining samples (for example
# a rerun after the last answer was recorded but the stored annotations did
# not change), which would otherwise raise IndexError on the lookup below.
if st.session_state.index >= len(dataset):
    st.write("### Great! You have completed your assignment. 🎉")
    st.stop()

# Select the current sample
sample = dataset.iloc[st.session_state.index]

# Title
st.markdown("<h1 style='text-align: center; font-size: 50px;'>MAIA Sample</h1>", unsafe_allow_html=True)

# Centered user name
st.markdown(f"<h3 style='text-align: center;'>User: {st.session_state.username}</h3>", unsafe_allow_html=True)

st.write("\n\n")  # Add empty space

# Instructions
st.markdown("""
### Instructions:
- Look at the video thumbnail, do not play it!
- Select the checkbox if you think so.
- Then choose one of the two answers.
""")

st.write("---")  # Adds a horizontal divider for better separation

# Display Video
st.video(sample["video_url"])

# Question Prompt
st.markdown("<h4 style='text-align: center; margin-top: 20px;'>Scegli la descrizione corretta tra A e B</h4>", unsafe_allow_html=True)

# Checkbox for uncertainty; its state lives under the 'checkbox' key so that
# save_choice can reset it after every answer.
# NOTE(review): the opening and closing <div> are emitted by separate
# st.markdown calls, so they presumably do not wrap the checkbox - confirm
# whether the centering has any effect.
st.markdown("<div style='text-align: center;'>", unsafe_allow_html=True)
not_enough_info = st.checkbox("The frame does not provide enough information to answer the question.", key='checkbox')
st.markdown("</div>", unsafe_allow_html=True)

st.write("\n")  # Add spacing before buttons

# Answer buttons, side by side; the chosen answer's index (0 = A, 1 = B) is
# handed to save_choice through the callback arguments.
col1, col2 = st.columns(2)
with col1:
    st.button(f"A: {sample.get('answer1', 'No answer1 available')}", on_click=save_choice, args=(0,))
with col2:
    st.button(f"B: {sample.get('answer2', 'No answer2 available')}", on_click=save_choice, args=(1,))