File size: 5,377 Bytes
d9cbdf1
138d0d5
 
 
d9cbdf1
138d0d5
 
57bf5d5
138d0d5
 
ccaeded
 
 
138d0d5
 
 
 
 
 
 
 
 
 
ff62d04
138d0d5
 
d88bb9d
138d0d5
 
ccaeded
138d0d5
 
 
 
 
 
 
 
 
 
 
 
 
ccaeded
 
 
 
138d0d5
 
 
ccaeded
138d0d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d88bb9d
 
138d0d5
 
 
 
ccaeded
 
 
 
 
138d0d5
 
 
 
 
 
 
ccaeded
138d0d5
ccaeded
138d0d5
 
 
ccaeded
 
138d0d5
ccaeded
138d0d5
 
 
 
 
 
 
 
 
 
 
 
 
 
60f83a0
19ec3d7
ccaeded
19ec3d7
60f83a0
 
 
ccaeded
 
 
60f83a0
ccaeded
19ec3d7
138d0d5
 
19ec3d7
 
 
 
ccaeded
 
19ec3d7
ccaeded
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import streamlit as st
import pandas as pd
import os
from huggingface_hub import HfApi, hf_hub_download

# Hugging Face Hub repo id that stores the collected annotations; it is
# accessed with repo_type="dataset" everywhere it is used in this file.
HF_REPO = "giobin/MAIA_human_assessment_annotations"
# Name of the annotations CSV inside HF_REPO.
CSV_FILENAME = "user_selections.csv"

def assign_samples(csv_path):
    """Build the per-annotator sample assignment from the samples CSV.

    Rows whose ``question_category`` ends with ``"_B"`` are discarded. From the
    remaining rows, the first five with ``pool_pos`` equal to 1, 2 and 3 form
    three groups, and every annotator is mapped to one of those groups.

    Args:
        csv_path: Path to a CSV file with at least the ``pool_pos`` and
            ``question_category`` columns.

    Returns:
        A dict mapping annotator name to the DataFrame of samples assigned to
        them. Annotators in the same group share the same DataFrame object.
    """
    df = pd.read_csv(csv_path)

    # na=False: a missing category does not end with "_B", and it keeps the
    # mask strictly boolean so that `~` below is a valid logical negation.
    is_b_variant = df["question_category"].str.endswith("_B", na=False)
    candidates = df[~is_b_variant]

    def first_five_of_pool(position):
        # Same selection for every pool; only the pool position differs.
        return candidates[candidates["pool_pos"] == position].head(5)

    group_1 = first_five_of_pool(1)
    group_2 = first_five_of_pool(2)
    group_3 = first_five_of_pool(3)

    return {
        "Bernardo": group_1,
        "Alessandro": group_1,
        "Alessio": group_1,
        "Lenci": group_2,
        "Lucia": group_2,
        "Davide": group_2,
        "Giovanni": group_3,
        "Raffaella": group_3,
    }

def load_existing_annotations():
    """Fetch the annotations already stored on the Hugging Face Hub.

    Returns:
        The contents of ``CSV_FILENAME`` from the ``HF_REPO`` dataset repo as a
        DataFrame, or an empty DataFrame with ``username`` and ``id`` columns
        when the file cannot be downloaded or parsed.
    """
    try:
        file_path = hf_hub_download(
            HF_REPO,
            CSV_FILENAME,
            repo_type="dataset",
            token=st.secrets["HF_TOKEN"],
        )
        return pd.read_csv(file_path)
    except Exception as e:
        # Best-effort: keep the app usable when nothing can be loaded, but
        # log the reason instead of hiding it.
        # NOTE(review): an empty frame is indistinguishable from "nothing
        # annotated yet" for callers — confirm that is acceptable when the
        # failure is transient (network, auth) rather than a missing file.
        print(f"Could not load existing annotations from HF: {e}")
        return pd.DataFrame(columns=["username", "id"])

# Sample assignments per annotator and the annotations already on the Hub.
csv_file = "static/mc.csv"
assignments = assign_samples(csv_file)
existing_annotations = load_existing_annotations()

# Names offered in the login form.
valid_users = [*assignments]

# Seed each session-state entry only if it is not already present, so values
# set during earlier runs of the script are preserved.
for _state_key, _initial_value in (
    ("username", None),
    ("index", 0),
    ("results", []),
    ("selected_answer", None),
    ("not_enough_info", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

def update_name():
    """Store the name chosen in the ``selected_user`` widget and reset the sample index."""
    state = st.session_state
    state.username = state.selected_user
    state.index = 0

# Until a user name has been chosen, show only the name-selection form.
if st.session_state.username is None:
    with st.form("user_form"):
        st.write("### Select Your Name")
        selected_user = st.selectbox("Choose your name:", valid_users, key="selected_user")
        submit_button = st.form_submit_button("Start", on_click=update_name)
    st.stop()

# Samples assigned to this user, minus those already present in the stored
# annotations under the same username.
full_dataset = assignments[st.session_state.username].reset_index(drop=True)
user_labeled_ids = existing_annotations[existing_annotations["username"] == st.session_state.username]["id"].tolist()
dataset = full_dataset[~full_dataset["id"].isin(user_labeled_ids)].reset_index(drop=True)

if dataset.empty:
    # The emoji was mis-encoded ("πŸŽ‰") in the original message.
    st.write("### Great! You have completed your assignment. 🎉")
    st.stop()

def push_to_hf_hub(csv_path):
    """Upload a local CSV to the annotations dataset repo on the Hugging Face Hub.

    The repo is created first if needed (``exist_ok=True``), then the file is
    uploaded as ``CSV_FILENAME``. Any exception is caught and printed; nothing
    is raised to the caller.

    Args:
        csv_path: Path of the local CSV file to upload.
    """
    hub = HfApi()
    try:
        hf_token = st.secrets["HF_TOKEN"]
        hub.create_repo(HF_REPO, repo_type="dataset", exist_ok=True, token=hf_token)
        hub.upload_file(
            path_or_fileobj=csv_path,
            path_in_repo=CSV_FILENAME,
            repo_id=HF_REPO,
            repo_type="dataset",
            token=hf_token,
        )
        print(f"Dataset updated: https://huggingface.co/datasets/{HF_REPO}")
    except Exception as err:
        print(f"Error pushing to HF: {err}")

def save_choice():
    """Record the answer for the current sample and move to the next one.

    If no answer has been selected, a warning is shown and nothing is stored.
    Otherwise the choice is appended to ``st.session_state.results``, the index
    is advanced and the per-sample selection state is cleared.

    After the last sample, the session results are merged with the previously
    stored annotations (the newest row wins for each ``username``/``id`` pair),
    written to ``CSV_FILENAME`` locally, pushed to the Hub, and the script run
    is stopped with ``st.stop()``.
    """
    if st.session_state.selected_answer is None:
        st.warning("Please select an answer before proceeding.")
        return

    sample = dataset.iloc[st.session_state.index]
    st.session_state.results.append({
        "username": st.session_state.username,
        "id": sample["id"],
        "video_id": sample["video_id"],
        "answer1": sample["answer1"],
        "answer2": sample["answer2"],
        "selected_answer": st.session_state.selected_answer,
        "target": sample["target"],
        "not_enough_info": st.session_state.not_enough_info,
    })

    st.session_state.index += 1
    st.session_state.selected_answer = None
    st.session_state.not_enough_info = False

    # More samples left: nothing to upload yet.
    if st.session_state.index < len(dataset):
        return

    # The emoji was mis-encoded ("πŸŽ‰") in the original message.
    st.write("### Great! You have completed your assignment. 🎉")
    result_df = pd.DataFrame(st.session_state.results)

    if not existing_annotations.empty:
        # keep="last" lets this session's rows override stored rows for the
        # same (username, id) pair.
        result_df = pd.concat([existing_annotations, result_df]).drop_duplicates(
            subset=["username", "id"], keep="last"
        )

    # Use the shared constant rather than repeating the file name literal.
    result_df.to_csv(CSV_FILENAME, index=False)
    push_to_hf_hub(CSV_FILENAME)
    st.stop()

# The stored index can point past the end of `dataset` (for example when the
# final upload failed and the page is used again); stop instead of raising
# IndexError from `iloc`.
if st.session_state.index >= len(dataset):
    st.warning("There are no more samples to show. Please reload the page.")
    st.stop()

sample = dataset.iloc[st.session_state.index]

st.markdown("<h1 style='text-align: center; font-size: 50px;'>MAIA Sample</h1>", unsafe_allow_html=True)
st.markdown(f"<h3 style='text-align: center;'>User: {st.session_state.username}</h3>", unsafe_allow_html=True)
st.write("\n\n")

st.markdown("""
### Instructions:
- Look at the video thumbnail, do not play it!
- Select one of the two answers.
- If you think the frame does not provide enough information, select the checkbox.
- Click 'Next' to proceed.
""")
st.write("---")

st.video(sample["video_url"])

st.markdown("<h4 style='text-align: center; margin-top: 20px;'>Scegli la descrizione corretta tra A e B</h4>", unsafe_allow_html=True)

# Clicking an answer only stores the choice; it is saved when "Next" is clicked.
col1, col2 = st.columns(2)
with col1:
    if st.button(f"A: {sample.get('answer1', 'No answer1 available')}"):
        st.session_state.selected_answer = 0
with col2:
    if st.button(f"B: {sample.get('answer2', 'No answer2 available')}"):
        st.session_state.selected_answer = 1

st.markdown("<div style='text-align: center;'>", unsafe_allow_html=True)
# A per-sample key gives every sample its own checkbox state, so a tick made
# for one sample does not carry over to the next one.
st.session_state.not_enough_info = st.checkbox(
    "The frame does not provide enough information to answer the question.",
    key=f"not_enough_info_{sample['id']}",
)
st.markdown("</div>", unsafe_allow_html=True)
st.write("\n")

if st.button("Next"):
    index_before = st.session_state.index
    save_choice()
    if st.session_state.index != index_before:
        # The answer was stored and the index advanced, but the widgets above
        # were rendered for the previous sample. Rerun so the page shows the
        # sample that the next click will be recorded against.
        # `st.rerun` is the current API; `st.experimental_rerun` is its older name.
        _rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
        if _rerun is not None:
            _rerun()