File size: 5,840 Bytes
d9cbdf1
138d0d5
 
d9cbdf1
8823c0c
138d0d5
 
57bf5d5
8823c0c
138d0d5
 
fadec32
 
 
138d0d5
 
 
 
 
 
 
 
 
 
ff62d04
8823c0c
138d0d5
 
d88bb9d
138d0d5
 
ccaeded
138d0d5
8823c0c
138d0d5
 
 
 
8823c0c
138d0d5
 
8823c0c
138d0d5
 
 
 
 
 
 
fadec32
 
 
 
 
 
138d0d5
 
4a58e4d
 
 
138d0d5
 
8823c0c
138d0d5
 
 
 
8823c0c
 
4a58e4d
138d0d5
 
8823c0c
138d0d5
 
 
d88bb9d
 
138d0d5
 
 
 
8823c0c
31b5d12
138d0d5
31b5d12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8823c0c
 
ccaeded
4a58e4d
138d0d5
 
 
 
 
 
 
 
8823c0c
138d0d5
 
8823c0c
60f83a0
19ec3d7
ccaeded
19ec3d7
8823c0c
60f83a0
4a58e4d
 
 
 
 
60f83a0
ccaeded
19ec3d7
8823c0c
138d0d5
 
8823c0c
 
 
 
 
 
4a58e4d
8823c0c
 
 
 
 
4a58e4d
31b5d12
 
4a58e4d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import streamlit as st
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download

# Constants
HF_REPO = "giobin/MAIA_human_assessment_annotations"
CSV_FILENAME = "user_selections.csv"

# Function to assign samples to users
def assign_samples(csv_path, samples_per_group=5):
    """Build the annotator -> samples assignment from the samples CSV.

    The CSV is read with pandas and must provide the ``pool_pos`` and
    ``question_category`` columns. Rows whose ``question_category`` ends with
    ``"_B"`` are discarded; for each pool position used below (1, 2, 3) the
    first ``samples_per_group`` remaining rows, in file order, form that
    pool's group.

    Args:
        csv_path: Path or file-like object accepted by ``pandas.read_csv``.
        samples_per_group: Maximum number of rows kept per pool. Defaults to
            5, the value that was previously hard-coded.

    Returns:
        dict mapping each annotator name to the DataFrame of their pool.
        Annotators of the same pool share the same DataFrame object, and the
        rows keep the index labels of the CSV.
    """
    df = pd.read_csv(csv_path)

    # na=False: a missing category counts as "not a _B variant". Without it
    # the mask contains NaN and the `~` negation below raises TypeError.
    is_b_variant = df["question_category"].str.endswith("_B", na=False)
    eligible = df[~is_b_variant]

    # Annotator -> pool position. Insertion order is the order in which the
    # names are returned to the caller.
    user_pools = {
        "Bernardo": 1,
        "Alessandro": 1,
        "Alessio": 1,
        "Lenci": 2,
        "Lucia": 2,
        "Davide": 2,
        "Giovanni": 3,
        "Raffaella": 3,
    }

    # Build each pool's group once so annotators of a pool share one frame.
    groups = {
        pool: eligible[eligible["pool_pos"] == pool].head(samples_per_group)
        for pool in sorted(set(user_pools.values()))
    }
    return {user: groups[pool] for user, pool in user_pools.items()}

# Function to load existing annotations from Hugging Face Hub
def load_existing_annotations():
    """Fetch the annotations already stored on the Hugging Face Hub.

    Downloads ``CSV_FILENAME`` from the ``HF_REPO`` dataset repository,
    authenticating with the ``HF_TOKEN`` Streamlit secret, and parses it with
    pandas.

    Returns:
        The DataFrame read from the downloaded CSV, or an empty DataFrame with
        the columns ``username`` and ``id`` when the download or the parsing
        fails for any reason.
    """
    try:
        file_path = hf_hub_download(HF_REPO, CSV_FILENAME, repo_type="dataset", token=st.secrets["HF_TOKEN"])
        return pd.read_csv(file_path)
    except Exception as e:
        # Best-effort by design: the app must still start when no annotation
        # file exists yet. The failure is logged rather than swallowed.
        # NOTE(review): the caller cannot tell "no annotations yet" apart
        # from a failed download, because both yield an empty frame.
        print(f"Error loading annotations from HF: {e}")
        return pd.DataFrame(columns=["username", "id"])

# Load the per-annotator assignments and the annotations already on the Hub
csv_file = "static/mc.csv"
assignments = assign_samples(csv_file)
existing_annotations = load_existing_annotations()

# Annotator names, in the order of the assignment mapping
valid_users = [*assignments]

# Seed session state only for keys that are not set yet, so values survive reruns
for _state_key, _initial in (("username", None), ("index", 0), ("results", [])):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial

# User selection
def update_name():
    """Form callback: store the chosen annotator and restart at the first sample."""
    state = st.session_state
    # The right-hand side is read first, then username and index are set in order.
    state.username, state.index = state.selected_user, 0

# No annotator chosen yet: show the login form and halt this run
if st.session_state.username is None:
    with st.form("user_form"):
        st.write("### Seleziona il tuo nome")
        selected_user = st.selectbox("Nome:", valid_users, key="selected_user")
        submit_button = st.form_submit_button("Inizia il task", on_click=update_name)
    st.stop()

# Samples assigned to the annotator, minus those they already annotated
current_user = st.session_state.username
full_dataset = assignments[current_user].reset_index(drop=True)
annotated_by_user = existing_annotations["username"] == current_user
user_labeled_ids = existing_annotations.loc[annotated_by_user, "id"].tolist()
already_labeled = full_dataset["id"].isin(user_labeled_ids)
dataset = full_dataset[~already_labeled].reset_index(drop=True)

# Nothing left to annotate: show the completion message and halt this run
if len(dataset) <= st.session_state.index:
    st.write("### Ottimo. Hai completato il tuo task! 🎉")
    st.stop()

# Function to push updated annotations to Hugging Face Hub
def push_to_hf_hub(csv_path):
    """Upload the annotations CSV to the Hugging Face dataset repository.

    Creates ``HF_REPO`` as a dataset repository when it does not exist yet,
    then uploads ``csv_path`` as ``CSV_FILENAME``, authenticating with the
    ``HF_TOKEN`` Streamlit secret. Any exception raised inside the ``try``
    block is caught and printed, not propagated.

    Args:
        csv_path: Local path of the CSV file to upload.
    """
    api = HfApi()
    try:
        # Keyword arguments shared by both Hub calls
        hub_target = {"repo_type": "dataset", "token": st.secrets["HF_TOKEN"]}
        api.create_repo(HF_REPO, exist_ok=True, **hub_target)
        api.upload_file(
            path_or_fileobj=csv_path,
            path_in_repo=CSV_FILENAME,
            repo_id=HF_REPO,
            **hub_target,
        )
        print(f"Dataset updated: https://huggingface.co/datasets/{HF_REPO}")
    except Exception as exc:
        print(f"Error pushing to HF: {exc}")

# Function to save user choice
def save_choice():
    """Record the annotator's answer for the current sample (form callback).

    Reads the ``selected_answer`` and ``not_enough_info`` widget values from
    session state. When an answer was picked, a result row is appended to
    ``st.session_state.results``, the sample index is advanced, and both
    widget values are reset so the next sample starts from a blank form.
    When no answer was picked, nothing is recorded and the index is
    unchanged.

    Once the index reaches the end of ``dataset``, the collected results are
    merged with ``existing_annotations`` (the last row per
    ``(username, id)`` pair wins), written to ``user_selections.csv``, pushed
    to the Hugging Face Hub, and the script is stopped.
    """
    sample = dataset.iloc[st.session_state.index]
    selected_answer = st.session_state.get("selected_answer", None)
    not_enough_info = st.session_state.get("not_enough_info", False)

    if selected_answer is not None:
        st.session_state.results.append({
            "username": st.session_state.username,
            "id": sample["id"],
            "video_id": sample["video_id"],
            "answer1": sample["answer1"],
            "answer2": sample["answer2"],
            "selected_answer": selected_answer,
            "target": sample["target"],
            "not_enough_info": not_enough_info
        })
        st.session_state.index += 1

        # Keyed widgets keep their value across reruns. Without this reset
        # the previous tick (and possibly the previous choice) would be
        # pre-filled on the next sample. Setting widget state is allowed
        # here because callbacks run before the widgets are re-created.
        st.session_state["selected_answer"] = None
        st.session_state["not_enough_info"] = False

    # Save results and push to Hugging Face Hub if all samples are labeled
    if st.session_state.index >= len(dataset):
        st.write("### Ottimo. Hai completato il tuo task! 🎉")
        result_df = pd.DataFrame(st.session_state.results)
        csv_path = "user_selections.csv"
        if not existing_annotations.empty:
            # keep="last": rows from this session override older rows for
            # the same (username, id) pair.
            result_df = pd.concat([existing_annotations, result_df]).drop_duplicates(subset=["username", "id"], keep="last")
        result_df.to_csv(csv_path, index=False)
        push_to_hf_hub(csv_path)
        st.stop()

# Sample shown on this run
sample = dataset.iloc[st.session_state.index]

# Centered page header followed by the active annotator's name
page_title_html = "<h1 style='text-align: center; font-size: 50px;'>MAIA Sample</h1>"
annotator_html = f"<h3 style='text-align: center;'>User: {st.session_state.username}</h3>"
st.markdown(page_title_html, unsafe_allow_html=True)
st.markdown(annotator_html, unsafe_allow_html=True)
st.write("\n\n")

# Annotation instructions (Italian, shown verbatim), then a horizontal rule
instructions_md = """
### Istruzioni:
- Osserva attentamente il frame del video (senza premere play) e cerca di capire il contesto della scena
- Valuta le opzioni (A e B) e seleziona quella che ritieni più attinente al video.
- Se il frame non contiene tutte le informazioni necessarie per decidere con certezza l’alternativa, seleziona il checkbox sottostante. 
- Clicca 'Continua' per procedere.
"""
st.markdown(instructions_md)
st.write("---")

# Embed the video of the current sample
st.video(sample["video_url"])


def _option_label(option):
    """Label radio option 0 as answer A and any other option as answer B."""
    if option == 0:
        return f"A: {sample['answer1']}"
    return f"B: {sample['answer2']}"


# Annotation form: one A/B choice, one independent checkbox, one submit button
with st.form("annotation_form"):
    # No option is pre-selected (index=None)
    selected_answer = st.radio(
        "Choose the correct description:",
        options=[0, 1],
        index=None,
        format_func=_option_label,
        key="selected_answer",
    )

    # Flag that does not depend on the A/B choice
    not_enough_info = st.checkbox(
        "Il frame non contiene abbastanza informazioni per scegliere con sicurezza",
        key="not_enough_info",
    )

    # save_choice runs as the submit callback
    submit_button = st.form_submit_button("Continua", on_click=save_choice)