File size: 4,662 Bytes
d9cbdf1
138d0d5
 
 
d9cbdf1
138d0d5
 
57bf5d5
138d0d5
 
57bf5d5
138d0d5
 
 
57bf5d5
138d0d5
 
 
 
 
 
 
 
 
 
ff62d04
138d0d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import streamlit as st
import pandas as pd
import os
from huggingface_hub import HfApi, hf_hub_download

HF_REPO = "giobin/MAIA_human_assessment_annotations"
CSV_FILENAME = "user_selections.csv"

def assign_samples(csv_path, samples_per_group=3):
    """Map each annotator name to the samples they have to label.

    Reads the CSV at *csv_path*, ignores every row whose
    ``question_category`` ends with ``"_B"`` and, for each ``pool_pos``
    value 1, 2 and 3, keeps the first *samples_per_group* remaining rows.
    Annotators that belong to the same group share the same DataFrame.

    Args:
        csv_path: Location of the samples CSV (anything accepted by
            ``pandas.read_csv``). It must contain the columns ``pool_pos``
            and ``question_category``.
        samples_per_group: Maximum number of rows kept for each group.

    Returns:
        A dict mapping annotator name to the DataFrame of that annotator's
        group, in the fixed order the names are listed below.
    """
    df = pd.read_csv(csv_path)

    # na=False makes a missing category count as "does not end with _B";
    # without it the mask contains NaN and the `~` below raises TypeError.
    is_b_category = df["question_category"].str.endswith("_B", na=False)

    group_members = {
        1: ("Bernardo", "Alessandro", "Alessio"),
        2: ("Lenci", "Lucia", "Davide"),
        3: ("Giovanni", "Raffaella"),
    }

    assignments = {}
    for pool_pos, members in group_members.items():
        group = df[(df["pool_pos"] == pool_pos) & ~is_b_category].head(samples_per_group)
        for name in members:
            assignments[name] = group
    return assignments

def load_existing_annotations():
    """Load the annotations already stored in the HF dataset.

    Downloads ``CSV_FILENAME`` from the ``HF_REPO`` dataset repository and
    parses it with pandas.

    Returns:
        The parsed DataFrame, or an empty DataFrame with the columns
        ``username`` and ``id`` when the file cannot be downloaded or read.
    """
    try:
        file_path = hf_hub_download(HF_REPO, CSV_FILENAME, repo_type="dataset")
        return pd.read_csv(file_path)
    except Exception as e:
        # Still best-effort (a missing file just means nothing is labeled
        # yet), but report the cause so real failures are not hidden.
        print(f"Error loading annotations from HF: {e}")
        return pd.DataFrame(columns=["username", "id"])

# Sample assignments per annotator and the annotations already on the Hub
csv_file = "static/mc.csv"
assignments = assign_samples(csv_file)
existing_annotations = load_existing_annotations()

valid_users = [*assignments]

# Seed the session state with defaults for any key that is not set yet
for state_key, initial_value in (("username", None), ("index", 0), ("results", [])):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

def update_name():
    """Make the name chosen in the form the active user and restart at sample 0."""
    session = st.session_state
    session.username = session.selected_user
    session.index = 0  # progress starts over for the newly selected user

# Ask for the annotator's name first; nothing below runs until one is set
if st.session_state.username is None:
    with st.form("user_form"):
        st.write("### Select Your Name")
        # The choice is stored under st.session_state.selected_user (via
        # `key`), which update_name reads when the form is submitted.
        selected_user = st.selectbox("Choose your name:", valid_users, key="selected_user")
        submit_button = st.form_submit_button("Start", on_click=update_name)
    st.stop()

# Get assigned dataset and remove already labeled samples
full_dataset = assignments[st.session_state.username].reset_index(drop=True)
user_labeled_ids = existing_annotations[existing_annotations["username"] == st.session_state.username]["id"].tolist()
dataset = full_dataset[~full_dataset["id"].isin(user_labeled_ids)].reset_index(drop=True)

# If all samples are labeled, stop execution
if dataset.empty:
    # The emoji was stored as mis-decoded bytes; restored to the intended character.
    st.write("### Great! You have completed your assignment. 🎉")
    st.stop()

def push_to_hf_hub(csv_path):
    """Upload the CSV at *csv_path* to the HF dataset as ``CSV_FILENAME``.

    The dataset repository is created first if it does not exist. Any
    failure is printed rather than raised.
    """
    hub = HfApi()
    upload_kwargs = {
        "path_or_fileobj": csv_path,
        "path_in_repo": CSV_FILENAME,
        "repo_id": HF_REPO,
        "repo_type": "dataset",
    }
    try:
        hub.create_repo(HF_REPO, repo_type="dataset", exist_ok=True)
        hub.upload_file(**upload_kwargs)
        print(f"Dataset updated: https://huggingface.co/datasets/{HF_REPO}")
    except Exception as err:
        print(f"Error pushing to HF: {err}")

def save_choice(choice_index):
    """Record the annotator's answer for the current sample and advance.

    Appends one result row to ``st.session_state.results``, moves
    ``st.session_state.index`` to the next sample and clears the
    "not enough information" checkbox. Once the last remaining sample has
    been answered, the session's results are merged with the annotations
    loaded at startup, written to ``user_selections.csv`` and pushed to the
    HF dataset.

    Args:
        choice_index: Index of the selected answer; the buttons pass 0 for
            "Answer 1" and 1 for "Answer 2".
    """
    sample = dataset.iloc[st.session_state.index]
    # Read the checkbox through its session-state key instead of relying on
    # a module-level variable that is only defined further down the script.
    not_enough_info = bool(st.session_state.get("checkbox", False))
    st.session_state.results.append({
        "username": st.session_state.username,
        "id": sample["id"],
        "video_id": sample["video_id"],
        "answer1": sample["answer1"],
        "answer2": sample["answer2"],
        "selected_answer": choice_index,
        "target": sample["target"],
        "not_enough_info": not_enough_info
    })

    st.session_state.index += 1
    st.session_state.checkbox = False  # reset the checkbox for the next sample

    if st.session_state.index >= len(dataset):  # All remaining samples done
        # The emoji was stored as mis-decoded bytes; restored to the intended character.
        st.write("### Great! You have completed your assignment. 🎉")
        result_df = pd.DataFrame(st.session_state.results)
        csv_path = "user_selections.csv"

        if not existing_annotations.empty:
            # keep="last" lets this session's rows win over older rows with
            # the same (username, id) pair.
            result_df = pd.concat([existing_annotations, result_df]).drop_duplicates(subset=["username", "id"], keep="last")

        result_df.to_csv(csv_path, index=False)
        push_to_hf_hub(csv_path)
        st.stop()

# Sample the annotator is currently looking at
sample = dataset.iloc[st.session_state.index]

# Header, question category and the video itself
st.write(f"## Video Question Answering Sample - User: {st.session_state.username}")
category = sample.get('question_category', 'No category available')
st.write(f"**Question Category:** {category}")
st.video(sample["video_url"])

# Lets the annotator flag that the question cannot be answered from the video
not_enough_info = st.checkbox("The frame does not provide enough information to answer the question.", key='checkbox')

# One button per candidate answer; save_choice receives the 0-based answer index
for choice_index, answer_key in enumerate(("answer1", "answer2")):
    answer_text = sample.get(answer_key, f"No {answer_key} available")
    st.button(f"Select Answer {choice_index + 1}: {answer_text}", on_click=save_choice, args=(choice_index,))