Spaces:
Sleeping
Sleeping
form implementation
Browse files
app.py
CHANGED
@@ -1,11 +1,12 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
-
import os
|
4 |
from huggingface_hub import HfApi, hf_hub_download
|
5 |
|
|
|
6 |
HF_REPO = "giobin/MAIA_human_assessment_annotations"
|
7 |
CSV_FILENAME = "user_selections.csv"
|
8 |
|
|
|
9 |
def assign_samples(csv_path):
|
10 |
df = pd.read_csv(csv_path)
|
11 |
group_1 = df[(df["pool_pos"] == 1) & (~df["question_category"].str.endswith("_B"))].head(10)
|
@@ -22,6 +23,7 @@ def assign_samples(csv_path):
|
|
22 |
"Raffaella": group_3,
|
23 |
}
|
24 |
|
|
|
25 |
def load_existing_annotations():
|
26 |
try:
|
27 |
file_path = hf_hub_download(HF_REPO, CSV_FILENAME, repo_type="dataset", token=st.secrets["HF_TOKEN"])
|
@@ -29,42 +31,44 @@ def load_existing_annotations():
|
|
29 |
except Exception:
|
30 |
return pd.DataFrame(columns=["username", "id"])
|
31 |
|
|
|
32 |
csv_file = "static/mc.csv"
|
33 |
assignments = assign_samples(csv_file)
|
34 |
existing_annotations = load_existing_annotations()
|
35 |
|
|
|
36 |
valid_users = list(assignments.keys())
|
37 |
|
|
|
38 |
if "username" not in st.session_state:
|
39 |
st.session_state.username = None
|
40 |
if "index" not in st.session_state:
|
41 |
st.session_state.index = 0
|
42 |
if "results" not in st.session_state:
|
43 |
st.session_state.results = []
|
44 |
-
if "selected_answer" not in st.session_state:
|
45 |
-
st.session_state.selected_answer = None
|
46 |
-
if "not_enough_info" not in st.session_state:
|
47 |
-
st.session_state.not_enough_info = False
|
48 |
-
|
49 |
-
def update_name():
|
50 |
-
st.session_state.username = st.session_state.selected_user
|
51 |
-
st.session_state.index = 0
|
52 |
|
|
|
53 |
if st.session_state.username is None:
|
54 |
with st.form("user_form"):
|
55 |
st.write("### Select Your Name")
|
56 |
selected_user = st.selectbox("Choose your name:", valid_users, key="selected_user")
|
57 |
-
submit_button = st.form_submit_button("Start", on_click=update_name)
|
|
|
|
|
|
|
58 |
st.stop()
|
59 |
|
|
|
60 |
full_dataset = assignments[st.session_state.username].reset_index(drop=True)
|
61 |
user_labeled_ids = existing_annotations[existing_annotations["username"] == st.session_state.username]["id"].tolist()
|
62 |
dataset = full_dataset[~full_dataset["id"].isin(user_labeled_ids)].reset_index(drop=True)
|
63 |
|
64 |
-
if
|
|
|
65 |
st.write("### Great! You have completed your assignment. 🎉")
|
66 |
st.stop()
|
67 |
|
|
|
68 |
def push_to_hf_hub(csv_path):
|
69 |
api = HfApi()
|
70 |
try:
|
@@ -74,11 +78,8 @@ def push_to_hf_hub(csv_path):
|
|
74 |
except Exception as e:
|
75 |
print(f"Error pushing to HF: {e}")
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
st.warning("Please select an answer before proceeding.")
|
80 |
-
return
|
81 |
-
|
82 |
sample = dataset.iloc[st.session_state.index]
|
83 |
st.session_state.results.append({
|
84 |
"username": st.session_state.username,
|
@@ -86,59 +87,55 @@ def save_choice():
|
|
86 |
"video_id": sample["video_id"],
|
87 |
"answer1": sample["answer1"],
|
88 |
"answer2": sample["answer2"],
|
89 |
-
"selected_answer": st.session_state.selected_answer,
|
90 |
"target": sample["target"],
|
91 |
-
"not_enough_info": st.session_state.not_enough_info
|
92 |
})
|
93 |
-
|
94 |
st.session_state.index += 1
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
if st.session_state.index >= len(dataset):
|
99 |
st.write("### Great! You have completed your assignment. 🎉")
|
100 |
result_df = pd.DataFrame(st.session_state.results)
|
101 |
csv_path = "user_selections.csv"
|
102 |
-
|
103 |
if not existing_annotations.empty:
|
104 |
result_df = pd.concat([existing_annotations, result_df]).drop_duplicates(subset=["username", "id"], keep="last")
|
105 |
-
|
106 |
result_df.to_csv(csv_path, index=False)
|
107 |
push_to_hf_hub(csv_path)
|
108 |
st.stop()
|
109 |
-
return
|
110 |
|
|
|
111 |
sample = dataset.iloc[st.session_state.index]
|
112 |
|
|
|
113 |
st.markdown("<h1 style='text-align: center; font-size: 50px;'>MAIA Sample</h1>", unsafe_allow_html=True)
|
114 |
st.markdown(f"<h3 style='text-align: center;'>User: {st.session_state.username}</h3>", unsafe_allow_html=True)
|
115 |
st.write("\n\n")
|
116 |
|
|
|
117 |
st.markdown("""
|
118 |
### Instructions:
|
119 |
-
- Look at the video thumbnail
|
120 |
-
- Select
|
121 |
-
- If
|
122 |
- Click 'Next' to proceed.
|
123 |
""")
|
124 |
st.write("---")
|
125 |
|
|
|
126 |
st.video(sample["video_url"])
|
127 |
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
if st.button("Next"):
|
144 |
-
save_choice()
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
|
|
3 |
from huggingface_hub import HfApi, hf_hub_download
|
4 |
|
5 |
+
# Constants
|
6 |
HF_REPO = "giobin/MAIA_human_assessment_annotations"
|
7 |
CSV_FILENAME = "user_selections.csv"
|
8 |
|
9 |
+
# Function to assign samples to users
|
10 |
def assign_samples(csv_path):
|
11 |
df = pd.read_csv(csv_path)
|
12 |
group_1 = df[(df["pool_pos"] == 1) & (~df["question_category"].str.endswith("_B"))].head(10)
|
|
|
23 |
"Raffaella": group_3,
|
24 |
}
|
25 |
|
26 |
+
# Function to load existing annotations from Hugging Face Hub
|
27 |
def load_existing_annotations():
|
28 |
try:
|
29 |
file_path = hf_hub_download(HF_REPO, CSV_FILENAME, repo_type="dataset", token=st.secrets["HF_TOKEN"])
|
|
|
31 |
except Exception:
|
32 |
return pd.DataFrame(columns=["username", "id"])
|
33 |
|
34 |
+
# Load datasets
|
35 |
csv_file = "static/mc.csv"
|
36 |
assignments = assign_samples(csv_file)
|
37 |
existing_annotations = load_existing_annotations()
|
38 |
|
39 |
+
# Valid users
|
40 |
valid_users = list(assignments.keys())
|
41 |
|
42 |
+
# Initialize session state variables
|
43 |
if "username" not in st.session_state:
|
44 |
st.session_state.username = None
|
45 |
if "index" not in st.session_state:
|
46 |
st.session_state.index = 0
|
47 |
if "results" not in st.session_state:
|
48 |
st.session_state.results = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
+
# User selection form
|
51 |
if st.session_state.username is None:
|
52 |
with st.form("user_form"):
|
53 |
st.write("### Select Your Name")
|
54 |
selected_user = st.selectbox("Choose your name:", valid_users, key="selected_user")
|
55 |
+
submit_button = st.form_submit_button("Start")
|
56 |
+
if submit_button:
|
57 |
+
st.session_state.username = selected_user
|
58 |
+
st.session_state.index = 0
|
59 |
st.stop()
|
60 |
|
61 |
+
# Retrieve assigned dataset and filter out already labeled samples
|
62 |
full_dataset = assignments[st.session_state.username].reset_index(drop=True)
|
63 |
user_labeled_ids = existing_annotations[existing_annotations["username"] == st.session_state.username]["id"].tolist()
|
64 |
dataset = full_dataset[~full_dataset["id"].isin(user_labeled_ids)].reset_index(drop=True)
|
65 |
|
66 |
+
# Check if all samples are labeled
|
67 |
+
if st.session_state.index >= len(dataset):
|
68 |
st.write("### Great! You have completed your assignment. 🎉")
|
69 |
st.stop()
|
70 |
|
71 |
+
# Function to push updated annotations to Hugging Face Hub
|
72 |
def push_to_hf_hub(csv_path):
|
73 |
api = HfApi()
|
74 |
try:
|
|
|
78 |
except Exception as e:
|
79 |
print(f"Error pushing to HF: {e}")
|
80 |
|
81 |
+
# Function to save user choice
|
82 |
+
def save_choice(selected_answer, not_enough_info):
|
|
|
|
|
|
|
83 |
sample = dataset.iloc[st.session_state.index]
|
84 |
st.session_state.results.append({
|
85 |
"username": st.session_state.username,
|
|
|
87 |
"video_id": sample["video_id"],
|
88 |
"answer1": sample["answer1"],
|
89 |
"answer2": sample["answer2"],
|
90 |
+
"selected_answer": selected_answer,
|
91 |
"target": sample["target"],
|
92 |
+
"not_enough_info": not_enough_info
|
93 |
})
|
|
|
94 |
st.session_state.index += 1
|
95 |
+
|
96 |
+
# Save results and push to Hugging Face Hub if all samples are labeled
|
|
|
97 |
if st.session_state.index >= len(dataset):
|
98 |
st.write("### Great! You have completed your assignment. 🎉")
|
99 |
result_df = pd.DataFrame(st.session_state.results)
|
100 |
csv_path = "user_selections.csv"
|
|
|
101 |
if not existing_annotations.empty:
|
102 |
result_df = pd.concat([existing_annotations, result_df]).drop_duplicates(subset=["username", "id"], keep="last")
|
|
|
103 |
result_df.to_csv(csv_path, index=False)
|
104 |
push_to_hf_hub(csv_path)
|
105 |
st.stop()
|
|
|
106 |
|
107 |
+
# Display current sample
|
108 |
sample = dataset.iloc[st.session_state.index]
|
109 |
|
110 |
+
# Page title and user information
|
111 |
st.markdown("<h1 style='text-align: center; font-size: 50px;'>MAIA Sample</h1>", unsafe_allow_html=True)
|
112 |
st.markdown(f"<h3 style='text-align: center;'>User: {st.session_state.username}</h3>", unsafe_allow_html=True)
|
113 |
st.write("\n\n")
|
114 |
|
115 |
+
# Instructions
|
116 |
st.markdown("""
|
117 |
### Instructions:
|
118 |
+
- Look at the video thumbnail; do not play it.
|
119 |
+
- Select the correct description (A or B).
|
120 |
+
- If the frame does not provide enough information to answer the question, select the checkbox.
|
121 |
- Click 'Next' to proceed.
|
122 |
""")
|
123 |
st.write("---")
|
124 |
|
125 |
+
# Display video thumbnail
|
126 |
st.video(sample["video_url"])
|
127 |
|
128 |
+
# Form for user input
|
129 |
+
with st.form("annotation_form"):
|
130 |
+
# Exclusive choice between A and B
|
131 |
+
selected_answer = st.radio(
|
132 |
+
"Choose the correct description:",
|
133 |
+
options=[0, 1],
|
134 |
+
format_func=lambda x: f"A: {sample['answer1']}" if x == 0 else f"B: {sample['answer2']}",
|
135 |
+
key="selected_answer"
|
136 |
+
)
|
137 |
+
|
138 |
+
# Independent checkbox for insufficient information
|
139 |
+
not_enough_info = st.checkbox("The frame does not provide enough information to answer the question.", key="not_enough_info")
|
140 |
+
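# NOTE: the annotation form is cut off here; a leaked citation marker stood in place of the rest of the form (no submit button or save_choice(...) call is present).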
|
141 |
+
|
|
|
|
|
|