giobin committed on
Commit
8823c0c
·
verified ·
1 Parent(s): ec3d602

form implementation

Browse files
Files changed (1) hide show
  1. app.py +42 -45
app.py CHANGED
@@ -1,11 +1,12 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import os
4
  from huggingface_hub import HfApi, hf_hub_download
5
 
 
6
  HF_REPO = "giobin/MAIA_human_assessment_annotations"
7
  CSV_FILENAME = "user_selections.csv"
8
 
 
9
  def assign_samples(csv_path):
10
  df = pd.read_csv(csv_path)
11
  group_1 = df[(df["pool_pos"] == 1) & (~df["question_category"].str.endswith("_B"))].head(10)
@@ -22,6 +23,7 @@ def assign_samples(csv_path):
22
  "Raffaella": group_3,
23
  }
24
 
 
25
  def load_existing_annotations():
26
  try:
27
  file_path = hf_hub_download(HF_REPO, CSV_FILENAME, repo_type="dataset", token=st.secrets["HF_TOKEN"])
@@ -29,42 +31,44 @@ def load_existing_annotations():
29
  except Exception:
30
  return pd.DataFrame(columns=["username", "id"])
31
 
 
32
  csv_file = "static/mc.csv"
33
  assignments = assign_samples(csv_file)
34
  existing_annotations = load_existing_annotations()
35
 
 
36
  valid_users = list(assignments.keys())
37
 
 
38
  if "username" not in st.session_state:
39
  st.session_state.username = None
40
  if "index" not in st.session_state:
41
  st.session_state.index = 0
42
  if "results" not in st.session_state:
43
  st.session_state.results = []
44
- if "selected_answer" not in st.session_state:
45
- st.session_state.selected_answer = None
46
- if "not_enough_info" not in st.session_state:
47
- st.session_state.not_enough_info = False
48
-
49
- def update_name():
50
- st.session_state.username = st.session_state.selected_user
51
- st.session_state.index = 0
52
 
 
53
  if st.session_state.username is None:
54
  with st.form("user_form"):
55
  st.write("### Select Your Name")
56
  selected_user = st.selectbox("Choose your name:", valid_users, key="selected_user")
57
- submit_button = st.form_submit_button("Start", on_click=update_name)
 
 
 
58
  st.stop()
59
 
 
60
  full_dataset = assignments[st.session_state.username].reset_index(drop=True)
61
  user_labeled_ids = existing_annotations[existing_annotations["username"] == st.session_state.username]["id"].tolist()
62
  dataset = full_dataset[~full_dataset["id"].isin(user_labeled_ids)].reset_index(drop=True)
63
 
64
- if dataset.empty:
 
65
  st.write("### Great! You have completed your assignment. 🎉")
66
  st.stop()
67
 
 
68
  def push_to_hf_hub(csv_path):
69
  api = HfApi()
70
  try:
@@ -74,11 +78,8 @@ def push_to_hf_hub(csv_path):
74
  except Exception as e:
75
  print(f"Error pushing to HF: {e}")
76
 
77
- def save_choice():
78
- if st.session_state.selected_answer is None:
79
- st.warning("Please select an answer before proceeding.")
80
- return
81
-
82
  sample = dataset.iloc[st.session_state.index]
83
  st.session_state.results.append({
84
  "username": st.session_state.username,
@@ -86,59 +87,55 @@ def save_choice():
86
  "video_id": sample["video_id"],
87
  "answer1": sample["answer1"],
88
  "answer2": sample["answer2"],
89
- "selected_answer": st.session_state.selected_answer,
90
  "target": sample["target"],
91
- "not_enough_info": st.session_state.not_enough_info
92
  })
93
-
94
  st.session_state.index += 1
95
- st.session_state.selected_answer = None
96
- st.session_state.not_enough_info = False
97
-
98
  if st.session_state.index >= len(dataset):
99
  st.write("### Great! You have completed your assignment. 🎉")
100
  result_df = pd.DataFrame(st.session_state.results)
101
  csv_path = "user_selections.csv"
102
-
103
  if not existing_annotations.empty:
104
  result_df = pd.concat([existing_annotations, result_df]).drop_duplicates(subset=["username", "id"], keep="last")
105
-
106
  result_df.to_csv(csv_path, index=False)
107
  push_to_hf_hub(csv_path)
108
  st.stop()
109
- return
110
 
 
111
  sample = dataset.iloc[st.session_state.index]
112
 
 
113
  st.markdown("<h1 style='text-align: center; font-size: 50px;'>MAIA Sample</h1>", unsafe_allow_html=True)
114
  st.markdown(f"<h3 style='text-align: center;'>User: {st.session_state.username}</h3>", unsafe_allow_html=True)
115
  st.write("\n\n")
116
 
 
117
  st.markdown("""
118
  ### Instructions:
119
- - Look at the video thumbnail, do not play it!
120
- - Select one of the two answers.
121
- - If you think the frame does not provide enough information, select the checkbox.
122
  - Click 'Next' to proceed.
123
  """)
124
  st.write("---")
125
 
 
126
  st.video(sample["video_url"])
127
 
128
- st.markdown("<h4 style='text-align: center; margin-top: 20px;'>Scegli la descrizione corretta tra A e B</h4>", unsafe_allow_html=True)
129
-
130
- col1, col2 = st.columns(2)
131
- with col1:
132
- if st.button(f"A: {sample.get('answer1', 'No answer1 available')}"):
133
- st.session_state.selected_answer = 0
134
- with col2:
135
- if st.button(f"B: {sample.get('answer2', 'No answer2 available')}"):
136
- st.session_state.selected_answer = 1
137
-
138
- st.markdown("<div style='text-align: center;'>", unsafe_allow_html=True)
139
- st.session_state.not_enough_info = st.checkbox("The frame does not provide enough information to answer the question.")
140
- st.markdown("</div>", unsafe_allow_html=True)
141
- st.write("\n")
142
-
143
- if st.button("Next"):
144
- save_choice()
 
1
  import streamlit as st
2
  import pandas as pd
 
3
  from huggingface_hub import HfApi, hf_hub_download
4
 
5
+ # Constants
6
  HF_REPO = "giobin/MAIA_human_assessment_annotations"
7
  CSV_FILENAME = "user_selections.csv"
8
 
9
+ # Function to assign samples to users
10
  def assign_samples(csv_path):
11
  df = pd.read_csv(csv_path)
12
  group_1 = df[(df["pool_pos"] == 1) & (~df["question_category"].str.endswith("_B"))].head(10)
 
23
  "Raffaella": group_3,
24
  }
25
 
26
+ # Function to load existing annotations from Hugging Face Hub
27
  def load_existing_annotations():
28
  try:
29
  file_path = hf_hub_download(HF_REPO, CSV_FILENAME, repo_type="dataset", token=st.secrets["HF_TOKEN"])
 
31
  except Exception:
32
  return pd.DataFrame(columns=["username", "id"])
33
 
34
+ # Load datasets
35
  csv_file = "static/mc.csv"
36
  assignments = assign_samples(csv_file)
37
  existing_annotations = load_existing_annotations()
38
 
39
+ # Valid users
40
  valid_users = list(assignments.keys())
41
 
42
+ # Initialize session state variables
43
  if "username" not in st.session_state:
44
  st.session_state.username = None
45
  if "index" not in st.session_state:
46
  st.session_state.index = 0
47
  if "results" not in st.session_state:
48
  st.session_state.results = []
 
 
 
 
 
 
 
 
49
 
50
+ # User selection form
51
  if st.session_state.username is None:
52
  with st.form("user_form"):
53
  st.write("### Select Your Name")
54
  selected_user = st.selectbox("Choose your name:", valid_users, key="selected_user")
55
+ submit_button = st.form_submit_button("Start")
56
+ if submit_button:
57
+ st.session_state.username = selected_user
58
+ st.session_state.index = 0
59
  st.stop()
60
 
61
+ # Retrieve assigned dataset and filter out already labeled samples
62
  full_dataset = assignments[st.session_state.username].reset_index(drop=True)
63
  user_labeled_ids = existing_annotations[existing_annotations["username"] == st.session_state.username]["id"].tolist()
64
  dataset = full_dataset[~full_dataset["id"].isin(user_labeled_ids)].reset_index(drop=True)
65
 
66
+ # Check if all samples are labeled
67
+ if st.session_state.index >= len(dataset):
68
  st.write("### Great! You have completed your assignment. 🎉")
69
  st.stop()
70
 
71
+ # Function to push updated annotations to Hugging Face Hub
72
  def push_to_hf_hub(csv_path):
73
  api = HfApi()
74
  try:
 
78
  except Exception as e:
79
  print(f"Error pushing to HF: {e}")
80
 
81
+ # Function to save user choice
82
+ def save_choice(selected_answer, not_enough_info):
 
 
 
83
  sample = dataset.iloc[st.session_state.index]
84
  st.session_state.results.append({
85
  "username": st.session_state.username,
 
87
  "video_id": sample["video_id"],
88
  "answer1": sample["answer1"],
89
  "answer2": sample["answer2"],
90
+ "selected_answer": selected_answer,
91
  "target": sample["target"],
92
+ "not_enough_info": not_enough_info
93
  })
 
94
  st.session_state.index += 1
95
+
96
+ # Save results and push to Hugging Face Hub if all samples are labeled
 
97
  if st.session_state.index >= len(dataset):
98
  st.write("### Great! You have completed your assignment. 🎉")
99
  result_df = pd.DataFrame(st.session_state.results)
100
  csv_path = "user_selections.csv"
 
101
  if not existing_annotations.empty:
102
  result_df = pd.concat([existing_annotations, result_df]).drop_duplicates(subset=["username", "id"], keep="last")
 
103
  result_df.to_csv(csv_path, index=False)
104
  push_to_hf_hub(csv_path)
105
  st.stop()
 
106
 
107
+ # Display current sample
108
  sample = dataset.iloc[st.session_state.index]
109
 
110
+ # Page title and user information
111
  st.markdown("<h1 style='text-align: center; font-size: 50px;'>MAIA Sample</h1>", unsafe_allow_html=True)
112
  st.markdown(f"<h3 style='text-align: center;'>User: {st.session_state.username}</h3>", unsafe_allow_html=True)
113
  st.write("\n\n")
114
 
115
+ # Instructions
116
  st.markdown("""
117
  ### Instructions:
118
+ - Look at the video thumbnail; do not play it.
119
+ - Select the correct description (A or B).
120
+ - If the frame does not provide enough information to answer the question, select the checkbox.
121
  - Click 'Next' to proceed.
122
  """)
123
  st.write("---")
124
 
125
+ # Display video thumbnail
126
  st.video(sample["video_url"])
127
 
128
+ # Form for user input
129
+ with st.form("annotation_form"):
130
+ # Exclusive choice between A and B
131
+ selected_answer = st.radio(
132
+ "Choose the correct description:",
133
+ options=[0, 1],
134
+ format_func=lambda x: f"A: {sample['answer1']}" if x == 0 else f"B: {sample['answer2']}",
135
+ key="selected_answer"
136
+ )
137
+
138
+ # Independent checkbox for insufficient information
139
+ not_enough_info = st.checkbox("The frame does not provide enough information to answer the question.", key="not_enough
140
+ ::contentReference[oaicite:12]{index=12}
141
+