Spaces:
Sleeping
Sleeping
File size: 5,380 Bytes
d9cbdf1 138d0d5 d9cbdf1 138d0d5 57bf5d5 138d0d5 ec3d602 138d0d5 ff62d04 138d0d5 d88bb9d 138d0d5 ccaeded 138d0d5 ccaeded 138d0d5 ccaeded 138d0d5 d88bb9d 138d0d5 ccaeded 138d0d5 ccaeded 138d0d5 ccaeded 138d0d5 ccaeded 138d0d5 ccaeded 138d0d5 60f83a0 19ec3d7 ccaeded 19ec3d7 60f83a0 ccaeded 60f83a0 ccaeded 19ec3d7 138d0d5 19ec3d7 ccaeded 19ec3d7 ccaeded |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import streamlit as st
import pandas as pd
import os
from huggingface_hub import HfApi, hf_hub_download
# Hugging Face dataset repository the annotations CSV is downloaded from and uploaded to.
HF_REPO = "giobin/MAIA_human_assessment_annotations"
# Name of the annotations CSV file inside that repository.
CSV_FILENAME = "user_selections.csv"
def assign_samples(csv_path, per_group=10):
    """Load the sample pool and build the per-annotator assignment.

    The CSV must contain the columns ``pool_pos`` and ``question_category``.
    Rows whose category ends with ``"_B"`` are discarded. From the remaining
    rows, the first ``per_group`` rows with ``pool_pos`` equal to 1, 2 and 3
    form three groups, each shared by several annotators.

    Args:
        csv_path: Path or buffer accepted by ``pandas.read_csv``.
        per_group: Maximum number of rows kept in each group (default 10).

    Returns:
        dict mapping annotator name to the DataFrame of samples assigned to
        that annotator. Annotators of the same group share one DataFrame.
    """
    df = pd.read_csv(csv_path)
    # na=False: a row with a missing category counts as "does not end with
    # _B". Without it the mask contains NaN and cannot be negated.
    keep = ~df["question_category"].str.endswith("_B", na=False)

    def pick(pool_pos):
        # First `per_group` kept rows belonging to the given pool position.
        return df[(df["pool_pos"] == pool_pos) & keep].head(per_group)

    group_1, group_2, group_3 = pick(1), pick(2), pick(3)
    return {
        "Bernardo": group_1,
        "Alessandro": group_1,
        "Alessio": group_1,
        "Lenci": group_2,
        "Lucia": group_2,
        "Davide": group_2,
        "Giovanni": group_3,
        "Raffaella": group_3,
    }
def load_existing_annotations():
    """Download the annotations already stored on the Hugging Face Hub.

    Returns:
        The remote CSV parsed into a DataFrame. If the download or the
        parsing fails for any reason, an empty DataFrame with the columns
        ``username`` and ``id`` is returned instead.
    """
    try:
        file_path = hf_hub_download(HF_REPO, CSV_FILENAME, repo_type="dataset", token=st.secrets["HF_TOKEN"])
        return pd.read_csv(file_path)
    except Exception as e:
        # Deliberately best-effort (the file may simply not exist yet), but
        # the failure is logged so that auth/network problems are visible
        # instead of silently looking like "no annotations so far".
        print(f"Error loading existing annotations from HF: {e}")
        return pd.DataFrame(columns=["username", "id"])
# Sample pool, per-annotator split and the annotations saved so far.
csv_file = "static/mc.csv"
assignments = assign_samples(csv_file)
existing_annotations = load_existing_annotations()
valid_users = list(assignments)

# Seed the session state once; values already present are left untouched.
for _state_key, _initial in (
    ("username", None),
    ("index", 0),
    ("results", []),
    ("selected_answer", None),
    ("not_enough_info", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial
def update_name():
    """Store the name chosen in the login form and restart at the first sample."""
    state = st.session_state
    state.username = state.selected_user
    state.index = 0
# No annotator chosen yet: show only the login form and halt this run.
if st.session_state.username is None:
    with st.form(key="user_form"):
        st.write("### Select Your Name")
        selected_user = st.selectbox(
            label="Choose your name:",
            options=valid_users,
            key="selected_user",
        )
        # update_name copies the selection into the session state on submit.
        submit_button = st.form_submit_button(label="Start", on_click=update_name)
    st.stop()
# Samples assigned to the current annotator, minus those already annotated.
full_dataset = assignments[st.session_state.username].reset_index(drop=True)
_by_this_user = existing_annotations["username"] == st.session_state.username
user_labeled_ids = existing_annotations.loc[_by_this_user, "id"].tolist()
_still_pending = ~full_dataset["id"].isin(user_labeled_ids)
dataset = full_dataset[_still_pending].reset_index(drop=True)

# Nothing left for this annotator: show the completion message and halt.
if dataset.empty:
    st.write("### Great! You have completed your assignment. π")
    st.stop()
def push_to_hf_hub(csv_path):
    """Upload the CSV at ``csv_path`` to the annotations dataset repository.

    The repository is created first if it does not exist. Any error raised
    while creating the repository or uploading is caught and printed, so this
    function never raises for those steps.
    """
    hub = HfApi()
    try:
        hub.create_repo(
            repo_id=HF_REPO,
            token=st.secrets["HF_TOKEN"],
            repo_type="dataset",
            exist_ok=True,
        )
        hub.upload_file(
            path_or_fileobj=csv_path,
            path_in_repo=CSV_FILENAME,
            repo_id=HF_REPO,
            token=st.secrets["HF_TOKEN"],
            repo_type="dataset",
        )
        print(f"Dataset updated: https://huggingface.co/datasets/{HF_REPO}")
    except Exception as err:
        print(f"Error pushing to HF: {err}")
def save_choice():
    """Record the answer for the current sample and move to the next one.

    If no answer has been selected, a warning is shown and nothing is stored.
    Otherwise the choice is appended to the session results, the index is
    advanced and the per-sample selection state is reset. Once the last
    sample has been answered, the session results are merged with the
    previously stored annotations (newest entry wins per username/id),
    written to a local CSV, pushed to the Hub, and the script run is stopped.
    """
    state = st.session_state
    if state.selected_answer is None:
        st.warning("Please select an answer before proceeding.")
        return

    row = dataset.iloc[state.index]
    record = {"username": state.username}
    record.update({col: row[col] for col in ("id", "video_id", "answer1", "answer2")})
    record["selected_answer"] = state.selected_answer
    record["target"] = row["target"]
    record["not_enough_info"] = state.not_enough_info
    state.results.append(record)

    # Advance and clear the per-sample selection state.
    state.index += 1
    state.selected_answer = None
    state.not_enough_info = False

    # More samples to annotate: nothing is persisted yet.
    if state.index < len(dataset):
        return

    st.write("### Great! You have completed your assignment. π")
    collected = pd.DataFrame(state.results)
    out_path = "user_selections.csv"
    if not existing_annotations.empty:
        merged = pd.concat([existing_annotations, collected])
        collected = merged.drop_duplicates(subset=["username", "id"], keep="last")
    collected.to_csv(out_path, index=False)
    push_to_hf_hub(out_path)
    st.stop()
# Render the sample the annotator is currently working on.
sample = dataset.iloc[st.session_state.index]
st.markdown("<h1 style='text-align: center; font-size: 50px;'>MAIA Sample</h1>", unsafe_allow_html=True)
st.markdown(f"<h3 style='text-align: center;'>User: {st.session_state.username}</h3>", unsafe_allow_html=True)
st.write("\n\n")
st.markdown("""
### Instructions:
- Look at the video thumbnail, do not play it!
- Select one of the two answers.
- If you think the frame does not provide enough information, select the checkbox.
- Click 'Next' to proceed.
""")
st.write("---")
st.video(sample["video_url"])
st.markdown("<h4 style='text-align: center; margin-top: 20px;'>Scegli la descrizione corretta tra A e B</h4>", unsafe_allow_html=True)

# Answer buttons: 0 is stored for answer A, 1 for answer B.
col1, col2 = st.columns(2)
with col1:
    if st.button(f"A: {sample.get('answer1', 'No answer1 available')}"):
        st.session_state.selected_answer = 0
with col2:
    if st.button(f"B: {sample.get('answer2', 'No answer2 available')}"):
        st.session_state.selected_answer = 1

# NOTE(review): each st.markdown call is emitted as its own element, so these
# div tags probably do not wrap (and center) the checkbox - confirm.
st.markdown("<div style='text-align: center;'>", unsafe_allow_html=True)
# The key is unique per sample so the checkbox starts unticked for every new
# sample. With a shared (implicit) key the widget kept its previous state and
# a ticked box silently carried over to the following samples.
st.session_state.not_enough_info = st.checkbox(
    "The frame does not provide enough information to answer the question.",
    key=f"not_enough_info_{sample['id']}",
)
st.markdown("</div>", unsafe_allow_html=True)
st.write("\n")

if st.button("Next"):
    index_before = st.session_state.index
    save_choice()
    # save_choice() leaves the index untouched when no answer was selected;
    # in that case its warning must stay on screen. When the index advanced,
    # the page above still shows the sample that was just saved, so rerun to
    # display the next one immediately.
    if st.session_state.index != index_before:
        # st.rerun is the current API; experimental_rerun covers older releases.
        _rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
        if _rerun is not None:
            _rerun()
|