Spaces:
Sleeping
Sleeping
File size: 5,011 Bytes
d9cbdf1 138d0d5 d9cbdf1 138d0d5 57bf5d5 138d0d5 57bf5d5 138d0d5 57bf5d5 138d0d5 ff62d04 138d0d5 d88bb9d 138d0d5 d88bb9d 138d0d5 60f83a0 138d0d5 60f83a0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import streamlit as st
import pandas as pd
import os
from huggingface_hub import HfApi, hf_hub_download
# Hugging Face dataset repository that the annotations CSV is downloaded
# from and uploaded to.
HF_REPO = "giobin/MAIA_human_assessment_annotations"
# Name of the annotations CSV file inside that repository.
CSV_FILENAME = "user_selections.csv"
def assign_samples(csv_path, samples_per_group=3):
    """Build the per-annotator sample assignments from a CSV file.

    Rows are grouped by their ``pool_pos`` value (1, 2 or 3). Rows whose
    ``question_category`` ends with ``"_B"`` are excluded, and only the first
    ``samples_per_group`` remaining rows of each pool are kept.

    Args:
        csv_path: Path to a CSV file with at least the ``pool_pos`` and
            ``question_category`` columns.
        samples_per_group: Maximum number of rows kept per pool.

    Returns:
        A dict mapping each annotator name to the DataFrame of that
        annotator's pool. Annotators of the same pool share one DataFrame
        object.
    """
    df = pd.read_csv(csv_path)

    # na=False: a row without a category is treated as "not a _B variant".
    # Without it the mask contains NaN and the ``~`` negation fails.
    not_b_variant = ~df["question_category"].str.endswith("_B", na=False)

    def _pool(pool_pos):
        # First `samples_per_group` non-"_B" rows of the given pool.
        return df[(df["pool_pos"] == pool_pos) & not_b_variant].head(samples_per_group)

    group_1, group_2, group_3 = (_pool(pos) for pos in (1, 2, 3))

    return {
        "Bernardo": group_1,
        "Alessandro": group_1,
        "Alessio": group_1,
        "Lenci": group_2,
        "Lucia": group_2,
        "Davide": group_2,
        "Giovanni": group_3,
        "Raffaella": group_3,
    }
def load_existing_annotations():
    """Load the existing annotations from the HF dataset.

    Downloads ``CSV_FILENAME`` from the ``HF_REPO`` dataset repository using
    the ``HF_TOKEN`` Streamlit secret and parses it with pandas.

    Returns:
        The annotations as a DataFrame. If the file cannot be downloaded or
        read, an empty DataFrame with the ``username`` and ``id`` columns.
    """
    try:
        file_path = hf_hub_download(
            HF_REPO,
            CSV_FILENAME,
            repo_type="dataset",
            token=st.secrets["HF_TOKEN"],
        )
        return pd.read_csv(file_path)
    except Exception as e:
        # Deliberately best-effort: the app must still start when no
        # annotations file exists yet. Report the cause instead of hiding it,
        # so that a bad token or a network failure is not mistaken for
        # "no annotations".
        print(f"Could not load existing annotations: {e}")
        return pd.DataFrame(columns=["username", "id"])
# Build the per-annotator assignments and fetch the annotations stored so far.
csv_file = "static/mc.csv"
assignments = assign_samples(csv_file)
existing_annotations = load_existing_annotations()
valid_users = [*assignments]

# Seed each session-state entry only if it is not already present.
for _state_key, _initial_value in (("username", None), ("index", 0), ("results", [])):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
def update_name():
    """Adopt the name chosen in the selectbox and restart at the first sample."""
    state = st.session_state
    state.username = state.selected_user
    state.index = 0  # progress starts over for the chosen user
# Until a name has been chosen, show only the name-selection form and halt
# the rest of the script.
if st.session_state.username is None:
    user_form = st.form("user_form")
    user_form.write("### Select Your Name")
    selected_user = user_form.selectbox("Choose your name:", valid_users, key="selected_user")
    # update_name copies the selection into the session state on submit.
    submit_button = user_form.form_submit_button("Start", on_click=update_name)
    st.stop()
# Samples assigned to the current user, minus the ones this user has
# already annotated according to the stored annotations.
full_dataset = assignments[st.session_state.username].reset_index(drop=True)
_own_rows = existing_annotations["username"] == st.session_state.username
user_labeled_ids = existing_annotations.loc[_own_rows, "id"].tolist()
_already_labeled = full_dataset["id"].isin(user_labeled_ids)
dataset = full_dataset[~_already_labeled].reset_index(drop=True)

# Nothing left to annotate: show the completion message and halt this run.
if dataset.empty:
    st.write("### Great! You have completed your assignment. π")
    st.stop()
def push_to_hf_hub(csv_path):
    """Upload ``csv_path`` to the ``HF_REPO`` dataset as ``CSV_FILENAME``.

    The repository is created first if it does not exist. Any failure is
    printed and not re-raised.
    """
    api = HfApi()
    try:
        # Keyword arguments shared by both hub calls. The secret is read
        # inside the try block so a missing token is reported like any other
        # failure.
        hub_kwargs = {"repo_type": "dataset", "token": st.secrets["HF_TOKEN"]}
        api.create_repo(HF_REPO, exist_ok=True, **hub_kwargs)
        api.upload_file(
            path_or_fileobj=csv_path,
            path_in_repo=CSV_FILENAME,
            repo_id=HF_REPO,
            **hub_kwargs,
        )
        print(f"Dataset updated: https://huggingface.co/datasets/{HF_REPO}")
    except Exception as err:
        print(f"Error pushing to HF: {err}")
def save_choice(choice_index):
    """Record the answer chosen for the current sample and advance.

    Used as the ``on_click`` callback of the answer buttons. Appends one
    result row to ``st.session_state.results``, moves to the next sample and
    clears the "not enough information" checkbox. After the last remaining
    sample, all results are merged with the stored annotations, written to
    ``CSV_FILENAME`` locally and uploaded with ``push_to_hf_hub``.

    Args:
        choice_index: Index of the selected answer (0 for answer 1, 1 for
            answer 2).
    """
    sample = dataset.iloc[st.session_state.index]
    st.session_state.results.append({
        "username": st.session_state.username,
        "id": sample["id"],
        "video_id": sample["video_id"],
        "answer1": sample["answer1"],
        "answer2": sample["answer2"],
        "selected_answer": choice_index,
        "target": sample["target"],
        # Read the checkbox through its widget key instead of relying on a
        # module-level variable that is only assigned later in the script.
        "not_enough_info": st.session_state.checkbox,
    })
    st.session_state.index += 1
    st.session_state.checkbox = False  # reset the checkbox

    if st.session_state.index < len(dataset):
        return  # more samples to annotate

    # All remaining samples done.
    st.write("### Great! You have completed your assignment. π")
    result_df = pd.DataFrame(st.session_state.results)

    # Re-read the stored annotations right before merging. The module-level
    # snapshot was taken when the page was rendered, so merging into it would
    # overwrite rows other annotators uploaded in the meantime. This narrows
    # the race window but does not eliminate it.
    latest_annotations = load_existing_annotations()
    if latest_annotations.empty:
        # The reload may have failed; fall back to the earlier snapshot
        # rather than uploading a file without the previously stored rows.
        latest_annotations = existing_annotations

    if not latest_annotations.empty:
        result_df = pd.concat([latest_annotations, result_df]).drop_duplicates(
            subset=["username", "id"], keep="last"
        )

    result_df.to_csv(CSV_FILENAME, index=False)
    push_to_hf_hub(CSV_FILENAME)
    st.stop()
# Row of the user's remaining samples to show in this run.
sample = dataset.iloc[st.session_state.index]

# Page header and annotator name.
st.markdown("<h1 style='text-align: center; font-size: 50px;'>MAIA Sample</h1>", unsafe_allow_html=True)
st.write(f"### User: {st.session_state.username}")
st.write("\n\n\n")  # vertical spacing

st.markdown("""
### Instructions:
- Look at the video thumbnail, do not play it!
- Select the checkbox if you think so.
- Then choose one of the two answers.
""")

_category = sample.get('question_category', 'No category available')
st.write(f"**Question Category:** {_category}")
st.video(sample["video_url"])

# Lets the annotator flag that the frame is insufficient to answer.
not_enough_info = st.checkbox("The frame does not provide enough information to answer the question.", key='checkbox')

# One button per candidate answer; save_choice receives the 0-based index.
for _choice in (0, 1):
    _field = f"answer{_choice + 1}"
    st.button(
        f"Answer {_choice + 1}: {sample.get(_field, f'No {_field} available')}",
        on_click=save_choice,
        args=(_choice,),
    )
|