Spaces:
Sleeping
Sleeping
File size: 4,662 Bytes
d9cbdf1 138d0d5 d9cbdf1 138d0d5 57bf5d5 138d0d5 57bf5d5 138d0d5 57bf5d5 138d0d5 ff62d04 138d0d5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import streamlit as st
import pandas as pd
import os
from huggingface_hub import HfApi, hf_hub_download
# Hugging Face Hub dataset repository the annotation CSV is downloaded from
# and uploaded to (always accessed with repo_type="dataset").
HF_REPO = "giobin/MAIA_human_assessment_annotations"
# Name of the annotation CSV file inside that repository.
CSV_FILENAME = "user_selections.csv"
def assign_samples(csv_path, samples_per_group=3):
    """Build the per-annotator sample assignment from the samples CSV.

    Rows are split by their ``pool_pos`` value (1, 2 or 3). Rows whose
    ``question_category`` ends with ``"_B"`` are skipped, and only the first
    ``samples_per_group`` remaining rows of each pool are kept.

    Args:
        csv_path: Path to a CSV file containing at least the ``pool_pos`` and
            ``question_category`` columns.
        samples_per_group: Maximum number of rows kept for each pool.

    Returns:
        A dict mapping each annotator name to the DataFrame of their pool.
        Annotators assigned to the same pool share the same DataFrame object.
    """
    df = pd.read_csv(csv_path)

    # na=False makes a missing category count as "does not end with _B".
    # Without it the mask contains NaN and the `~` negation raises.
    not_b_category = ~df["question_category"].str.endswith("_B", na=False)

    def _pool(pool_pos):
        """Return the first rows of the given pool that are not "_B" rows."""
        in_pool = df["pool_pos"] == pool_pos
        return df[in_pool & not_b_category].head(samples_per_group)

    group_1 = _pool(1)
    group_2 = _pool(2)
    group_3 = _pool(3)

    return {
        "Bernardo": group_1,
        "Alessandro": group_1,
        "Alessio": group_1,
        "Lenci": group_2,
        "Lucia": group_2,
        "Davide": group_2,
        "Giovanni": group_3,
        "Raffaella": group_3,
    }
def load_existing_annotations():
    """Load the existing annotations from the HF dataset.

    Returns:
        The CSV named ``CSV_FILENAME`` downloaded from ``HF_REPO`` as a
        DataFrame, or an empty DataFrame with ``username`` and ``id`` columns
        when the file cannot be downloaded or parsed.
    """
    try:
        file_path = hf_hub_download(HF_REPO, CSV_FILENAME, repo_type="dataset")
        return pd.read_csv(file_path)
    except Exception as e:
        # Best-effort fallback: the file may simply not exist yet. Report the
        # cause instead of swallowing it, so that a network or authentication
        # failure is not mistaken for "no annotations so far".
        print(f"Could not load existing annotations from HF: {e}")
        return pd.DataFrame(columns=["username", "id"])
# Read the sample assignments and the annotations stored so far.
csv_file = "static/mc.csv"
assignments = assign_samples(csv_file)
existing_annotations = load_existing_annotations()
valid_users = list(assignments)

# Seed the session state with defaults for any key that is not set yet.
for state_key, initial_value in (("username", None), ("index", 0), ("results", [])):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
def update_name():
    """Adopt the name chosen in the select box and restart from the first sample."""
    state = st.session_state
    state["username"] = state["selected_user"]
    state["index"] = 0  # progress starts over for the chosen user
# Until a name has been chosen, show only the name form and end the run.
if st.session_state.username is None:
    with st.form("user_form"):
        st.write("### Select Your Name")
        selected_user = st.selectbox("Choose your name:", valid_users, key="selected_user")
        submit_button = st.form_submit_button("Start", on_click=update_name)
    st.stop()

# Samples assigned to this user, minus the ones they have already labeled.
full_dataset = assignments[st.session_state.username].reset_index(drop=True)
rows_of_user = existing_annotations["username"] == st.session_state.username
user_labeled_ids = existing_annotations.loc[rows_of_user, "id"].tolist()
already_labeled = full_dataset["id"].isin(user_labeled_ids)
dataset = full_dataset.loc[~already_labeled].reset_index(drop=True)

# Nothing left to label: show the completion message and end the run.
if dataset.empty:
    st.write("### Great! You have completed your assignment. π")
    st.stop()
def push_to_hf_hub(csv_path):
    """Upload the CSV at ``csv_path`` to the HF dataset repo as ``CSV_FILENAME``.

    The repo is created first if it does not exist. Any exception raised
    while creating the repo or uploading is caught and printed, not raised.
    """
    hub = HfApi()
    try:
        hub.create_repo(HF_REPO, repo_type="dataset", exist_ok=True)
        hub.upload_file(
            path_or_fileobj=csv_path,
            path_in_repo=CSV_FILENAME,
            repo_id=HF_REPO,
            repo_type="dataset",
        )
        print(f"Dataset updated: https://huggingface.co/datasets/{HF_REPO}")
    except Exception as err:
        print(f"Error pushing to HF: {err}")
def save_choice(choice_index):
    """Record the answer chosen for the current sample and advance.

    Appends one result row to ``st.session_state.results``, moves the session
    index forward and clears the "not enough information" checkbox. Once the
    last remaining sample has been answered, the session results are merged
    with the previously loaded annotations, written to a CSV and pushed to
    the HF dataset repo, after which the script run is stopped.

    Args:
        choice_index: Index of the selected answer (0 for answer 1, 1 for
            answer 2), stored in the ``selected_answer`` column.
    """
    import tempfile

    sample = dataset.iloc[st.session_state.index]
    st.session_state.results.append({
        "username": st.session_state.username,
        "id": sample["id"],
        "video_id": sample["video_id"],
        "answer1": sample["answer1"],
        "answer2": sample["answer2"],
        "selected_answer": choice_index,
        "target": sample["target"],
        # Read the checkbox through its session-state key instead of a
        # module-level variable that is only assigned later in the script.
        "not_enough_info": bool(st.session_state.get("checkbox", False)),
    })
    st.session_state.index += 1
    st.session_state.checkbox = False  # reset the checkbox

    if st.session_state.index < len(dataset):
        return  # more samples remain; nothing to upload yet

    # All remaining samples done
    st.write("### Great! You have completed your assignment. π")
    result_df = pd.DataFrame(st.session_state.results)
    # NOTE(review): existing_annotations is the snapshot loaded at script
    # level; rows pushed by other sessions after that load are not merged.
    if not existing_annotations.empty:
        result_df = pd.concat([existing_annotations, result_df]).drop_duplicates(
            subset=["username", "id"], keep="last"
        )

    # Write to a private temporary directory so that sessions finishing at the
    # same time do not overwrite each other's local file before the upload.
    with tempfile.TemporaryDirectory() as tmp_dir:
        csv_path = os.path.join(tmp_dir, CSV_FILENAME)
        result_df.to_csv(csv_path, index=False)
        push_to_hf_hub(csv_path)

    st.stop()
# The index can point past the last sample when the script reruns after the
# final answer was recorded but the stored annotations do not include it yet
# (for example because the upload failed). Show the completion message
# instead of failing with an IndexError.
if st.session_state.index >= len(dataset):
    st.write("### Great! You have completed your assignment. π")
    st.stop()

# Select the current sample
sample = dataset.iloc[st.session_state.index]

# Display content
st.write(f"## Video Question Answering Sample - User: {st.session_state.username}")
st.write(f"**Question Category:** {sample.get('question_category', 'No category available')}")
st.video(sample["video_url"])

# Checkbox for uncertainty; its value is also available as st.session_state.checkbox
not_enough_info = st.checkbox("The frame does not provide enough information to answer the question.", key='checkbox')

# Buttons: hand the callback and its argument to Streamlit directly
st.button(f"Select Answer 1: {sample.get('answer1', 'No answer1 available')}", on_click=save_choice, args=(0,))
st.button(f"Select Answer 2: {sample.get('answer2', 'No answer2 available')}", on_click=save_choice, args=(1,))
|