Spaces:

giobin
/

MAIA_human_assessment

Sleeping

App Files Files Community

giovanni-bonetta committed on Feb 20

Commit

138d0d5

1 Parent(s): ff62d04

update gen and mc files, and app.py

Browse files

Files changed (3) hide show

app.py +117 -15
static/gen.csv +0 -0
static/mc.csv +0 -0

app.py CHANGED Viewed

@@ -1,21 +1,123 @@
 import streamlit as st
-from datasets import load_dataset
-import tempfile
-# Load the dataset in streaming mode
-dataset = load_dataset("giobin/MAIA_2400", streaming=True)
-# Get an iterator over the dataset (assuming it's not too large)
-dataset_iter = iter(dataset["train"])  # Adjust split name if needed
-# Fetch the first video example
-video_example = next(dataset_iter)
-video_reader = video_example["video"]  # Decord VideoReader object
-# Save the video temporarily
-with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
-    tmp_file.write(video_reader[:].asnumpy().tobytes())  # Convert frames to bytes
-    video_path = tmp_file.name
-# Display the video
-st.video(video_path)

 import streamlit as st
+import pandas as pd
+import os
+from huggingface_hub import HfApi, hf_hub_download
+# Hugging Face dataset repository the annotations CSV is downloaded from and uploaded to.
+HF_REPO = "giobin/MAIA_human_assessment_annotations"
+# File name of the annotations CSV inside that repository.
+CSV_FILENAME = "user_selections.csv"
+def assign_samples(csv_path, samples_per_group=3):
+    """Map each annotator name to the samples they are asked to label.
+
+    Reads the CSV at ``csv_path`` and builds three groups, one per
+    ``pool_pos`` value (1, 2, 3). Rows whose ``question_category`` ends
+    with ``"_B"`` are excluded, and each group keeps only its first
+    ``samples_per_group`` rows (3 by default).
+
+    Args:
+        csv_path: Path to a CSV with at least the ``pool_pos`` and
+            ``question_category`` columns.
+        samples_per_group: Maximum number of rows kept per group.
+
+    Returns:
+        dict mapping annotator name -> DataFrame of assigned samples.
+        Annotators in the same group share the same DataFrame object.
+    """
+    df = pd.read_csv(csv_path)
+    # na=False: a missing category is treated as "does not end with _B".
+    # Without it the mask holds NaN and the `~` below raises TypeError.
+    not_b_category = ~df["question_category"].str.endswith("_B", na=False)
+
+    def _pool(position):
+        # First `samples_per_group` non-"_B" rows of the given pool position.
+        return df[(df["pool_pos"] == position) & not_b_category].head(samples_per_group)
+
+    group_1, group_2, group_3 = _pool(1), _pool(2), _pool(3)
+    return {
+        "Bernardo": group_1,
+        "Alessandro": group_1,
+        "Alessio": group_1,
+        "Lenci": group_2,
+        "Lucia": group_2,
+        "Davide": group_2,
+        "Giovanni": group_3,
+        "Raffaella": group_3,
+    }
+def load_existing_annotations():
+    """Fetch the previously saved annotations from the HF dataset repository.
+
+    Returns the downloaded CSV as a DataFrame. If downloading or parsing
+    fails for any reason, an empty DataFrame with the ``username`` and
+    ``id`` columns is returned instead.
+    """
+    try:
+        local_copy = hf_hub_download(repo_id=HF_REPO, filename=CSV_FILENAME, repo_type="dataset")
+        annotations = pd.read_csv(local_copy)
+    except Exception:
+        # Best effort: any failure is treated as "no annotations yet".
+        annotations = pd.DataFrame(columns=["username", "id"])
+    return annotations
+# Load the sample assignments and whatever annotations were already saved.
+csv_file = "static/mc.csv"
+assignments = assign_samples(csv_file)
+existing_annotations = load_existing_annotations()
+valid_users = list(assignments)
+# Seed the session state with defaults the first time each key is needed.
+for _key, _default in (("username", None), ("index", 0), ("results", [])):
+    if _key not in st.session_state:
+        st.session_state[_key] = _default
+def update_name():
+    """Copy the name picked in the select box into the session and restart progress."""
+    state = st.session_state
+    state.username = state.selected_user
+    state.index = 0  # start again from the first sample
+# Until a name has been chosen, show only the selection form and halt the run.
+if st.session_state.username is None:
+    with st.form("user_form"):
+        st.write("### Select Your Name")
+        selected_user = st.selectbox("Choose your name:", valid_users, key="selected_user")
+        submit_button = st.form_submit_button("Start", on_click=update_name)
+    st.stop()
+# Samples assigned to this user, minus the ones they have already annotated.
+current_user = st.session_state.username
+full_dataset = assignments[current_user].reset_index(drop=True)
+rows_by_user = existing_annotations["username"] == current_user
+user_labeled_ids = existing_annotations.loc[rows_by_user, "id"].tolist()
+already_labeled = full_dataset["id"].isin(user_labeled_ids)
+dataset = full_dataset[~already_labeled].reset_index(drop=True)
+# Nothing left to label: show the completion message and halt.
+if dataset.empty:
+    st.write("### Great! You have completed your assignment. 🎉")
+    st.stop()
+def push_to_hf_hub(csv_path):
+    """Upload the CSV at ``csv_path`` to the annotations dataset repository.
+
+    The repository is created first if it does not exist yet. Any exception
+    raised while creating or uploading is caught and printed; nothing is
+    propagated to the caller.
+    """
+    hub = HfApi()
+    try:
+        hub.create_repo(repo_id=HF_REPO, repo_type="dataset", exist_ok=True)
+        hub.upload_file(
+            path_or_fileobj=csv_path,
+            path_in_repo=CSV_FILENAME,
+            repo_id=HF_REPO,
+            repo_type="dataset",
+        )
+        print(f"Dataset updated: https://huggingface.co/datasets/{HF_REPO}")
+    except Exception as err:
+        print(f"Error pushing to HF: {err}")
+def save_choice(choice_index):
+    """Record the answer chosen for the current sample and move to the next one.
+
+    ``choice_index`` is stored as ``selected_answer``. Once the last remaining
+    sample has been answered, the session's results are merged with the
+    previously downloaded annotations (keeping the latest row per
+    username/id pair), written to ``user_selections.csv``, pushed to the
+    Hub, and the run is stopped.
+    """
+    state = st.session_state
+    current = dataset.iloc[state.index]
+    record = {"username": state.username}
+    for column in ("id", "video_id", "answer1", "answer2"):
+        record[column] = current[column]
+    record["selected_answer"] = choice_index
+    record["target"] = current["target"]
+    # Module-level value of the "not enough information" checkbox.
+    record["not_enough_info"] = not_enough_info
+    state.results.append(record)
+    state.index += 1
+    state.checkbox = False  # reset the checkbox
+    if state.index < len(dataset):
+        return
+    # All remaining samples done
+    st.write("### Great! You have completed your assignment. 🎉")
+    merged = pd.DataFrame(state.results)
+    if not existing_annotations.empty:
+        merged = pd.concat([existing_annotations, merged]).drop_duplicates(subset=["username", "id"], keep="last")
+    output_path = "user_selections.csv"
+    merged.to_csv(output_path, index=False)
+    push_to_hf_hub(output_path)
+    st.stop()
+# Row of the sample currently being annotated.
+sample = dataset.iloc[st.session_state.index]
+# Header, category and video for the sample.
+st.write(f"## Video Question Answering Sample - User: {st.session_state.username}")
+category = sample.get('question_category', 'No category available')
+st.write(f"**Question Category:** {category}")
+st.video(sample["video_url"])
+# Checkbox for uncertainty; save_choice reads this value when a button is clicked.
+not_enough_info = st.checkbox("The frame does not provide enough information to answer the question.", key='checkbox')
+# One button per candidate answer; the button position (0 or 1) is handed to save_choice.
+for _position, _column in enumerate(("answer1", "answer2")):
+    st.button(
+        f"Select Answer {_position + 1}: {sample.get(_column, f'No {_column} available')}",
+        on_click=save_choice,
+        args=(_position,),
+    )

static/gen.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

static/mc.csv ADDED Viewed

The diff for this file is too large to render. See raw diff