Spaces:

saridormi
/

labeling-template

Sleeping

App Files Community

saridormi committed on Mar 26

Commit

1060d5d

1 Parent(s): f053717

fix the app

Browse files

Files changed (2) hide show

.env +1 -0
app.py +199 -79

.env CHANGED Viewed

@@ -4,5 +4,6 @@ HF_INPUT_DATASET_PATH="commit_message_generation/predictions/o1-preview-2024-09-
 HF_INPUT_DATASET_ID_COLUMN="hash"
 HF_INPUT_DATASET_COLUMN_A="reference"
 HF_INPUT_DATASET_COLUMN_B="prediction"
 HF_OUTPUT_DATASET="saridormi/labels"
 HF_OUTPUT_DATASET_DIR="cmg"

 HF_INPUT_DATASET_ID_COLUMN="hash"
 HF_INPUT_DATASET_COLUMN_A="reference"
 HF_INPUT_DATASET_COLUMN_B="prediction"
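+# Optional: name of the input-dataset column that holds a reference URL for each item.
+# When set and present in the dataset, the app shows it as a clickable "Reference URL" link.
+#HF_INPUT_DATASET_URL_COLUMN="hash"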
 HF_OUTPUT_DATASET="saridormi/labels"
 HF_OUTPUT_DATASET_DIR="cmg"

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import datetime
 import logging
 from huggingface_hub import hf_hub_download, upload_file, list_repo_tree
 from dotenv import load_dotenv
 load_dotenv()
@@ -15,15 +16,16 @@ HF_INPUT_DATASET_PATH = os.getenv("HF_INPUT_DATASET_PATH")
 HF_INPUT_DATASET_ID_COLUMN = os.getenv("HF_INPUT_DATASET_ID_COLUMN")
 HF_INPUT_DATASET_COLUMN_A = os.getenv("HF_INPUT_DATASET_COLUMN_A")
 HF_INPUT_DATASET_COLUMN_B = os.getenv("HF_INPUT_DATASET_COLUMN_B")
 HF_OUTPUT_DATASET = os.getenv("HF_OUTPUT_DATASET")
 HF_OUTPUT_DATASET_DIR = os.getenv("HF_OUTPUT_DATASET_DIR")
 INSTRUCTIONS = """
 # Pairwise Model Output Labeling
 Please compare the two model outputs shown below and select which one you think is better.
-- Choose "Left is better" if the left output is superior
-- Choose "Right is better" if the right output is superior
-- Choose "Tie" if they are equally good or bad
 - Choose "Can't choose" if you cannot make a determination
 """
 SAVE_EVERY_N_EXAMPLES = 5
@@ -31,9 +33,10 @@ SAVE_EVERY_N_EXAMPLES = 5
 class PairwiseLabeler:
     def __init__(self):
-        self.current_index = 0
-        self.results = []
         self.df = self.read_hf_dataset()
     def __len__(self):
         return len(self.df)
@@ -59,94 +62,137 @@ class PairwiseLabeler:
                 HF_INPUT_DATASET_COLUMN_A: [f"This is sample generation A {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
                 HF_INPUT_DATASET_COLUMN_B: [f"This is sample generation B {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
             }
             return pd.DataFrame(sample_data)
-    def get_current_pair(self):
-        if self.current_index >= len(self.df):
-            return None, None, None
-        item = self.df.iloc[self.current_index]
-        item_id = item.get(HF_INPUT_DATASET_ID_COLUMN, f"item_{self.current_index}")
         left_text = item.get(HF_INPUT_DATASET_COLUMN_A, "")
         right_text = item.get(HF_INPUT_DATASET_COLUMN_B, "")
-        return item_id, left_text, right_text
-    def submit_judgment(self, item_id, left_text, right_text, choice):
         if item_id is None:
-            return item_id, left_text, right_text, self.current_index
         # Record the judgment
         result = {
             "item_id": item_id,
-            "generation_a": left_text,
-            "generation_b": right_text,
             "judgment": choice,
             "timestamp": datetime.datetime.now().isoformat(),
-            "labeler_id": str(uuid.uuid4())[:8]  # Anonymous ID for the labeling session
         }
-        self.results.append(result)
         # Move to next item
-        self.current_index += 1
         # Save results periodically
-        if len(self.results) % SAVE_EVERY_N_EXAMPLES == 0:
-            self.save_results()
         # Get next pair
-        next_id, next_left, next_right = self.get_current_pair()
-        return next_id, next_left, next_right, self.current_index
-    def save_results(self):
-        if not self.results:
             return
         try:
             # Convert results to dataset format
-            results_df = pd.DataFrame(self.results)
             results_df.to_json("temp.jsonl", orient="records", lines=True)
             # Push to Hugging Face Hub
             try:
-                num_files = len([_ for _ in list_repo_tree(repo_id=HF_OUTPUT_DATASET, repo_type="dataset", path_in_repo=HF_OUTPUT_DATASET_DIR)])
             except Exception as e:
                 num_files = 0
-            upload_file(repo_id=HF_OUTPUT_DATASET, repo_type="dataset", path_in_repo=os.path.join(HF_OUTPUT_DATASET_DIR, f"results_{num_files+1}.jsonl"), path_or_fileobj="temp.jsonl")
             os.remove("temp.jsonl")
-            self.results = []
-            logging.info(f"Saved {len(self.results)} results to {HF_OUTPUT_DATASET}")
         except Exception as e:
             logging.error(f"Error saving results: {e}")
 # Initialize the labeler
 labeler = PairwiseLabeler()
-# Get the first pair
-initial_id, initial_left, initial_right = labeler.get_current_pair()
 with gr.Blocks() as app:
     gr.Markdown(INSTRUCTIONS)
     with gr.Row():
         with gr.Column():
             left_output = gr.Textbox(
-                value=initial_left,
-                label="Model Output A",
                 lines=10,
                 interactive=False
             )
         with gr.Column():
             right_output = gr.Textbox(
-                value=initial_right,
-                label="Model Output B",
                 lines=10,
                 interactive=False
             )
-    item_id = gr.Textbox(value=initial_id, visible=False)
     with gr.Row():
         left_btn = gr.Button("⬅️ A is better", variant="primary")
@@ -155,55 +201,129 @@ with gr.Blocks() as app:
         cant_choose_btn = gr.Button("🤔 Can't choose")
     current_sample_sld = gr.Slider(minimum=0, maximum=len(labeler), step=1,
-                                   value=labeler.current_index,
-                                   interactive=False,
-                                   label='sample_ind',
-                                   info=f"Samples labeled (out of {len(labeler)})",
-                                   show_label=False,
-                                   container=False,
-                                   scale=5)
-    def judge_left(item_id, left_text, right_text):
-        return judge("A is better", item_id, left_text, right_text)
-    def judge_right(item_id, left_text, right_text):
-        return judge("B is better", item_id, left_text, right_text)
-    def judge_tie(item_id, left_text, right_text):
-        return judge("Tie", item_id, left_text, right_text)
-    def judge_cant_choose(item_id, left_text, right_text):
-        return judge("Can't choose", item_id, left_text, right_text)
-    def judge(choice, item_id, left_text, right_text):
-        new_id, new_left, new_right, new_index = labeler.submit_judgment(
-            item_id, left_text, right_text, choice
         )
-        return new_id, new_left, new_right, new_index
-    left_btn.click(
-        judge_left,
-        inputs=[item_id, left_output, right_output],
-        outputs=[item_id, left_output, right_output, current_sample_sld]
-    )
-    right_btn.click(
-        judge_right,
-        inputs=[item_id, left_output, right_output],
-        outputs=[item_id, left_output, right_output, current_sample_sld]
-    )
-    tie_btn.click(
-        judge_tie,
-        inputs=[item_id, left_output, right_output],
-        outputs=[item_id, left_output, right_output, current_sample_sld]
-    )
-    cant_choose_btn.click(
-        judge_cant_choose,
-        inputs=[item_id, left_output, right_output],
-        outputs=[item_id, left_output, right_output, current_sample_sld]
-    )
 if __name__ == "__main__":
     app.launch()

 import logging
 from huggingface_hub import hf_hub_download, upload_file, list_repo_tree
 from dotenv import load_dotenv
+from collections import defaultdict
 load_dotenv()
 HF_INPUT_DATASET_ID_COLUMN = os.getenv("HF_INPUT_DATASET_ID_COLUMN")
 HF_INPUT_DATASET_COLUMN_A = os.getenv("HF_INPUT_DATASET_COLUMN_A")
 HF_INPUT_DATASET_COLUMN_B = os.getenv("HF_INPUT_DATASET_COLUMN_B")
+HF_INPUT_DATASET_URL_COLUMN = os.getenv("HF_INPUT_DATASET_URL_COLUMN")
 HF_OUTPUT_DATASET = os.getenv("HF_OUTPUT_DATASET")
 HF_OUTPUT_DATASET_DIR = os.getenv("HF_OUTPUT_DATASET_DIR")
 INSTRUCTIONS = """
 # Pairwise Model Output Labeling
 Please compare the two model outputs shown below and select which one you think is better.
+- Choose "A is better" if the output from Model A (left) is superior
+- Choose "B is better" if the output from Model B (right) is superior
+- Choose "Tie" if you think they are equally good or bad
 - Choose "Can't choose" if you cannot make a determination
 """
 SAVE_EVERY_N_EXAMPLES = 5
 class PairwiseLabeler:
     def __init__(self):
+        self.current_index = defaultdict(int)
+        self.results = defaultdict(list)
         self.df = self.read_hf_dataset()
+        self.has_url_column = HF_INPUT_DATASET_URL_COLUMN and HF_INPUT_DATASET_URL_COLUMN in self.df.columns
     def __len__(self):
         return len(self.df)
                 HF_INPUT_DATASET_COLUMN_A: [f"This is sample generation A {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
                 HF_INPUT_DATASET_COLUMN_B: [f"This is sample generation B {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
             }
+            # Add URL column to sample data if specified
+            if HF_INPUT_DATASET_URL_COLUMN:
+                sample_data[HF_INPUT_DATASET_URL_COLUMN] = [f"https://example.com/sample_{i}" for i in range(SAVE_EVERY_N_EXAMPLES)]
             return pd.DataFrame(sample_data)
+    def get_current_pair(self, session_id):
+        if self.current_index[session_id] >= len(self.df):
+            if self.has_url_column:
+                return None, None, None, None
+            else:
+                return None, None, None
+        item = self.df.iloc[self.current_index[session_id]]
+        item_id = item.get(HF_INPUT_DATASET_ID_COLUMN, f"item_{self.current_index[session_id]}")
         left_text = item.get(HF_INPUT_DATASET_COLUMN_A, "")
         right_text = item.get(HF_INPUT_DATASET_COLUMN_B, "")
+        if self.has_url_column:
+            url = item.get(HF_INPUT_DATASET_URL_COLUMN, "")
+            return item_id, left_text, right_text, url
+        else:
+            return item_id, left_text, right_text
+    def submit_judgment(self, item_id, left_text, right_text, choice, session_id):
         if item_id is None:
+            if self.has_url_column:
+                return item_id, left_text, right_text, None, self.current_index[session_id]
+            else:
+                return item_id, left_text, right_text, self.current_index[session_id]
+        # Get the current URL if available
+        current_url = None
+        if self.has_url_column:
+            current_url = self.df.iloc[self.current_index[session_id]].get(HF_INPUT_DATASET_URL_COLUMN, "")
         # Record the judgment
         result = {
             "item_id": item_id,
             "judgment": choice,
             "timestamp": datetime.datetime.now().isoformat(),
+            "labeler_id": session_id
         }
+        self.results[session_id].append(result)
         # Move to next item
+        self.current_index[session_id] += 1
         # Save results periodically
+        if len(self.results[session_id]) % SAVE_EVERY_N_EXAMPLES == 0:
+            self.save_results(session_id)
         # Get next pair
+        if self.has_url_column:
+            next_id, next_left, next_right, next_url = self.get_current_pair(session_id)
+            return next_id, next_left, next_right, next_url, self.current_index[session_id]
+        else:
+            next_id, next_left, next_right = self.get_current_pair(session_id)
+            return next_id, next_left, next_right, self.current_index[session_id]
+    def save_results(self, session_id):
+        if not self.results[session_id]:
             return
         try:
             # Convert results to dataset format
+            results_df = pd.DataFrame(self.results[session_id])
             results_df.to_json("temp.jsonl", orient="records", lines=True)
             # Push to Hugging Face Hub
             try:
+                num_files = len([_ for _ in list_repo_tree(repo_id=HF_OUTPUT_DATASET, repo_type="dataset", path_in_repo=HF_OUTPUT_DATASET_DIR) if session_id in _.path])
             except Exception as e:
                 num_files = 0
+            # Use session_id in filename to avoid conflicts
+            filename = f"results_{session_id}_{num_files+1}.jsonl"
+            upload_file(
+                repo_id=HF_OUTPUT_DATASET,
+                repo_type="dataset",
+                path_in_repo=os.path.join(HF_OUTPUT_DATASET_DIR, filename),
+                path_or_fileobj="temp.jsonl"
+            )
             os.remove("temp.jsonl")
+            # Clear saved results
+            self.results[session_id] = []
+            logging.info(f"Saved results for session {session_id} to {HF_OUTPUT_DATASET}/{filename}")
         except Exception as e:
             logging.error(f"Error saving results: {e}")
+            # Keep results in memory to try saving again later
 # Initialize the labeler
 labeler = PairwiseLabeler()
+# Create a unique session ID
+def create_new_session():
+    return str(uuid.uuid4())[:8]
 with gr.Blocks() as app:
+    # State for the session ID
+    session_id = gr.State(value=None)
+    # The actual interface components will be created here
     gr.Markdown(INSTRUCTIONS)
+    # URL display component - only shown if URL column is defined
+    url_display = None
+    if labeler.has_url_column:
+        url_display = gr.HTML(label="Reference URL")
+    session_id_display = gr.Textbox(label="Session Information", interactive=False)
     with gr.Row():
         with gr.Column():
             left_output = gr.Textbox(
+                label="Model A Output",
                 lines=10,
                 interactive=False
             )
         with gr.Column():
             right_output = gr.Textbox(
+                label="Model B Output",
                 lines=10,
                 interactive=False
             )
+    item_id = gr.Textbox(visible=False)
     with gr.Row():
         left_btn = gr.Button("⬅️ A is better", variant="primary")
         cant_choose_btn = gr.Button("🤔 Can't choose")
     current_sample_sld = gr.Slider(minimum=0, maximum=len(labeler), step=1,
+                                  interactive=False,
+                                  label='sample_ind',
+                                  info=f"Samples labeled (out of {len(labeler)})",
+                                  show_label=False,
+                                  container=False,
+                                  scale=5)
+    # Initialize the session and get the first pair
+    def init_session():
+        new_session_id = create_new_session()
+        if labeler.has_url_column:
+            initial_id, initial_left, initial_right, initial_url = labeler.get_current_pair(new_session_id)
+            url_html = f'<a href="{initial_url}" target="_blank">{initial_url}</a>' if initial_url else ""
+            return (
+                new_session_id,                            # session_id state
+                f"Session ID: {new_session_id}",           # session_id_display
+                url_html,                                  # url_display
+                initial_left,                              # left_output
+                initial_right,                             # right_output
+                initial_id,                                # item_id
+                labeler.current_index[new_session_id]      # current_sample_sld
+            )
+        else:
+            initial_id, initial_left, initial_right = labeler.get_current_pair(new_session_id)
+            return (
+                new_session_id,                            # session_id state
+                f"Session ID: {new_session_id}",           # session_id_display
+                initial_left,                              # left_output
+                initial_right,                             # right_output
+                initial_id,                                # item_id
+                labeler.current_index[new_session_id]      # current_sample_sld
+            )
+    # Run the initialization when the app loads
+    if labeler.has_url_column:
+        app.load(
+            init_session,
+            inputs=None,
+            outputs=[session_id, session_id_display, url_display, left_output, right_output, item_id, current_sample_sld]
+        )
+    else:
+        app.load(
+            init_session,
+            inputs=None,
+            outputs=[session_id, session_id_display, left_output, right_output, item_id, current_sample_sld]
+        )
+    def judge_left(session_id, item_id, left_text, right_text):
+        return judge("A is better", session_id, item_id, left_text, right_text)
+    def judge_right(session_id, item_id, left_text, right_text):
+        return judge("B is better", session_id, item_id, left_text, right_text)
+    def judge_tie(session_id, item_id, left_text, right_text):
+        return judge("Tie", session_id, item_id, left_text, right_text)
+    def judge_cant_choose(session_id, item_id, left_text, right_text):
+        return judge("Can't choose", session_id, item_id, left_text, right_text)
+    def judge(choice, session_id, item_id, left_text, right_text):
+        if labeler.has_url_column:
+            new_id, new_left, new_right, new_url, new_index = labeler.submit_judgment(
+                item_id, left_text, right_text, choice, session_id
+            )
+            url_html = f'<a href="{new_url}" target="_blank">{new_url}</a>' if new_url else ""
+            return new_id, new_left, new_right, url_html, new_index
+        else:
+            new_id, new_left, new_right, new_index = labeler.submit_judgment(
+                item_id, left_text, right_text, choice, session_id
+            )
+            return new_id, new_left, new_right, new_index
+    if labeler.has_url_column:
+        left_btn.click(
+            judge_left,
+            inputs=[session_id, item_id, left_output, right_output],
+            outputs=[item_id, left_output, right_output, url_display, current_sample_sld]
+        )
+        right_btn.click(
+            judge_right,
+            inputs=[session_id, item_id, left_output, right_output],
+            outputs=[item_id, left_output, right_output, url_display, current_sample_sld]
+        )
+        tie_btn.click(
+            judge_tie,
+            inputs=[session_id, item_id, left_output, right_output],
+            outputs=[item_id, left_output, right_output, url_display, current_sample_sld]
+        )
+        cant_choose_btn.click(
+            judge_cant_choose,
+            inputs=[session_id, item_id, left_output, right_output],
+            outputs=[item_id, left_output, right_output, url_display, current_sample_sld]
+        )
+    else:
+        left_btn.click(
+            judge_left,
+            inputs=[session_id, item_id, left_output, right_output],
+            outputs=[item_id, left_output, right_output, current_sample_sld]
+        )
+        right_btn.click(
+            judge_right,
+            inputs=[session_id, item_id, left_output, right_output],
+            outputs=[item_id, left_output, right_output, current_sample_sld]
+        )
+        tie_btn.click(
+            judge_tie,
+            inputs=[session_id, item_id, left_output, right_output],
+            outputs=[item_id, left_output, right_output, current_sample_sld]
+        )
+        cant_choose_btn.click(
+            judge_cant_choose,
+            inputs=[session_id, item_id, left_output, right_output],
+            outputs=[item_id, left_output, right_output, current_sample_sld]
         )
 if __name__ == "__main__":
     app.launch()