Spaces:

orionweller
/

human-mlm-clm-predictor

Sleeping

App Files Files Community

orionweller committed 23 days ago

Commit

9b671c4

verified ·

1 Parent(s): bbed6df

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -21

app.py CHANGED Viewed

@@ -69,6 +69,8 @@ def prepare_mlm_sample(text, mask_ratio=0.15):
     num_to_mask = max(1, min(8, int(len(maskable_indices) * mask_ratio)))
     # Randomly select indices to mask
     indices_to_mask = random.sample(maskable_indices, min(num_to_mask, len(maskable_indices)))
     # Create a copy of tokens to mask
     masked_tokens_list = tokens.copy()
@@ -323,6 +325,7 @@ def generate_new_sample(mask_ratio):
 def check_answer(user_input, task):
     """Check user answer based on current task."""
     if task == "mlm":
         return check_mlm_answer(user_input)
     else:  # NTP
@@ -373,7 +376,11 @@ with gr.Blocks(title="MLM and NTP Testing") as demo:
         new_button = gr.Button("New Sample")
         reset_button = gr.Button("Reset Stats")
-    with gr.Group() as mlm_group:
         mlm_instructions = gr.Markdown("""
         ### MLM Instructions
         1. For each [MASK] token, provide your guess for the original word.
@@ -381,28 +388,50 @@ with gr.Blocks(title="MLM and NTP Testing") as demo:
         3. Make sure you provide exactly the same number of answers as [MASK] tokens.
         **Example format:** `word1, word2, word3` or `word1,word2,word3`
-        """)
-        mlm_answer = gr.Textbox(
-            label="Your answers (comma-separated)",
-            placeholder="word1, word2, word3",
-            lines=1
-        )
-    with gr.Group(visible=False) as ntp_group:
-        ntp_answer = gr.Textbox(
-            label="Your Next Token Prediction",
-            placeholder="Predict the next token/word...",
             lines=1
         )
     with gr.Row():
-        check_button = gr.Button("Check Answer")
     result = gr.Textbox(label="Result", lines=6)
     # Set up event handlers
-    task_radio.change(switch_task, inputs=[task_radio], outputs=[mlm_group, ntp_group])
     # Update the sample text and also update the mask count
     def new_sample_with_count(mask_ratio_pct, task):
@@ -426,16 +455,23 @@ with gr.Blocks(title="MLM and NTP Testing") as demo:
     reset_button.click(reset_stats, inputs=None, outputs=[result])
     check_button.click(
-        check_answer,
-        inputs=[
-            gr.Textbox(value=lambda: mlm_answer.value if current_task == "mlm" else ntp_answer.value),
-            task_radio
-        ],
         outputs=[result]
     )
-    mlm_answer.submit(check_mlm_answer, inputs=[mlm_answer], outputs=[result])
-    ntp_answer.submit(check_ntp_answer, inputs=[ntp_answer], outputs=[result])
 demo.launch()

     num_to_mask = max(1, min(8, int(len(maskable_indices) * mask_ratio)))
     # Randomly select indices to mask
     indices_to_mask = random.sample(maskable_indices, min(num_to_mask, len(maskable_indices)))
+    # Sort indices to ensure they're in order
+    indices_to_mask.sort()
     # Create a copy of tokens to mask
     masked_tokens_list = tokens.copy()
 def check_answer(user_input, task):
     """Check user answer based on current task."""
+    # Make the current task visible in UI and more prominent
     if task == "mlm":
         return check_mlm_answer(user_input)
     else:  # NTP
         new_button = gr.Button("New Sample")
         reset_button = gr.Button("Reset Stats")
+    # Consolidated input area - only one visible at a time
+    input_area = gr.Group()
+    with input_area:
+        # Task-specific input instructions
         mlm_instructions = gr.Markdown("""
         ### MLM Instructions
         1. For each [MASK] token, provide your guess for the original word.
         3. Make sure you provide exactly the same number of answers as [MASK] tokens.
         **Example format:** `word1, word2, word3` or `word1,word2,word3`
+        """, visible=True)
+        ntp_instructions = gr.Markdown("""
+        ### NTP Instructions
+        Predict the next word or token that would follow the text.
+        Type a single word or token for each prediction.
+        """, visible=False)
+        # Unified input box
+        answer_input = gr.Textbox(
+            label="Your answer",
+            placeholder="For MLM: word1, word2, word3 | For NTP: single word",
             lines=1
         )
     with gr.Row():
+        check_button = gr.Button("Check Answer", variant="primary")
     result = gr.Textbox(label="Result", lines=6)
+    # Task-switch callback wired to task_radio.change: returns, in order, the updates for mlm_instructions, ntp_instructions, answer_input and the mask_count text
+    def switch_task_unified(task):
+        if task == "mlm":
+            mask_text = f"**Number of [MASK] tokens to guess: {len(masked_tokens)}**"  # masked_tokens is defined outside this view (presumably the current sample's masked tokens - TODO confirm)
+            return (
+                gr.update(visible=True),  # mlm_instructions
+                gr.update(visible=False), # ntp_instructions
+                gr.update(placeholder="comma-separated answers (e.g., word1, word2, word3)"),  # answer_input: only the placeholder is changed
+                mask_text  # mask_count
+            )
+        else:  # ntp
+            return (
+                gr.update(visible=False), # mlm_instructions
+                gr.update(visible=True),  # ntp_instructions
+                gr.update(placeholder="Type the next word/token you predict"),  # answer_input: only the placeholder is changed
+                "**Next Token Prediction mode - guess one token at a time**"  # mask_count
+            )
     # Set up event handlers
+    task_radio.change(
+        switch_task_unified,
+        inputs=[task_radio],
+        outputs=[mlm_instructions, ntp_instructions, answer_input, mask_count]
+    )
     # Update the sample text and also update the mask count
     def new_sample_with_count(mask_ratio_pct, task):
     reset_button.click(reset_stats, inputs=None, outputs=[result])
+    # Unified check handler shared by check_button.click and answer_input.submit: routes the typed answer to the checker for the selected task and returns its output for the result box
+    def unified_check_answer(user_input, task):
+        if task == "mlm":
+            return check_mlm_answer(user_input)
+        else:  # ntp
+            return check_ntp_answer(user_input)
     check_button.click(
+        unified_check_answer,
+        inputs=[answer_input, task_radio],
         outputs=[result]
     )
+    answer_input.submit(
+        unified_check_answer,
+        inputs=[answer_input, task_radio],
+        outputs=[result]
+    )
 demo.launch()