Tabular-LLM-Study-Debugging

Sleeping

luulinh90s committed on Sep 17, 2024

Commit

9914f07

1 Parent(s): 4ef1c1d

update

Files changed (4) hide show

app.py CHANGED Viewed

@@ -83,7 +83,7 @@ def save_session_data_to_hf(session_id, data):
             f.write(json_data)
         api = HfApi()
-        repo_path = "session_data_foward_simulation"
         api.upload_file(
             path_or_fileobj=temp_file_path,
@@ -115,8 +115,8 @@ def load_samples():
 def select_balanced_samples(samples):
     try:
         # Separate samples into two groups
-        tp_fp_samples = [s for s in samples if s['category'] in ['TP', 'FP']]
-        tn_fn_samples = [s for s in samples if s['category'] in ['TN', 'FN']]
         # Check if we have enough samples in each group
         if len(tp_fp_samples) < 5 or len(tn_fn_samples) < 5:
@@ -323,11 +323,12 @@ def completed(session_id):
             logger.info(f"ground_truth_key: {ground_truth_key}")
             if ground_truth_key in ground_truth:
-                # TODO: Important Note ->
-                # Using model prediction as we are doing forward simulation
-                # Please use ground_truth[ground_truth_key]['answer'].upper() if running verification task
                 model_prediction = ground_truth[ground_truth_key]['prediction'].upper()
-                if user_prediction.upper() == model_prediction:
                     correct_predictions += 1
                 if user_prediction.upper() == "TRUE":

             f.write(json_data)
         api = HfApi()
+        repo_path = "session_data_debugging"
         api.upload_file(
             path_or_fileobj=temp_file_path,
 def select_balanced_samples(samples):
     try:
         # Separate samples into two groups
+        tp_fp_samples = [s for s in samples if s['category'] in ['TP', 'TN']]
+        tn_fn_samples = [s for s in samples if s['category'] in ['FP', 'FN']]
         # Check if we have enough samples in each group
         if len(tp_fp_samples) < 5 or len(tn_fn_samples) < 5:
             logger.info(f"ground_truth_key: {ground_truth_key}")
             if ground_truth_key in ground_truth:
                 model_prediction = ground_truth[ground_truth_key]['prediction'].upper()
+                ground_truth_label = ground_truth[ground_truth_key]['answer'].upper()
+                correctness = "TRUE" if model_prediction.upper() == ground_truth_label.upper() else "FALSE"
+                if user_prediction.upper() == correctness:
                     correct_predictions += 1
                 if user_prediction.upper() == "TRUE":

templates/attribution.html CHANGED Viewed

@@ -159,7 +159,7 @@
             <li>Green highlights (cells): Pinpoint the exact information (opposing team's score of 0) that directly answer the question.</li>
         </ul>
         <p>
-            During the experiment, you will use explanations to simulate model prediction. Your task will be to look at the provided explanations and simulate the model prediction on the Statement (CORRECT or WRONG).
         </p>
         <button onclick="location.href='{{ url_for('index') }}'">Proceed to Experiment</button>
     </div>

             <li>Green highlights (cells): Pinpoint the exact information (opposing team's score of 0) that directly answer the question.</li>
         </ul>
         <p>
+            During the experiment, you will use explanations to debug model prediction. Your task will be to look at the provided explanations and determine if model will output the wrong prediction.
         </p>
         <button onclick="location.href='{{ url_for('index') }}'">Proceed to Experiment</button>
     </div>

templates/experiment.html CHANGED Viewed

@@ -118,22 +118,22 @@
         </div>
         <div class="task-description">
             <p><strong>Task:</strong> {{ statement | safe }}</p>
-            <p class="highlight">Based on the explanation below, will the AI model predict the statement as correct or wrong?</p>
         </div>
         <div class="visualization-container">
             <iframe src="{{ visualization }}"></iframe>
         </div>
         <div class="bottom-question">
-            <h1>Will the model predict the statement as Correct or Wrong based on the provided explanation?</h1>
         </div>
         <div class="buttons">
             <form action="{{ url_for('feedback') }}" method="post" onsubmit="showLoader()">
                 <input type="hidden" name="session_id" value="{{ session_id }}">
-                <button type="submit" name="prediction" value="TRUE"><h1>Model predicts: Correct</h1></button>
             </form>
             <form action="{{ url_for('feedback') }}" method="post" onsubmit="showLoader()">
                 <input type="hidden" name="session_id" value="{{ session_id }}">
-                <button type="submit" name="prediction" value="FALSE"><h1>Model predicts: Wrong</h1></button>
             </form>
         </div>
     </div>

         </div>
         <div class="task-description">
             <p><strong>Task:</strong> {{ statement | safe }}</p>
+            <p class="highlight">You will not be given the model prediction. Based on the explanation below, will the AI model output a wrong prediction?</p>
         </div>
         <div class="visualization-container">
             <iframe src="{{ visualization }}"></iframe>
         </div>
         <div class="bottom-question">
+            <h1>Will the model output a wrong prediction based on the provided explanation?</h1>
         </div>
         <div class="buttons">
             <form action="{{ url_for('feedback') }}" method="post" onsubmit="showLoader()">
                 <input type="hidden" name="session_id" value="{{ session_id }}">
+                <button type="submit" name="prediction" value="TRUE"><h1>Model will be Correct</h1></button>
             </form>
             <form action="{{ url_for('feedback') }}" method="post" onsubmit="showLoader()">
                 <input type="hidden" name="session_id" value="{{ session_id }}">
+                <button type="submit" name="prediction" value="FALSE"><h1>Model will be Wrong</h1></button>
             </form>
         </div>
     </div>

templates/introduction.html CHANGED Viewed

@@ -201,7 +201,7 @@
         <div class="outro">
             <h2>Your Task</h2>
-            <p>Your task in this experiment is to evaluate methods that explain how a TableQA works. You will be given an explanation and asked to simulate what the TableQA model will predict (Correct or Wrong).</p>
         </div>
         <div class="button-container">

         <div class="outro">
             <h2>Your Task</h2>
+            <p>Your task in this experiment is to evaluate methods that explain how a TableQA works. You will be given an explanation and asked to detect AI errors.</p>
         </div>
         <div class="button-container">