luulinh90s commited on
Commit
9914f07
·
1 Parent(s): 4ef1c1d
app.py CHANGED
@@ -83,7 +83,7 @@ def save_session_data_to_hf(session_id, data):
83
  f.write(json_data)
84
 
85
  api = HfApi()
86
- repo_path = "session_data_foward_simulation"
87
 
88
  api.upload_file(
89
  path_or_fileobj=temp_file_path,
@@ -115,8 +115,8 @@ def load_samples():
115
  def select_balanced_samples(samples):
116
  try:
117
  # Separate samples into two groups
118
- tp_fp_samples = [s for s in samples if s['category'] in ['TP', 'FP']]
119
- tn_fn_samples = [s for s in samples if s['category'] in ['TN', 'FN']]
120
 
121
  # Check if we have enough samples in each group
122
  if len(tp_fp_samples) < 5 or len(tn_fn_samples) < 5:
@@ -323,11 +323,12 @@ def completed(session_id):
323
  logger.info(f"ground_truth_key: {ground_truth_key}")
324
 
325
  if ground_truth_key in ground_truth:
326
- # TODO: Important Note ->
327
- # Using model prediction as we are doing forward simulation
328
- # Please use ground_truth[ground_truth_key]['answer'].upper() if running verification task
329
  model_prediction = ground_truth[ground_truth_key]['prediction'].upper()
330
- if user_prediction.upper() == model_prediction:
 
 
 
 
331
  correct_predictions += 1
332
 
333
  if user_prediction.upper() == "TRUE":
 
83
  f.write(json_data)
84
 
85
  api = HfApi()
86
+ repo_path = "session_data_debugging"
87
 
88
  api.upload_file(
89
  path_or_fileobj=temp_file_path,
 
115
  def select_balanced_samples(samples):
116
  try:
117
  # Separate samples into two groups
118
+ tp_fp_samples = [s for s in samples if s['category'] in ['TP', 'TN']]
119
+ tn_fn_samples = [s for s in samples if s['category'] in ['FP', 'FN']]
120
 
121
  # Check if we have enough samples in each group
122
  if len(tp_fp_samples) < 5 or len(tn_fn_samples) < 5:
 
323
  logger.info(f"ground_truth_key: {ground_truth_key}")
324
 
325
  if ground_truth_key in ground_truth:
 
 
 
326
  model_prediction = ground_truth[ground_truth_key]['prediction'].upper()
327
+ ground_truth_label = ground_truth[ground_truth_key]['answer'].upper()
328
+
329
+ correctness = "TRUE" if model_prediction.upper() == ground_truth_label.upper() else "FALSE"
330
+
331
+ if user_prediction.upper() == correctness:
332
  correct_predictions += 1
333
 
334
  if user_prediction.upper() == "TRUE":
templates/attribution.html CHANGED
@@ -159,7 +159,7 @@
159
  <li>Green highlights (cells): Pinpoint the exact information (opposing team's score of 0) that directly answers the question.</li>
160
  </ul>
161
  <p>
162
- During the experiment, you will use explanations to simulate model prediction. Your task will be to look at the provided explanations and simulate the model prediction on the Statement (CORRECT or WRONG).
163
  </p>
164
  <button onclick="location.href='{{ url_for('index') }}'">Proceed to Experiment</button>
165
  </div>
 
159
  <li>Green highlights (cells): Pinpoint the exact information (opposing team's score of 0) that directly answers the question.</li>
160
  </ul>
161
  <p>
162
+ During the experiment, you will use explanations to debug model predictions. Your task will be to look at the provided explanations and determine whether the model will output a wrong prediction.
163
  </p>
164
  <button onclick="location.href='{{ url_for('index') }}'">Proceed to Experiment</button>
165
  </div>
templates/experiment.html CHANGED
@@ -118,22 +118,22 @@
118
  </div>
119
  <div class="task-description">
120
  <p><strong>Task:</strong> {{ statement | safe }}</p>
121
- <p class="highlight">Based on the explanation below, will the AI model predict the statement as correct or wrong?</p>
122
  </div>
123
  <div class="visualization-container">
124
  <iframe src="{{ visualization }}"></iframe>
125
  </div>
126
  <div class="bottom-question">
127
- <h1>Will the model predict the statement as Correct or Wrong based on the provided explanation?</h1>
128
  </div>
129
  <div class="buttons">
130
  <form action="{{ url_for('feedback') }}" method="post" onsubmit="showLoader()">
131
  <input type="hidden" name="session_id" value="{{ session_id }}">
132
- <button type="submit" name="prediction" value="TRUE"><h1>Model predicts: Correct</h1></button>
133
  </form>
134
  <form action="{{ url_for('feedback') }}" method="post" onsubmit="showLoader()">
135
  <input type="hidden" name="session_id" value="{{ session_id }}">
136
- <button type="submit" name="prediction" value="FALSE"><h1>Model predicts: Wrong</h1></button>
137
  </form>
138
  </div>
139
  </div>
 
118
  </div>
119
  <div class="task-description">
120
  <p><strong>Task:</strong> {{ statement | safe }}</p>
121
+ <p class="highlight">You will not be given the model prediction. Based on the explanation below, will the AI model output a wrong prediction?</p>
122
  </div>
123
  <div class="visualization-container">
124
  <iframe src="{{ visualization }}"></iframe>
125
  </div>
126
  <div class="bottom-question">
127
+ <h1>Will the model output a wrong prediction based on the provided explanation?</h1>
128
  </div>
129
  <div class="buttons">
130
  <form action="{{ url_for('feedback') }}" method="post" onsubmit="showLoader()">
131
  <input type="hidden" name="session_id" value="{{ session_id }}">
132
+ <button type="submit" name="prediction" value="TRUE"><h1>Model will be Correct</h1></button>
133
  </form>
134
  <form action="{{ url_for('feedback') }}" method="post" onsubmit="showLoader()">
135
  <input type="hidden" name="session_id" value="{{ session_id }}">
136
+ <button type="submit" name="prediction" value="FALSE"><h1>Model will be Wrong</h1></button>
137
  </form>
138
  </div>
139
  </div>
templates/introduction.html CHANGED
@@ -201,7 +201,7 @@
201
 
202
  <div class="outro">
203
  <h2>Your Task</h2>
204
- <p>Your task in this experiment is to evaluate methods that explain how a TableQA works. You will be given an explanation and asked to simulate what the TableQA model will predict (Correct or Wrong).</p>
205
  </div>
206
 
207
  <div class="button-container">
 
201
 
202
  <div class="outro">
203
  <h2>Your Task</h2>
204
+ <p>Your task in this experiment is to evaluate methods that explain how a TableQA model works. You will be given an explanation and asked to detect AI errors.</p>
205
  </div>
206
 
207
  <div class="button-container">