Commit
·
9914f07
1
Parent(s):
4ef1c1d
update
Browse files- app.py +8 -7
- templates/attribution.html +1 -1
- templates/experiment.html +4 -4
- templates/introduction.html +1 -1
app.py
CHANGED
@@ -83,7 +83,7 @@ def save_session_data_to_hf(session_id, data):
|
|
83 |
f.write(json_data)
|
84 |
|
85 |
api = HfApi()
|
86 |
-
repo_path = "
|
87 |
|
88 |
api.upload_file(
|
89 |
path_or_fileobj=temp_file_path,
|
@@ -115,8 +115,8 @@ def load_samples():
|
|
115 |
def select_balanced_samples(samples):
|
116 |
try:
|
117 |
# Separate samples into two groups
|
118 |
-
tp_fp_samples = [s for s in samples if s['category'] in ['TP', '
|
119 |
-
tn_fn_samples = [s for s in samples if s['category'] in ['
|
120 |
|
121 |
# Check if we have enough samples in each group
|
122 |
if len(tp_fp_samples) < 5 or len(tn_fn_samples) < 5:
|
@@ -323,11 +323,12 @@ def completed(session_id):
|
|
323 |
logger.info(f"ground_truth_key: {ground_truth_key}")
|
324 |
|
325 |
if ground_truth_key in ground_truth:
|
326 |
-
# TODO: Important Note ->
|
327 |
-
# Using model prediction as we are doing forward simulation
|
328 |
-
# Please use ground_truth[ground_truth_key]['answer'].upper() if running verification task
|
329 |
model_prediction = ground_truth[ground_truth_key]['prediction'].upper()
|
330 |
-
|
|
|
|
|
|
|
|
|
331 |
correct_predictions += 1
|
332 |
|
333 |
if user_prediction.upper() == "TRUE":
|
|
|
83 |
f.write(json_data)
|
84 |
|
85 |
api = HfApi()
|
86 |
+
repo_path = "session_data_debugging"
|
87 |
|
88 |
api.upload_file(
|
89 |
path_or_fileobj=temp_file_path,
|
|
|
115 |
def select_balanced_samples(samples):
|
116 |
try:
|
117 |
# Separate samples into two groups
|
118 |
+
tp_fp_samples = [s for s in samples if s['category'] in ['TP', 'TN']]
|
119 |
+
tn_fn_samples = [s for s in samples if s['category'] in ['FP', 'FN']]
|
120 |
|
121 |
# Check if we have enough samples in each group
|
122 |
if len(tp_fp_samples) < 5 or len(tn_fn_samples) < 5:
|
|
|
323 |
logger.info(f"ground_truth_key: {ground_truth_key}")
|
324 |
|
325 |
if ground_truth_key in ground_truth:
|
|
|
|
|
|
|
326 |
model_prediction = ground_truth[ground_truth_key]['prediction'].upper()
|
327 |
+
ground_truth_label = ground_truth[ground_truth_key]['answer'].upper()
|
328 |
+
|
329 |
+
correctness = "TRUE" if model_prediction.upper() == ground_truth_label.upper() else "FALSE"
|
330 |
+
|
331 |
+
if user_prediction.upper() == correctness:
|
332 |
correct_predictions += 1
|
333 |
|
334 |
if user_prediction.upper() == "TRUE":
|
templates/attribution.html
CHANGED
@@ -159,7 +159,7 @@
|
|
159 |
<li>Green highlights (cells): Pinpoint the exact information (opposing team's score of 0) that directly answer the question.</li>
|
160 |
</ul>
|
161 |
<p>
|
162 |
-
During the experiment, you will use explanations to
|
163 |
</p>
|
164 |
<button onclick="location.href='{{ url_for('index') }}'">Proceed to Experiment</button>
|
165 |
</div>
|
|
|
159 |
<li>Green highlights (cells): Pinpoint the exact information (opposing team's score of 0) that directly answer the question.</li>
|
160 |
</ul>
|
161 |
<p>
|
162 |
+
During the experiment, you will use explanations to debug model predictions. Your task will be to look at the provided explanations and determine whether the model will output a wrong prediction.
|
163 |
</p>
|
164 |
<button onclick="location.href='{{ url_for('index') }}'">Proceed to Experiment</button>
|
165 |
</div>
|
templates/experiment.html
CHANGED
@@ -118,22 +118,22 @@
|
|
118 |
</div>
|
119 |
<div class="task-description">
|
120 |
<p><strong>Task:</strong> {{ statement | safe }}</p>
|
121 |
-
<p class="highlight">Based on the explanation below, will the AI model
|
122 |
</div>
|
123 |
<div class="visualization-container">
|
124 |
<iframe src="{{ visualization }}"></iframe>
|
125 |
</div>
|
126 |
<div class="bottom-question">
|
127 |
-
<h1>Will the model
|
128 |
</div>
|
129 |
<div class="buttons">
|
130 |
<form action="{{ url_for('feedback') }}" method="post" onsubmit="showLoader()">
|
131 |
<input type="hidden" name="session_id" value="{{ session_id }}">
|
132 |
-
<button type="submit" name="prediction" value="TRUE"><h1>Model
|
133 |
</form>
|
134 |
<form action="{{ url_for('feedback') }}" method="post" onsubmit="showLoader()">
|
135 |
<input type="hidden" name="session_id" value="{{ session_id }}">
|
136 |
-
<button type="submit" name="prediction" value="FALSE"><h1>Model
|
137 |
</form>
|
138 |
</div>
|
139 |
</div>
|
|
|
118 |
</div>
|
119 |
<div class="task-description">
|
120 |
<p><strong>Task:</strong> {{ statement | safe }}</p>
|
121 |
+
<p class="highlight">You will not be given the model prediction. Based on the explanation below, will the AI model output a wrong prediction?</p>
|
122 |
</div>
|
123 |
<div class="visualization-container">
|
124 |
<iframe src="{{ visualization }}"></iframe>
|
125 |
</div>
|
126 |
<div class="bottom-question">
|
127 |
+
<h1>Will the model output a wrong prediction based on the provided explanation?</h1>
|
128 |
</div>
|
129 |
<div class="buttons">
|
130 |
<form action="{{ url_for('feedback') }}" method="post" onsubmit="showLoader()">
|
131 |
<input type="hidden" name="session_id" value="{{ session_id }}">
|
132 |
+
<button type="submit" name="prediction" value="TRUE"><h1>Model will be Correct</h1></button>
|
133 |
</form>
|
134 |
<form action="{{ url_for('feedback') }}" method="post" onsubmit="showLoader()">
|
135 |
<input type="hidden" name="session_id" value="{{ session_id }}">
|
136 |
+
<button type="submit" name="prediction" value="FALSE"><h1>Model will be Wrong</h1></button>
|
137 |
</form>
|
138 |
</div>
|
139 |
</div>
|
templates/introduction.html
CHANGED
@@ -201,7 +201,7 @@
|
|
201 |
|
202 |
<div class="outro">
|
203 |
<h2>Your Task</h2>
|
204 |
-
<p>Your task in this experiment is to evaluate methods that explain how a TableQA works. You will be given an explanation and asked to
|
205 |
</div>
|
206 |
|
207 |
<div class="button-container">
|
|
|
201 |
|
202 |
<div class="outro">
|
203 |
<h2>Your Task</h2>
|
204 |
+
<p>Your task in this experiment is to evaluate methods that explain how a TableQA model works. You will be given an explanation and asked to detect AI errors.</p>
|
205 |
</div>
|
206 |
|
207 |
<div class="button-container">
|