luulinh90s committed on
Commit
dd66345
·
1 Parent(s): bf79e80
Files changed (1) hide show
  1. app.py +183 -58
app.py CHANGED
@@ -5,7 +5,17 @@ import os
5
  import string
6
  import logging
7
  from datetime import datetime
8
- from huggingface_hub import login, HfApi, hf_hub_download
 
 
 
 
 
 
 
 
 
 
9
 
10
  # Set up logging
11
  logging.basicConfig(level=logging.INFO,
@@ -16,35 +26,64 @@ logging.basicConfig(level=logging.INFO,
16
  ])
17
  logger = logging.getLogger(__name__)
18
 
19
- # Use the Hugging Face token from environment variables
20
- hf_token = os.environ.get("HF_TOKEN")
21
- if hf_token:
22
- login(token=hf_token)
23
- else:
24
- logger.error("HF_TOKEN not found in environment variables")
25
-
26
  app = Flask(__name__)
27
  app.config['SECRET_KEY'] = 'supersecretkey' # Change this to a random secret key
28
 
29
  # Directories for visualizations
30
- VISUALIZATION_DIRS = {
31
- "No-XAI": "htmls_NO_XAI",
32
- "Dater": "htmls_DATER",
33
- "Chain-of-Table": "htmls_COT",
34
- "Plan-of-SQLs": "htmls_POS"
 
 
 
 
 
 
 
35
  }
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- def save_session_data(session_id, data):
 
 
 
 
 
39
  try:
40
- file_name = f'{session_id}_session.json'
 
 
 
 
 
 
 
41
  file_name = "".join(c for c in file_name if c.isalnum() or c in ['_', '-', '.'])
42
 
 
43
  json_data = json.dumps(data, indent=4)
 
 
44
  temp_file_path = f"/tmp/{file_name}"
45
  with open(temp_file_path, 'w') as f:
46
  f.write(json_data)
47
 
 
48
  api = HfApi()
49
  api.upload_file(
50
  path_or_fileobj=temp_file_path,
@@ -53,62 +92,93 @@ def save_session_data(session_id, data):
53
  repo_type="space",
54
  )
55
 
 
56
  os.remove(temp_file_path)
57
- logger.info(f"Session data saved for session: {session_id} in Hugging Face Data Space")
 
58
  except Exception as e:
59
- logger.exception(f"Error saving session data for session: {session_id}: {e}")
60
 
 
61
 
62
- def load_session_data(session_id):
63
  try:
 
64
  api = HfApi()
65
- file_name = f'{session_id}_session.json'
 
 
 
 
 
 
 
 
 
 
 
 
66
  file_path = hf_hub_download(repo_id="luulinh90s/Tabular-LLM-Study-Data", repo_type="space",
67
- filename=f"session_data_pref/{file_name}")
68
 
69
  with open(file_path, 'r') as f:
70
  data = json.load(f)
71
 
72
- logger.info(f"Session data loaded for session: {session_id} from Hugging Face Data Space")
73
  return data
74
  except Exception as e:
75
- logger.exception(f"Error loading session data for session: {session_id}: {e}")
76
  return None
77
 
78
-
79
  def load_samples(methods):
80
  logger.info(f"Loading samples for methods: {methods}")
81
- samples = set()
82
  categories = ["TP", "TN", "FP", "FN"]
83
 
 
 
84
  for category in categories:
85
- files_a = set(os.listdir(f'{VISUALIZATION_DIRS[methods[0]]}/{category}'))
86
- files_b = set(os.listdir(f'{VISUALIZATION_DIRS[methods[1]]}/{category}'))
 
 
 
 
87
  matching_files = files_a & files_b
 
88
  for file in matching_files:
89
  samples.add((category, file))
90
 
 
91
  samples = [{'category': category, 'file': file} for category, file in samples]
 
92
  logger.info(f"Loaded {len(samples)} unique samples across all categories")
93
  return samples
94
 
95
-
96
  def select_balanced_samples(samples):
97
  try:
 
98
  unique_samples = list({(s['category'], s['file']) for s in samples})
 
99
  if len(unique_samples) < 10:
100
  logger.warning(f"Not enough unique samples. Only {len(unique_samples)} available.")
101
  selected_samples = unique_samples
102
  else:
103
  selected_samples = random.sample(unique_samples, 10)
104
 
 
 
 
105
  selected_samples = [{'category': category, 'file': file} for category, file in selected_samples]
 
106
  logger.info(f"Selected {len(selected_samples)} unique samples")
107
  return selected_samples
108
  except Exception as e:
109
  logger.exception("Error selecting balanced samples")
110
  return []
111
 
 
 
112
 
113
  @app.route('/', methods=['GET', 'POST'])
114
  def index():
@@ -123,7 +193,7 @@ def index():
123
  return "Please fill in all fields and select exactly two methods.", 400
124
 
125
  try:
126
- seed_int = int(seed)
127
  random.seed(seed_int)
128
  all_samples = load_samples(methods)
129
  selected_samples = select_balanced_samples(all_samples)
@@ -134,11 +204,11 @@ def index():
134
  return "No samples were selected", 500
135
 
136
  start_time = datetime.now().isoformat()
137
- session_id = f"{username}_{start_time}"
138
  session_data = {
139
  'session_id': session_id,
140
  'username': username,
141
- 'seed': seed,
142
  'methods': methods,
143
  'selected_samples': selected_samples,
144
  'current_index': 0,
@@ -154,41 +224,68 @@ def index():
154
  return "An error occurred", 500
155
  return render_template('index.html')
156
 
157
-
158
- @app.route('/experiment/<session_id>', methods=['GET', 'POST'])
159
- def experiment(session_id):
160
  try:
161
- session_data = load_session_data(session_id)
162
  if not session_data:
163
- logger.error(f"No session data found for session: {session_id}")
164
  return redirect(url_for('index'))
165
 
 
 
 
166
  selected_samples = session_data['selected_samples']
167
  methods = session_data['methods']
168
  current_index = session_data['current_index']
169
 
 
 
170
  if current_index >= len(selected_samples):
171
- return redirect(url_for('completed', session_id=session_id))
172
 
173
  sample = selected_samples[current_index]
 
 
174
  method_a, method_b = methods
175
 
176
- file_a = os.path.join(VISUALIZATION_DIRS[method_a], sample['category'], sample['file'])
177
- file_b = os.path.join(VISUALIZATION_DIRS[method_b], sample['category'], sample['file'])
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
- if not os.path.exists(file_a) or not os.path.exists(file_b):
 
 
 
 
 
 
 
 
 
180
  logger.error(f"Missing files for comparison at index {current_index}")
181
  session_data['current_index'] += 1
182
- save_session_data(session_id, session_data)
183
- return redirect(url_for('experiment', session_id=session_id))
184
 
185
  visualization_a = url_for('send_visualization', filename=file_a)
186
  visualization_b = url_for('send_visualization', filename=file_b)
187
 
188
  statement = """
189
- Please note that in select row function, starting index is 0 for Chain-of-Table 1 for Dater and Index * represents the selection of the whole Table.
190
- You are now given two explanations that describe the reasoning process of the Table QA model.
191
- Please analyze the explanations and determine which one provides a clearer and more accurate reasoning process.
192
  """
193
 
194
  return render_template('experiment.html',
@@ -198,51 +295,76 @@ def experiment(session_id):
198
  visualization_b=visualization_b,
199
  method_a=method_a,
200
  method_b=method_b,
201
- session_id=session_id)
202
  except Exception as e:
203
  logger.exception(f"An error occurred in the experiment route: {e}")
204
  return "An error occurred", 500
205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
  @app.route('/feedback', methods=['POST'])
208
  def feedback():
209
  try:
210
- session_id = request.form['session_id']
211
  feedback = request.form['feedback']
212
 
213
- session_data = load_session_data(session_id)
214
  if not session_data:
215
- logger.error(f"No session data found for session: {session_id}")
216
  return redirect(url_for('index'))
217
 
 
218
  session_data['responses'].append({
219
  'sample_id': session_data['current_index'],
220
  'preferred_method': feedback,
221
  'timestamp': datetime.now().isoformat()
222
  })
223
 
 
224
  session_data['current_index'] += 1
225
- save_session_data(session_id, session_data)
226
- logger.info(f"Feedback saved for session {session_id}, sample {session_data['current_index'] - 1}")
 
 
227
 
228
  if session_data['current_index'] >= len(session_data['selected_samples']):
229
- return redirect(url_for('completed', session_id=session_id))
230
 
231
- return redirect(url_for('experiment', session_id=session_id))
232
  except Exception as e:
233
  logger.exception(f"Error in feedback route: {e}")
234
  return "An error occurred", 500
235
 
236
 
237
- @app.route('/completed/<session_id>')
238
- def completed(session_id):
239
  try:
240
- session_data = load_session_data(session_id)
241
  if not session_data:
242
- logger.error(f"No session data found for session: {session_id}")
243
  return redirect(url_for('index'))
244
 
245
  session_data['end_time'] = datetime.now().isoformat()
 
246
  methods = session_data['methods']
247
  responses = session_data['responses']
248
 
@@ -257,7 +379,9 @@ def completed(session_id):
257
  preferences[method] = round((preferences[method] / total_responses) * 100, 2)
258
 
259
  session_data['preferences'] = preferences
260
- save_session_data(session_id, session_data)
 
 
261
 
262
  return render_template('completed.html', preferences=preferences)
263
  except Exception as e:
@@ -268,6 +392,7 @@ def completed(session_id):
268
  @app.route('/visualizations/<path:filename>')
269
  def send_visualization(filename):
270
  logger.info(f"Attempting to serve file: {filename}")
 
271
  base_dir = os.getcwd()
272
  file_path = os.path.normpath(os.path.join(base_dir, filename))
273
  if not file_path.startswith(base_dir):
@@ -283,5 +408,5 @@ def send_visualization(filename):
283
 
284
 
285
  if __name__ == "__main__":
286
- os.makedirs('session_data', exist_ok=True)
287
  app.run(host="0.0.0.0", port=7860, debug=True)
 
5
  import string
6
  import logging
7
  from datetime import datetime
8
+
9
+
10
+ import os
11
+ from huggingface_hub import login
12
+
13
# Authenticate against the Hugging Face Hub with the token from the environment.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
else:
    # The module-level `logger` is only created further down in this file, so
    # referencing it here would raise NameError exactly when the token is
    # missing. Ask the logging module for the same named logger instead.
    logging.getLogger(__name__).error("HF_TOKEN not found in environment variables")
19
 
20
  # Set up logging
21
  logging.basicConfig(level=logging.INFO,
 
26
  ])
27
  logger = logging.getLogger(__name__)
28
 
 
 
 
 
 
 
 
29
app = Flask(__name__)
# Take the signing secret from the environment when one is provided so it does
# not have to live in the repository; the literal fallback keeps deployments
# that never set SECRET_KEY behaving as before.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'supersecretkey')
31
 
32
  # Directories for visualizations
33
# One lookup table per explanation method: result category (TP/TN/FP/FN)
# mapped to the directory holding that method's HTML files for the category.
VISUALIZATION_DIRS_PLAN_OF_SQLS = {
    category: f"htmls_POS/{category}" for category in ("TP", "TN", "FP", "FN")
}

VISUALIZATION_DIRS_CHAIN_OF_TABLE = {
    category: f"htmls_COT/{category}" for category in ("TP", "TN", "FP", "FN")
}

VISUALIZATION_DIRS_NO_XAI = {
    category: f"htmls_NO_XAI/{category}" for category in ("TP", "TN", "FP", "FN")
}

VISUALIZATION_DIRS_DATER = {
    category: f"htmls_DATER/{category}" for category in ("TP", "TN", "FP", "FN")
}
60
 
61
+ import json
62
+ import os
63
+ from datetime import datetime
64
+
65
+ from huggingface_hub import HfApi
66
+ def save_session_data(username, data):
67
  try:
68
+ # Extract seed and start_time from the data
69
+ seed = data.get('seed', 'unknown')
70
+ start_time = data.get('start_time', datetime.now().isoformat())
71
+
72
+ # Create a filename with username, seed, and start_time
73
+ file_name = f'{username}_seed{seed}_{start_time}_session.json'
74
+
75
+ # Remove any characters that might not be safe for filenames
76
  file_name = "".join(c for c in file_name if c.isalnum() or c in ['_', '-', '.'])
77
 
78
+ # Convert data to JSON string
79
  json_data = json.dumps(data, indent=4)
80
+
81
+ # Create a temporary file
82
  temp_file_path = f"/tmp/{file_name}"
83
  with open(temp_file_path, 'w') as f:
84
  f.write(json_data)
85
 
86
+ # Upload the file to the Hugging Face Space, overwriting if it exists
87
  api = HfApi()
88
  api.upload_file(
89
  path_or_fileobj=temp_file_path,
 
92
  repo_type="space",
93
  )
94
 
95
+ # Remove the temporary file
96
  os.remove(temp_file_path)
97
+
98
+ logger.info(f"Session data saved for user {username} with seed {seed} and start time {start_time} in Hugging Face Data Space")
99
  except Exception as e:
100
+ logger.exception(f"Error saving session data for user {username}: {e}")
101
 
102
+ from huggingface_hub import hf_hub_download, HfApi
103
 
104
def load_session_data(username):
    """Load the most recently started session stored for ``username``.

    Lists the files of the ``luulinh90s/Tabular-LLM-Study-Data`` Space, keeps
    those under ``session_data_pref/`` whose name starts with the user's name,
    downloads the one with the latest start time and parses it as JSON.

    Args:
        username: Name the participant entered when starting the study.

    Returns:
        The decoded session data, or ``None`` when no file matches or any
        step fails (the failure is logged).
    """
    try:
        # List all files in the repo
        api = HfApi()
        files = api.list_repo_files(repo_id="luulinh90s/Tabular-LLM-Study-Data", repo_type="space")

        # save_session_data() removes every character outside letters, digits,
        # '_', '-' and '.' from the file name, so the same filter has to be
        # applied here or names containing e.g. spaces would never match.
        safe_username = "".join(c for c in username if c.isalnum() or c in ['_', '-', '.'])
        prefix = f'session_data_pref/{safe_username}_'
        # NOTE(review): a prefix match also accepts files of users whose name
        # merely extends this one ("bob" vs "bob_smith") - confirm whether
        # usernames are guaranteed to be unambiguous.
        user_files = [f for f in files if f.startswith(prefix)]

        if not user_files:
            logger.warning(f"No session data found for user {username}")
            return None

        suffix = '_session.json'

        def start_time_of(path):
            # Stored names end in "_<start_time>_session.json"; the start time
            # is an ISO timestamp and therefore sorts chronologically as text.
            stem = path[:-len(suffix)] if path.endswith(suffix) else path
            return stem.rsplit('_', 1)[-1]

        # Rank by start time rather than by the whole file name: the seed comes
        # before the timestamp in the name, so a plain sort would order files
        # by seed text first. The path itself only breaks ties deterministically.
        latest_file = max(user_files, key=lambda path: (start_time_of(path), path))

        # Download the file from the data storage Space
        file_path = hf_hub_download(repo_id="luulinh90s/Tabular-LLM-Study-Data", repo_type="space",
                                    filename=latest_file)

        with open(file_path, 'r') as f:
            data = json.load(f)

        logger.info(f"Session data loaded for user {username} from Hugging Face Data Space")
        return data
    except Exception as e:
        logger.exception(f"Error loading session data for user {username}: {e}")
        return None
132
 
 
133
def load_samples(methods):
    """Collect the visualization files that the two chosen methods share.

    For every category (TP, TN, FP, FN) the directories
    ``htmls_<DIR>/<category>`` of the first two methods are listed and only
    file names present in both are kept.

    Args:
        methods: Sequence of method names accepted by ``get_method_dir``;
            the first two entries are compared.

    Returns:
        A list of ``{'category': ..., 'file': ...}`` dicts, sorted by
        category and then file name.

    Raises:
        OSError: If one of the directories cannot be listed.
    """
    logger.info(f"Loading samples for methods: {methods}")
    categories = ["TP", "TN", "FP", "FN"]

    method_dirs = [get_method_dir(method) for method in methods]

    samples = set()  # Use a set to avoid duplicates
    for category in categories:
        dir_a = f'htmls_{method_dirs[0].upper()}/{category}'
        dir_b = f'htmls_{method_dirs[1].upper()}/{category}'

        matching_files = set(os.listdir(dir_a)) & set(os.listdir(dir_b))
        samples.update((category, file) for file in matching_files)

    # Sort before converting: the iteration order of a set of strings differs
    # between interpreter runs (hash randomization), and callers should get
    # the same list for the same directory contents.
    samples = [{'category': category, 'file': file} for category, file in sorted(samples)]

    logger.info(f"Loaded {len(samples)} unique samples across all categories")
    return samples
157
 
 
158
def select_balanced_samples(samples):
    """Pick up to 10 distinct samples at random.

    Despite the name, no per-category balancing is performed: duplicates are
    removed and 10 entries are drawn with ``random.sample`` (all of them when
    fewer than 10 exist).

    Args:
        samples: Iterable of dicts with ``'category'`` and ``'file'`` keys.

    Returns:
        A list of ``{'category': ..., 'file': ...}`` dicts, or ``[]`` if
        anything goes wrong (the error is logged).
    """
    try:
        # De-duplicate through a set, then sort. Without the sort the list
        # order follows set iteration order, which changes between interpreter
        # runs, so a seeded RNG would still pick different samples each run.
        unique_samples = sorted({(s['category'], s['file']) for s in samples})

        if len(unique_samples) < 10:
            logger.warning(f"Not enough unique samples. Only {len(unique_samples)} available.")
            selected_samples = unique_samples
        else:
            selected_samples = random.sample(unique_samples, 10)

        logger.info(f"Unique sampled samples:\n{selected_samples}")

        # Convert back to dictionary format
        selected_samples = [{'category': category, 'file': file} for category, file in selected_samples]

        logger.info(f"Selected {len(selected_samples)} unique samples")
        return selected_samples
    except Exception:
        logger.exception("Error selecting balanced samples")
        return []
179
 
180
def generate_random_string(length=8):
    """Return ``length`` characters drawn at random from ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)
182
 
183
  @app.route('/', methods=['GET', 'POST'])
184
  def index():
 
193
  return "Please fill in all fields and select exactly two methods.", 400
194
 
195
  try:
196
+ seed_int = int(seed) # Convert to int for random.seed()
197
  random.seed(seed_int)
198
  all_samples = load_samples(methods)
199
  selected_samples = select_balanced_samples(all_samples)
 
204
  return "No samples were selected", 500
205
 
206
  start_time = datetime.now().isoformat()
207
+ session_id = f"{username}_{start_time}" # Create a unique session ID
208
  session_data = {
209
  'session_id': session_id,
210
  'username': username,
211
+ 'seed': seed, # Store as string
212
  'methods': methods,
213
  'selected_samples': selected_samples,
214
  'current_index': 0,
 
224
  return "An error occurred", 500
225
  return render_template('index.html')
226
 
227
+ @app.route('/experiment/<username>', methods=['GET', 'POST'])
228
+ def experiment(username):
 
229
  try:
230
+ session_data = load_session_data(username)
231
  if not session_data:
232
+ logger.error(f"No session data found for user: {username}")
233
  return redirect(url_for('index'))
234
 
235
+
236
+ logger.info(f"Session data:\n{session_data}")
237
+
238
  selected_samples = session_data['selected_samples']
239
  methods = session_data['methods']
240
  current_index = session_data['current_index']
241
 
242
+ logger.info(f"current_index:\n{current_index}")
243
+
244
  if current_index >= len(selected_samples):
245
+ return redirect(url_for('completed', username=username))
246
 
247
  sample = selected_samples[current_index]
248
+ logger.info(f"sample:\n{sample}")
249
+
250
  method_a, method_b = methods
251
 
252
+ # Find matching files for both methods
253
+
254
+ method_a_dir = get_method_dir(method_a)
255
+ method_b_dir = get_method_dir(method_b)
256
+
257
+ # for category in ['TP', 'TN', 'FP', 'FN']:
258
+ category = sample['category']
259
+ dir_a = f'htmls_{method_a_dir.upper()}/{category}'
260
+ dir_b = f'htmls_{method_b_dir.upper()}/{category}'
261
+
262
+ file_a = os.path.join(dir_a, sample['file'])
263
+ file_b = os.path.join(dir_b, sample['file'])
264
+ logger.info(f"file_a:\n{file_a}")
265
+ logger.info(f"file_b:\n{file_a}")
266
 
267
+ # files_a = os.listdir(dir_a)
268
+ # files_b = os.listdir(dir_b)
269
+ #
270
+ # matching_files = set(files_a) & set(files_b)
271
+ # if matching_files:
272
+ # file_a = os.path.join(dir_a, next(iter(matching_files)))
273
+ # file_b = os.path.join(dir_b, next(iter(matching_files)))
274
+ # break
275
+
276
+ if not file_a or not file_b:
277
  logger.error(f"Missing files for comparison at index {current_index}")
278
  session_data['current_index'] += 1
279
+ save_session_data(username, session_data)
280
+ return redirect(url_for('experiment', username=username))
281
 
282
  visualization_a = url_for('send_visualization', filename=file_a)
283
  visualization_b = url_for('send_visualization', filename=file_b)
284
 
285
  statement = """
286
+ Please note that in select row function, starting index is 0 for Chain-of-Table 1 for Dater and Index * represents the selection of the whole Table.
287
+ You are now given two explanations that describe the reasoning process of the Table QA model.
288
+ Please analyze the explanations and determine which one provides a clearer and more accurate reasoning process.
289
  """
290
 
291
  return render_template('experiment.html',
 
295
  visualization_b=visualization_b,
296
  method_a=method_a,
297
  method_b=method_b,
298
+ username=username)
299
  except Exception as e:
300
  logger.exception(f"An error occurred in the experiment route: {e}")
301
  return "An error occurred", 500
302
 
303
def get_method_dir(method):
    """Translate a method's display name into its directory suffix.

    Args:
        method: One of ``'No-XAI'``, ``'Dater'``, ``'Chain-of-Table'`` or
            ``'Plan-of-SQLs'``.

    Returns:
        The matching suffix (``'NO_XAI'``, ``'DATER'``, ``'COT'`` or ``'POS'``).

    Raises:
        ValueError: If ``method`` is not one of the known names. Previously
            the function fell through and returned ``None``, which surfaced
            later as an AttributeError on ``.upper()`` in the callers.
    """
    method_dirs = {
        'No-XAI': 'NO_XAI',
        'Dater': 'DATER',
        'Chain-of-Table': 'COT',
        'Plan-of-SQLs': 'POS',
    }
    try:
        return method_dirs[method]
    except (KeyError, TypeError):
        # TypeError covers unhashable values, which cannot be valid names either.
        raise ValueError(f"Unknown method: {method!r}") from None
312
+
313
def get_visualization_dir(method):
    """Return the top-level HTML directory for ``method``.

    Any value other than the three names checked below maps to the
    Plan-of-SQLs directory.
    """
    named_dirs = (
        ("No-XAI", 'htmls_NO_XAI'),
        ("Dater", 'htmls_DATER'),
        ("Chain-of-Table", 'htmls_COT'),
    )
    for name, directory in named_dirs:
        if method == name:
            return directory
    return 'htmls_POS'  # Plan-of-SQLs
322
+
323
 
324
@app.route('/feedback', methods=['POST'])
def feedback():
    """Record the participant's preference for the current sample and advance.

    Reads ``username`` and ``feedback`` from the submitted form, appends a
    response entry to the user's stored session, increments ``current_index``
    and saves the session.

    Returns:
        A redirect to the next experiment page, to the completion page once
        all samples are answered, or to the index page when no session
        exists; ``("An error occurred", 500)`` on any exception.
    """
    try:
        username = request.form['username']
        # Named differently from the view function to avoid shadowing it.
        preferred_method = request.form['feedback']

        session_data = load_session_data(username)
        if not session_data:
            logger.error(f"No session data found for user: {username}")
            return redirect(url_for('index'))

        # A repeated submit (double click, browser "back") after the last
        # sample must not add a response for a sample that does not exist.
        if session_data['current_index'] >= len(session_data['selected_samples']):
            logger.warning(f"Ignoring feedback for user {username}: all samples already answered")
            return redirect(url_for('completed', username=username))

        # Store the feedback
        session_data['responses'].append({
            'sample_id': session_data['current_index'],
            'preferred_method': preferred_method,
            'timestamp': datetime.now().isoformat()
        })

        # Move to the next sample
        session_data['current_index'] += 1

        # Save updated session data
        save_session_data(username, session_data)
        logger.info(f"Feedback saved for user {username}, sample {session_data['current_index'] - 1}")

        if session_data['current_index'] >= len(session_data['selected_samples']):
            return redirect(url_for('completed', username=username))

        return redirect(url_for('experiment', username=username))
    except Exception as e:
        logger.exception(f"Error in feedback route: {e}")
        return "An error occurred", 500
356
 
357
 
358
+ @app.route('/completed/<username>')
359
+ def completed(username):
360
  try:
361
+ session_data = load_session_data(username)
362
  if not session_data:
363
+ logger.error(f"No session data found for user: {username}")
364
  return redirect(url_for('index'))
365
 
366
  session_data['end_time'] = datetime.now().isoformat()
367
+
368
  methods = session_data['methods']
369
  responses = session_data['responses']
370
 
 
379
  preferences[method] = round((preferences[method] / total_responses) * 100, 2)
380
 
381
  session_data['preferences'] = preferences
382
+
383
+ # Save the final updated session data
384
+ save_session_data(username, session_data)
385
 
386
  return render_template('completed.html', preferences=preferences)
387
  except Exception as e:
 
392
  @app.route('/visualizations/<path:filename>')
393
  def send_visualization(filename):
394
  logger.info(f"Attempting to serve file: {filename}")
395
+ # Ensure the path is safe and doesn't allow access to files outside the intended directory
396
  base_dir = os.getcwd()
397
  file_path = os.path.normpath(os.path.join(base_dir, filename))
398
  if not file_path.startswith(base_dir):
 
408
 
409
 
410
if __name__ == "__main__":
    os.makedirs('session_data', exist_ok=True)  # Ensure the directory for session files exists
    # Debug mode enables the interactive debugger, which lets anyone who can
    # reach the server run code on it. The app listens on all interfaces, so
    # only turn it on when explicitly requested via FLASK_DEBUG.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true")
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)