saridormi committed on
Commit
1060d5d
·
1 Parent(s): f053717

fix the app

Browse files
Files changed (2) hide show
  1. .env +1 -0
  2. app.py +199 -79
.env CHANGED
@@ -4,5 +4,6 @@ HF_INPUT_DATASET_PATH="commit_message_generation/predictions/o1-preview-2024-09-
4
  HF_INPUT_DATASET_ID_COLUMN="hash"
5
  HF_INPUT_DATASET_COLUMN_A="reference"
6
  HF_INPUT_DATASET_COLUMN_B="prediction"
 
7
  HF_OUTPUT_DATASET="saridormi/labels"
8
  HF_OUTPUT_DATASET_DIR="cmg"
 
4
  HF_INPUT_DATASET_ID_COLUMN="hash"
5
  HF_INPUT_DATASET_COLUMN_A="reference"
6
  HF_INPUT_DATASET_COLUMN_B="prediction"
7
+ #HF_INPUT_DATASET_URL_COLUMN="hash" <-- optional
8
  HF_OUTPUT_DATASET="saridormi/labels"
9
  HF_OUTPUT_DATASET_DIR="cmg"
app.py CHANGED
@@ -6,6 +6,7 @@ import datetime
6
  import logging
7
  from huggingface_hub import hf_hub_download, upload_file, list_repo_tree
8
  from dotenv import load_dotenv
 
9
 
10
  load_dotenv()
11
 
@@ -15,15 +16,16 @@ HF_INPUT_DATASET_PATH = os.getenv("HF_INPUT_DATASET_PATH")
15
  HF_INPUT_DATASET_ID_COLUMN = os.getenv("HF_INPUT_DATASET_ID_COLUMN")
16
  HF_INPUT_DATASET_COLUMN_A = os.getenv("HF_INPUT_DATASET_COLUMN_A")
17
  HF_INPUT_DATASET_COLUMN_B = os.getenv("HF_INPUT_DATASET_COLUMN_B")
 
18
  HF_OUTPUT_DATASET = os.getenv("HF_OUTPUT_DATASET")
19
  HF_OUTPUT_DATASET_DIR = os.getenv("HF_OUTPUT_DATASET_DIR")
20
  INSTRUCTIONS = """
21
  # Pairwise Model Output Labeling
22
 
23
  Please compare the two model outputs shown below and select which one you think is better.
24
- - Choose "Left is better" if the left output is superior
25
- - Choose "Right is better" if the right output is superior
26
- - Choose "Tie" if they are equally good or bad
27
  - Choose "Can't choose" if you cannot make a determination
28
  """
29
  SAVE_EVERY_N_EXAMPLES = 5
@@ -31,9 +33,10 @@ SAVE_EVERY_N_EXAMPLES = 5
31
 
32
  class PairwiseLabeler:
33
  def __init__(self):
34
- self.current_index = 0
35
- self.results = []
36
  self.df = self.read_hf_dataset()
 
37
 
38
  def __len__(self):
39
  return len(self.df)
@@ -59,94 +62,137 @@ class PairwiseLabeler:
59
  HF_INPUT_DATASET_COLUMN_A: [f"This is sample generation A {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
60
  HF_INPUT_DATASET_COLUMN_B: [f"This is sample generation B {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
61
  }
 
 
 
 
 
62
  return pd.DataFrame(sample_data)
63
 
64
- def get_current_pair(self):
65
- if self.current_index >= len(self.df):
66
- return None, None, None
 
 
 
67
 
68
- item = self.df.iloc[self.current_index]
69
- item_id = item.get(HF_INPUT_DATASET_ID_COLUMN, f"item_{self.current_index}")
70
  left_text = item.get(HF_INPUT_DATASET_COLUMN_A, "")
71
  right_text = item.get(HF_INPUT_DATASET_COLUMN_B, "")
72
 
73
- return item_id, left_text, right_text
 
 
 
 
74
 
75
- def submit_judgment(self, item_id, left_text, right_text, choice):
76
  if item_id is None:
77
- return item_id, left_text, right_text, self.current_index
 
 
 
78
 
 
 
 
 
 
79
  # Record the judgment
80
  result = {
81
  "item_id": item_id,
82
- "generation_a": left_text,
83
- "generation_b": right_text,
84
  "judgment": choice,
85
  "timestamp": datetime.datetime.now().isoformat(),
86
- "labeler_id": str(uuid.uuid4())[:8] # Anonymous ID for the labeling session
87
  }
88
 
89
- self.results.append(result)
90
 
91
  # Move to next item
92
- self.current_index += 1
93
 
94
  # Save results periodically
95
- if len(self.results) % SAVE_EVERY_N_EXAMPLES == 0:
96
- self.save_results()
97
 
98
  # Get next pair
99
- next_id, next_left, next_right = self.get_current_pair()
100
- return next_id, next_left, next_right, self.current_index
 
 
 
 
101
 
102
- def save_results(self):
103
- if not self.results:
104
  return
105
 
106
  try:
107
  # Convert results to dataset format
108
- results_df = pd.DataFrame(self.results)
109
  results_df.to_json("temp.jsonl", orient="records", lines=True)
110
 
111
  # Push to Hugging Face Hub
112
  try:
113
- num_files = len([_ for _ in list_repo_tree(repo_id=HF_OUTPUT_DATASET, repo_type="dataset", path_in_repo=HF_OUTPUT_DATASET_DIR)])
114
  except Exception as e:
115
  num_files = 0
116
- upload_file(repo_id=HF_OUTPUT_DATASET, repo_type="dataset", path_in_repo=os.path.join(HF_OUTPUT_DATASET_DIR, f"results_{num_files+1}.jsonl"), path_or_fileobj="temp.jsonl")
 
 
 
 
 
 
 
 
117
  os.remove("temp.jsonl")
118
- self.results = []
119
- logging.info(f"Saved {len(self.results)} results to {HF_OUTPUT_DATASET}")
 
 
120
  except Exception as e:
121
  logging.error(f"Error saving results: {e}")
 
122
 
123
  # Initialize the labeler
124
  labeler = PairwiseLabeler()
125
 
126
- # Get the first pair
127
- initial_id, initial_left, initial_right = labeler.get_current_pair()
 
128
 
129
  with gr.Blocks() as app:
 
 
 
 
130
  gr.Markdown(INSTRUCTIONS)
131
 
 
 
 
 
 
 
 
132
  with gr.Row():
133
  with gr.Column():
134
  left_output = gr.Textbox(
135
- value=initial_left,
136
- label="Model Output A",
137
  lines=10,
138
  interactive=False
139
  )
140
 
141
  with gr.Column():
142
  right_output = gr.Textbox(
143
- value=initial_right,
144
- label="Model Output B",
145
  lines=10,
146
  interactive=False
147
  )
148
 
149
- item_id = gr.Textbox(value=initial_id, visible=False)
150
 
151
  with gr.Row():
152
  left_btn = gr.Button("⬅️ A is better", variant="primary")
@@ -155,55 +201,129 @@ with gr.Blocks() as app:
155
  cant_choose_btn = gr.Button("🤔 Can't choose")
156
 
157
  current_sample_sld = gr.Slider(minimum=0, maximum=len(labeler), step=1,
158
- value=labeler.current_index,
159
- interactive=False,
160
- label='sample_ind',
161
- info=f"Samples labeled (out of {len(labeler)})",
162
- show_label=False,
163
- container=False,
164
- scale=5)
165
 
166
- def judge_left(item_id, left_text, right_text):
167
- return judge("A is better", item_id, left_text, right_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
- def judge_right(item_id, left_text, right_text):
170
- return judge("B is better", item_id, left_text, right_text)
171
 
172
- def judge_tie(item_id, left_text, right_text):
173
- return judge("Tie", item_id, left_text, right_text)
174
 
175
- def judge_cant_choose(item_id, left_text, right_text):
176
- return judge("Can't choose", item_id, left_text, right_text)
177
 
178
- def judge(choice, item_id, left_text, right_text):
179
- new_id, new_left, new_right, new_index = labeler.submit_judgment(
180
- item_id, left_text, right_text, choice
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  )
182
- return new_id, new_left, new_right, new_index
183
-
184
- left_btn.click(
185
- judge_left,
186
- inputs=[item_id, left_output, right_output],
187
- outputs=[item_id, left_output, right_output, current_sample_sld]
188
- )
189
-
190
- right_btn.click(
191
- judge_right,
192
- inputs=[item_id, left_output, right_output],
193
- outputs=[item_id, left_output, right_output, current_sample_sld]
194
- )
195
-
196
- tie_btn.click(
197
- judge_tie,
198
- inputs=[item_id, left_output, right_output],
199
- outputs=[item_id, left_output, right_output, current_sample_sld]
200
- )
201
-
202
- cant_choose_btn.click(
203
- judge_cant_choose,
204
- inputs=[item_id, left_output, right_output],
205
- outputs=[item_id, left_output, right_output, current_sample_sld]
206
- )
207
 
208
  if __name__ == "__main__":
209
  app.launch()
 
6
  import logging
7
  from huggingface_hub import hf_hub_download, upload_file, list_repo_tree
8
  from dotenv import load_dotenv
9
+ from collections import defaultdict
10
 
11
  load_dotenv()
12
 
 
16
  HF_INPUT_DATASET_ID_COLUMN = os.getenv("HF_INPUT_DATASET_ID_COLUMN")
17
  HF_INPUT_DATASET_COLUMN_A = os.getenv("HF_INPUT_DATASET_COLUMN_A")
18
  HF_INPUT_DATASET_COLUMN_B = os.getenv("HF_INPUT_DATASET_COLUMN_B")
19
+ HF_INPUT_DATASET_URL_COLUMN = os.getenv("HF_INPUT_DATASET_URL_COLUMN")
20
  HF_OUTPUT_DATASET = os.getenv("HF_OUTPUT_DATASET")
21
  HF_OUTPUT_DATASET_DIR = os.getenv("HF_OUTPUT_DATASET_DIR")
22
  INSTRUCTIONS = """
23
  # Pairwise Model Output Labeling
24
 
25
  Please compare the two model outputs shown below and select which one you think is better.
26
+ - Choose "A is better" if the output from Model A (left) is superior
27
+ - Choose "B is better" if the output from Model B (right) is superior
28
+ - Choose "Tie" if you think they are equally good or bad
29
  - Choose "Can't choose" if you cannot make a determination
30
  """
31
  SAVE_EVERY_N_EXAMPLES = 5
 
33
 
34
  class PairwiseLabeler:
35
  def __init__(self):
36
+ self.current_index = defaultdict(int)
37
+ self.results = defaultdict(list)
38
  self.df = self.read_hf_dataset()
39
+ self.has_url_column = HF_INPUT_DATASET_URL_COLUMN and HF_INPUT_DATASET_URL_COLUMN in self.df.columns
40
 
41
  def __len__(self):
42
  return len(self.df)
 
62
  HF_INPUT_DATASET_COLUMN_A: [f"This is sample generation A {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
63
  HF_INPUT_DATASET_COLUMN_B: [f"This is sample generation B {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
64
  }
65
+
66
+ # Add URL column to sample data if specified
67
+ if HF_INPUT_DATASET_URL_COLUMN:
68
+ sample_data[HF_INPUT_DATASET_URL_COLUMN] = [f"https://example.com/sample_{i}" for i in range(SAVE_EVERY_N_EXAMPLES)]
69
+
70
  return pd.DataFrame(sample_data)
71
 
72
def get_current_pair(self, session_id):
    """Return the pair the given session should label next.

    The result is ``(item_id, left_text, right_text)``, with a trailing
    ``url`` element appended when ``self.has_url_column`` is truthy.
    Every element is ``None`` once the session has gone through all rows.
    """
    position = self.current_index[session_id]

    # Session has labeled everything: hand back an all-None tuple of the
    # same arity the caller expects.
    if position >= len(self.df):
        return (None,) * (4 if self.has_url_column else 3)

    row = self.df.iloc[position]
    pair = (
        row.get(HF_INPUT_DATASET_ID_COLUMN, f"item_{position}"),
        row.get(HF_INPUT_DATASET_COLUMN_A, ""),
        row.get(HF_INPUT_DATASET_COLUMN_B, ""),
    )
    if not self.has_url_column:
        return pair
    return pair + (row.get(HF_INPUT_DATASET_URL_COLUMN, ""),)
89
 
90
def submit_judgment(self, item_id, left_text, right_text, choice, session_id):
    """Record ``choice`` for the pair shown to ``session_id`` and advance.

    Args:
        item_id: Identifier of the pair that was judged (``None`` when
            there was nothing left to show).
        left_text: Text that was displayed as output A; echoed back
            unchanged when nothing is recorded.
        right_text: Text that was displayed as output B; echoed back
            unchanged when nothing is recorded.
        choice: The label selected by the user.
        session_id: Key under which progress and results are tracked.

    Returns:
        ``(next_id, next_left, next_right, index)``, or
        ``(next_id, next_left, next_right, next_url, index)`` when
        ``self.has_url_column`` is truthy. ``index`` is the session's
        position after this call.
    """
    position = self.current_index[session_id]
    total = len(self.df)

    # Nothing to record: either no pair was shown, or the session already
    # reached the end (checked by index as well, so a stray click after the
    # last item cannot append a bogus judgment or index past the data).
    if item_id is None or position >= total:
        if self.has_url_column:
            return item_id, left_text, right_text, None, position
        return item_id, left_text, right_text, position

    # Record the judgment
    self.results[session_id].append({
        "item_id": item_id,
        "judgment": choice,
        "timestamp": datetime.datetime.now().isoformat(),
        "labeler_id": session_id,
    })

    # Move to next item
    self.current_index[session_id] = position + 1

    # Save periodically, and also when the session has just finished;
    # otherwise a final batch smaller than SAVE_EVERY_N_EXAMPLES would
    # never be uploaded.
    pending = len(self.results[session_id])
    finished = self.current_index[session_id] >= total
    if pending % SAVE_EVERY_N_EXAMPLES == 0 or finished:
        self.save_results(session_id)

    # get_current_pair already returns 3 or 4 elements depending on
    # has_url_column; append the new index to match the return contract.
    next_pair = self.get_current_pair(session_id)
    return (*next_pair, self.current_index[session_id])
126
 
127
def save_results(self, session_id):
    """Upload the unsaved judgments of ``session_id`` as one JSONL file.

    The file is written to ``HF_OUTPUT_DATASET`` under
    ``HF_OUTPUT_DATASET_DIR`` as ``results_<session_id>_<n>.jsonl``, where
    ``n`` is one more than the number of existing files in that directory
    whose path contains the session id. On success the session's pending
    results are cleared; on any failure the error is logged and the
    results stay in memory so a later call can retry.
    """
    import posixpath  # repo paths always use "/", unlike os.path on Windows

    pending = self.results[session_id]
    if not pending:
        return

    try:
        # Serialize in memory instead of through a shared "temp.jsonl":
        # a fixed file name would be clobbered by concurrent sessions and
        # left behind whenever the upload raised.
        payload = (
            pd.DataFrame(pending)
            .to_json(orient="records", lines=True)
            .encode("utf-8")
        )

        # Count this session's existing files to pick the next suffix.
        # Listing is best-effort (e.g. the directory may not exist yet).
        try:
            num_files = sum(
                1
                for entry in list_repo_tree(
                    repo_id=HF_OUTPUT_DATASET,
                    repo_type="dataset",
                    path_in_repo=HF_OUTPUT_DATASET_DIR,
                )
                if session_id in entry.path
            )
        except Exception:
            num_files = 0

        # Use session_id in filename to avoid conflicts
        filename = f"results_{session_id}_{num_files+1}.jsonl"
        upload_file(
            repo_id=HF_OUTPUT_DATASET,
            repo_type="dataset",
            path_in_repo=posixpath.join(HF_OUTPUT_DATASET_DIR, filename),
            path_or_fileobj=payload,
        )

        # Clear saved results
        self.results[session_id] = []
        logging.info(f"Saved results for session {session_id} to {HF_OUTPUT_DATASET}/{filename}")
    except Exception as e:
        # Keep results in memory to try saving again later
        logging.error(f"Error saving results: {e}")
158
 
159
  # Initialize the labeler
160
  labeler = PairwiseLabeler()
161
 
162
+ # Create a unique session ID
163
def create_new_session():
    """Return a short random session identifier (8 hexadecimal characters)."""
    token = uuid.uuid4()
    # The first 8 characters of the canonical UUID string are its first
    # 8 hex digits, i.e. the same as the start of ``.hex``.
    return token.hex[:8]
165
 
166
  with gr.Blocks() as app:
167
+ # State for the session ID
168
+ session_id = gr.State(value=None)
169
+
170
+ # The actual interface components will be created here
171
  gr.Markdown(INSTRUCTIONS)
172
 
173
+ # URL display component - only shown if URL column is defined
174
+ url_display = None
175
+ if labeler.has_url_column:
176
+ url_display = gr.HTML(label="Reference URL")
177
+
178
+ session_id_display = gr.Textbox(label="Session Information", interactive=False)
179
+
180
  with gr.Row():
181
  with gr.Column():
182
  left_output = gr.Textbox(
183
+ label="Model A Output",
 
184
  lines=10,
185
  interactive=False
186
  )
187
 
188
  with gr.Column():
189
  right_output = gr.Textbox(
190
+ label="Model B Output",
 
191
  lines=10,
192
  interactive=False
193
  )
194
 
195
+ item_id = gr.Textbox(visible=False)
196
 
197
  with gr.Row():
198
  left_btn = gr.Button("⬅️ A is better", variant="primary")
 
201
  cant_choose_btn = gr.Button("🤔 Can't choose")
202
 
203
  current_sample_sld = gr.Slider(minimum=0, maximum=len(labeler), step=1,
204
+ interactive=False,
205
+ label='sample_ind',
206
+ info=f"Samples labeled (out of {len(labeler)})",
207
+ show_label=False,
208
+ container=False,
209
+ scale=5)
 
210
 
211
+ # Initialize the session and get the first pair
212
def init_session():
    """Start a fresh labeling session and return the initial UI values.

    Returns a tuple ordered as: session id (state), the "Session ID: ..."
    banner text, the URL link HTML (only when ``labeler.has_url_column``
    is truthy), left text, right text, item id, and the session's current
    index.
    """
    from html import escape  # dataset URLs are escaped before going into HTML

    new_session_id = create_new_session()
    first_pair = labeler.get_current_pair(new_session_id)
    header = (new_session_id, f"Session ID: {new_session_id}")
    progress = labeler.current_index[new_session_id]

    if not labeler.has_url_column:
        initial_id, initial_left, initial_right = first_pair
        return (*header, initial_left, initial_right, initial_id, progress)

    initial_id, initial_left, initial_right, initial_url = first_pair
    if initial_url:
        # The URL comes from the dataset; escape it so quotes or angle
        # brackets cannot break out of the attribute or inject markup.
        safe_url = escape(str(initial_url), quote=True)
        url_html = f'<a href="{safe_url}" target="_blank">{safe_url}</a>'
    else:
        url_html = ""
    return (*header, url_html, initial_left, initial_right, initial_id, progress)
239
+
240
+ # Run the initialization when the app loads
241
# Components filled by init_session, in the order it returns them; the URL
# display sits third and only exists when the dataset has a URL column.
load_outputs = [session_id, session_id_display]
if labeler.has_url_column:
    load_outputs.append(url_display)
load_outputs.extend([left_output, right_output, item_id, current_sample_sld])

app.load(
    init_session,
    inputs=None,
    outputs=load_outputs
)
253
 
254
def judge_left(session_id, item_id, left_text, right_text):
    """Click handler for the "A is better" button; delegates to ``judge``."""
    verdict = "A is better"
    return judge(verdict, session_id, item_id, left_text, right_text)
256
 
257
def judge_right(session_id, item_id, left_text, right_text):
    """Click handler for the "B is better" button; delegates to ``judge``."""
    verdict = "B is better"
    return judge(verdict, session_id, item_id, left_text, right_text)
259
 
260
def judge_tie(session_id, item_id, left_text, right_text):
    """Click handler for the "Tie" button; delegates to ``judge``."""
    verdict = "Tie"
    return judge(verdict, session_id, item_id, left_text, right_text)
262
 
263
def judge_cant_choose(session_id, item_id, left_text, right_text):
    """Click handler for the "Can't choose" button; delegates to ``judge``."""
    verdict = "Can't choose"
    return judge(verdict, session_id, item_id, left_text, right_text)
265
+
266
def judge(choice, session_id, item_id, left_text, right_text):
    """Submit ``choice`` for the current pair and return the next UI values.

    Returns ``(item_id, left_text, right_text, index)`` for the next pair,
    with the URL link HTML inserted before ``index`` when
    ``labeler.has_url_column`` is truthy.
    """
    from html import escape  # dataset URLs are escaped before going into HTML

    outcome = labeler.submit_judgment(
        item_id, left_text, right_text, choice, session_id
    )
    if not labeler.has_url_column:
        new_id, new_left, new_right, new_index = outcome
        return new_id, new_left, new_right, new_index

    new_id, new_left, new_right, new_url, new_index = outcome
    if new_url:
        # The URL comes from the dataset; escape it so quotes or angle
        # brackets cannot break out of the attribute or inject markup.
        safe_url = escape(str(new_url), quote=True)
        url_html = f'<a href="{safe_url}" target="_blank">{safe_url}</a>'
    else:
        url_html = ""
    return new_id, new_left, new_right, url_html, new_index
278
+
279
# Every button reads the same inputs and refreshes the same components; the
# URL display is placed before the progress slider only when it exists.
judge_inputs = [session_id, item_id, left_output, right_output]
judge_outputs = [item_id, left_output, right_output]
if labeler.has_url_column:
    judge_outputs.append(url_display)
judge_outputs.append(current_sample_sld)

for button, handler in (
    (left_btn, judge_left),
    (right_btn, judge_right),
    (tie_btn, judge_tie),
    (cant_choose_btn, judge_cant_choose),
):
    # Fresh list copies per registration, as in the hand-written calls.
    button.click(
        handler,
        inputs=list(judge_inputs),
        outputs=list(judge_outputs)
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
 
328
  if __name__ == "__main__":
329
  app.launch()