jdev8 committed on
Commit
9eff11a
·
verified ·
1 Parent(s): fde65a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -127
app.py CHANGED
@@ -17,23 +17,20 @@ HF_INPUT_DATASET_COLUMN_A = os.getenv("HF_INPUT_DATASET_COLUMN_A")
17
  HF_INPUT_DATASET_COLUMN_B = os.getenv("HF_INPUT_DATASET_COLUMN_B")
18
  HF_OUTPUT_DATASET = os.getenv("HF_OUTPUT_DATASET")
19
  HF_OUTPUT_DATASET_DIR = os.getenv("HF_OUTPUT_DATASET_DIR")
 
20
  INSTRUCTIONS = """
21
  # Pairwise Model Output Labeling
22
-
23
  Please compare the two model outputs shown below and select which one you think is better.
24
  - Choose "Left is better" if the left output is superior
25
  - Choose "Right is better" if the right output is superior
26
  - Choose "Tie" if they are equally good or bad
27
  - Choose "Can't choose" if you cannot make a determination
28
  """
29
- SAVE_EVERY_N_EXAMPLES = 5
30
-
31
-
32
  class PairwiseLabeler:
33
  def __init__(self):
34
- self.current_index = 0
35
- self.results = []
36
  self.df = self.read_hf_dataset()
 
37
 
38
  def __len__(self):
39
  return len(self.df)
@@ -44,7 +41,7 @@ class PairwiseLabeler:
44
  if local_file.endswith(".json"):
45
  return pd.read_json(local_file)
46
  elif local_file.endswith(".jsonl"):
47
- return pd.read_json(local_file, orient="records",lines=True)
48
  elif local_file.endswith(".csv"):
49
  return pd.read_csv(local_file)
50
  elif local_file.endswith(".parquet"):
@@ -52,158 +49,112 @@ class PairwiseLabeler:
52
  else:
53
  raise ValueError(f"Unsupported file type: {local_file}")
54
  except Exception as e:
55
- # Fallback to sample data if loading fails
56
  logging.error(f"Couldn't read HF dataset from {HF_INPUT_DATASET_PATH}. Using sample data instead.")
57
  sample_data = {
58
- HF_INPUT_DATASET_ID_COLUMN: [f"sample_{i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
59
- HF_INPUT_DATASET_COLUMN_A: [f"This is sample generation A {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
60
- HF_INPUT_DATASET_COLUMN_B: [f"This is sample generation B {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
61
  }
62
  return pd.DataFrame(sample_data)
63
-
64
- def get_current_pair(self):
65
- if self.current_index >= len(self.df):
66
  return None, None, None
67
-
68
- item = self.df.iloc[self.current_index]
69
- item_id = item.get(HF_INPUT_DATASET_ID_COLUMN, f"item_{self.current_index}")
70
  left_text = item.get(HF_INPUT_DATASET_COLUMN_A, "")
71
  right_text = item.get(HF_INPUT_DATASET_COLUMN_B, "")
72
 
73
  return item_id, left_text, right_text
74
-
75
- def submit_judgment(self, item_id, left_text, right_text, choice):
76
  if item_id is None:
77
- return item_id, left_text, right_text, self.current_index
78
-
79
- # Record the judgment
80
- result = {
81
- "item_id": item_id,
82
- "generation_a": left_text,
83
- "generation_b": right_text,
84
- "judgment": choice,
85
- "timestamp": datetime.datetime.now().isoformat(),
86
- "labeler_id": str(uuid.uuid4())[:8] # Anonymous ID for the labeling session
87
- }
88
-
89
- self.results.append(result)
90
 
91
- # Move to next item
92
- self.current_index += 1
 
 
 
 
93
 
94
- # Save results periodically
95
- if len(self.results) % SAVE_EVERY_N_EXAMPLES == 0:
96
- self.save_results()
 
 
 
 
 
 
 
 
 
97
 
98
- # Get next pair
99
- next_id, next_left, next_right = self.get_current_pair()
100
- return next_id, next_left, next_right, self.current_index
101
-
102
- def save_results(self):
103
- if not self.results:
 
 
 
 
104
  return
105
-
106
  try:
107
- # Convert results to dataset format
108
- results_df = pd.DataFrame(self.results)
109
- results_df.to_json("temp.jsonl", orient="records", lines=True)
110
-
111
  # Push to Hugging Face Hub
112
- try:
113
- num_files = len([_ for _ in list_repo_tree(repo_id=HF_OUTPUT_DATASET, repo_type="dataset", path_in_repo=HF_OUTPUT_DATASET_DIR)])
114
- except Exception as e:
115
- num_files = 0
116
- upload_file(repo_id=HF_OUTPUT_DATASET, repo_type="dataset", path_in_repo=os.path.join(HF_OUTPUT_DATASET_DIR, f"results_{num_files+1}.jsonl"), path_or_fileobj="temp.jsonl")
117
- os.remove("temp.jsonl")
118
- self.results = []
119
- logging.info(f"Saved {len(self.results)} results to {HF_OUTPUT_DATASET}")
120
  except Exception as e:
121
  logging.error(f"Error saving results: {e}")
122
 
123
  # Initialize the labeler
124
  labeler = PairwiseLabeler()
125
 
126
- # Get the first pair
127
- initial_id, initial_left, initial_right = labeler.get_current_pair()
128
-
129
  with gr.Blocks() as app:
130
  gr.Markdown(INSTRUCTIONS)
131
-
 
 
 
132
  with gr.Row():
133
  with gr.Column():
134
- left_output = gr.Textbox(
135
- value=initial_left,
136
- label="Model Output A",
137
- lines=10,
138
- interactive=False
139
- )
140
-
141
  with gr.Column():
142
- right_output = gr.Textbox(
143
- value=initial_right,
144
- label="Model Output B",
145
- lines=10,
146
- interactive=False
147
- )
148
 
149
- item_id = gr.Textbox(value=initial_id, visible=False)
150
 
151
  with gr.Row():
152
- left_btn = gr.Button("⬅️ A is better", variant="primary")
153
- right_btn = gr.Button("➡️ B is better", variant="primary")
154
- tie_btn = gr.Button("🤝 Tie", variant="primary")
155
  cant_choose_btn = gr.Button("🤔 Can't choose")
156
 
157
- current_sample_sld = gr.Slider(minimum=0, maximum=len(labeler), step=1,
158
- value=labeler.current_index,
159
- interactive=False,
160
- label='sample_ind',
161
- info=f"Samples labeled (out of {len(labeler)})",
162
- show_label=False,
163
- container=False,
164
- scale=5)
165
-
166
- def judge_left(item_id, left_text, right_text):
167
- return judge("A is better", item_id, left_text, right_text)
168
-
169
- def judge_right(item_id, left_text, right_text):
170
- return judge("B is better", item_id, left_text, right_text)
171
-
172
- def judge_tie(item_id, left_text, right_text):
173
- return judge("Tie", item_id, left_text, right_text)
174
-
175
- def judge_cant_choose(item_id, left_text, right_text):
176
- return judge("Can't choose", item_id, left_text, right_text)
177
-
178
- def judge(choice, item_id, left_text, right_text):
179
- new_id, new_left, new_right, new_index = labeler.submit_judgment(
180
- item_id, left_text, right_text, choice
181
- )
182
- return new_id, new_left, new_right, new_index
183
-
184
- left_btn.click(
185
- judge_left,
186
- inputs=[item_id, left_output, right_output],
187
- outputs=[item_id, left_output, right_output, current_sample_sld]
188
- )
189
-
190
- right_btn.click(
191
- judge_right,
192
- inputs=[item_id, left_output, right_output],
193
- outputs=[item_id, left_output, right_output, current_sample_sld]
194
- )
195
-
196
- tie_btn.click(
197
- judge_tie,
198
- inputs=[item_id, left_output, right_output],
199
- outputs=[item_id, left_output, right_output, current_sample_sld]
200
- )
201
-
202
- cant_choose_btn.click(
203
- judge_cant_choose,
204
- inputs=[item_id, left_output, right_output],
205
- outputs=[item_id, left_output, right_output, current_sample_sld]
206
- )
207
 
208
  if __name__ == "__main__":
209
  app.launch()
 
17
  HF_INPUT_DATASET_COLUMN_B = os.getenv("HF_INPUT_DATASET_COLUMN_B")
18
  HF_OUTPUT_DATASET = os.getenv("HF_OUTPUT_DATASET")
19
  HF_OUTPUT_DATASET_DIR = os.getenv("HF_OUTPUT_DATASET_DIR")
20
+
21
# Markdown rendered at the top of the labeling page. The option names quoted
# below are kept in sync with the button captions used in the UI
# ("A is better", "B is better", "Tie", "Can't choose").
INSTRUCTIONS = """
# Pairwise Model Output Labeling

Please compare the two model outputs shown below and select which one you think is better.
- Choose "A is better" if Model Output A (left) is superior
- Choose "B is better" if Model Output B (right) is superior
- Choose "Tie" if they are equally good or bad
- Choose "Can't choose" if you cannot make a determination
"""
29
+
 
 
30
  class PairwiseLabeler:
31
  def __init__(self):
 
 
32
  self.df = self.read_hf_dataset()
33
+ self.results = {}
34
 
35
  def __len__(self):
36
  return len(self.df)
 
41
  if local_file.endswith(".json"):
42
  return pd.read_json(local_file)
43
  elif local_file.endswith(".jsonl"):
44
+ return pd.read_json(local_file, orient="records", lines=True)
45
  elif local_file.endswith(".csv"):
46
  return pd.read_csv(local_file)
47
  elif local_file.endswith(".parquet"):
 
49
  else:
50
  raise ValueError(f"Unsupported file type: {local_file}")
51
  except Exception as e:
 
52
  logging.error(f"Couldn't read HF dataset from {HF_INPUT_DATASET_PATH}. Using sample data instead.")
53
  sample_data = {
54
+ HF_INPUT_DATASET_ID_COLUMN: [f"sample_{i}" for i in range(5)],
55
+ HF_INPUT_DATASET_COLUMN_A: [f"This is sample generation A {i}" for i in range(5)],
56
+ HF_INPUT_DATASET_COLUMN_B: [f"This is sample generation B {i}" for i in range(5)],
57
  }
58
  return pd.DataFrame(sample_data)
59
+
60
def get_current_pair(self, user_id, user_index):
    """Return ``(item_id, left_text, right_text)`` for the row at ``user_index``.

    Args:
        user_id: Identifier of the labeler; accepted for interface symmetry
            but not consulted by the lookup.
        user_index: Zero-based position of the row in ``self.df``.

    Returns:
        A 3-tuple with the item ID and the two texts to compare, or
        ``(None, None, None)`` once ``user_index`` is past the last row.
    """
    total_rows = len(self.df)
    if user_index >= total_rows:
        return None, None, None

    row = self.df.iloc[user_index]
    # Rows without an ID column get a positional placeholder ID.
    fallback_id = f"item_{user_index}"
    return (
        row.get(HF_INPUT_DATASET_ID_COLUMN, fallback_id),
        row.get(HF_INPUT_DATASET_COLUMN_A, ""),
        row.get(HF_INPUT_DATASET_COLUMN_B, ""),
    )
70
+
71
def submit_judgment(self, user_id, user_index, item_id, left_text, right_text, choice):
    """Record one labeler's verdict on a pair and advance to the next pair.

    A labeler has at most one vote per item: submitting again for the same
    ``item_id`` overwrites the earlier verdict and timestamp instead of
    appending a duplicate. Results are saved after every accepted vote.

    Args:
        user_id: Identifier of the labeler casting the vote.
        user_index: Position of the judged item in the dataset.
        item_id: ID of the judged item; ``None`` or ``""`` means nothing is
            currently loaded.
        left_text: Text shown as output A.
        right_text: Text shown as output B.
        choice: The verdict label (e.g. ``"A is better"``).

    Returns:
        ``(next_item_id, next_left, next_right, next_index)``. When no item
        is loaded the first three are ``None`` and the index is unchanged.
        When ``user_id`` is blank the current pair and index are returned
        unchanged and nothing is recorded.
    """
    # The item ID is routed through a UI text field, so "nothing loaded" may
    # arrive as an empty string rather than None; treat both the same way.
    if item_id is None or item_id == "":
        return None, None, None, user_index

    # Votes are keyed by labeler; an anonymous vote cannot be attributed, so
    # keep the current pair on screen instead of recording or advancing.
    if not user_id:
        return item_id, left_text, right_text, user_index

    user_votes = self.results.setdefault(user_id, [])
    timestamp = datetime.datetime.now().isoformat()

    existing_vote = next((vote for vote in user_votes if vote["item_id"] == item_id), None)
    if existing_vote is not None:
        # Re-vote on an already judged item: update in place.
        existing_vote["judgment"] = choice
        existing_vote["timestamp"] = timestamp
    else:
        user_votes.append({
            "item_id": item_id,
            "generation_a": left_text,
            "generation_b": right_text,
            "judgment": choice,
            "timestamp": timestamp,
            "labeler_id": user_id,
        })

    # Save immediately so an abandoned session loses no votes.
    self.save_results(user_id)

    # Move to the next item.
    user_index += 1
    next_id, next_left, next_right = self.get_current_pair(user_id, user_index)
    return next_id, next_left, next_right, user_index
102
+
103
def save_results(self, user_id):
    """Upload all judgments recorded for ``user_id`` to the output dataset.

    The labeler's votes are serialized as JSON Lines and pushed to
    ``results_<user>.jsonl`` under ``HF_OUTPUT_DATASET_DIR`` in the
    ``HF_OUTPUT_DATASET`` dataset repo. Nothing is uploaded when the
    labeler has no recorded votes. Failures are logged, not raised, so a
    failed upload never interrupts the labeling session.

    Args:
        user_id: Identifier of the labeler whose results should be saved.
    """
    import hashlib
    import posixpath

    user_results = self.results.get(user_id)
    if not user_results:
        return

    # The user ID is free text typed into the UI, so it must not reach a
    # path verbatim (e.g. "../x"). Plain IDs keep their original file name;
    # IDs that needed rewriting get a short digest of the raw ID appended so
    # two different IDs cannot collapse onto the same file.
    raw_id = str(user_id)
    safe_id = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in raw_id)
    if safe_id != raw_id:
        digest = hashlib.sha1(raw_id.encode("utf-8")).hexdigest()[:8]
        safe_id = f"{safe_id}_{digest}"
    filename = f"results_{safe_id}.jsonl"

    try:
        # Serialize in memory: no temp file in the working directory to leak
        # when the upload fails or to clash with a concurrent save.
        payload = pd.DataFrame(user_results).to_json(orient="records", lines=True)

        # Push to Hugging Face Hub. Repo paths always use "/" separators; an
        # unset output directory falls back to the repo root.
        upload_file(
            repo_id=HF_OUTPUT_DATASET,
            repo_type="dataset",
            path_in_repo=posixpath.join(HF_OUTPUT_DATASET_DIR or "", filename),
            path_or_fileobj=payload.encode("utf-8"),
        )
    except Exception as e:
        logging.error(f"Error saving results: {e}")
120
 
121
  # Initialize the labeler
122
  labeler = PairwiseLabeler()
123
 
124
+ # Gradio UI
 
 
125
with gr.Blocks() as app:
    gr.Markdown(INSTRUCTIONS)

    # Free-text labeler identifier; submitting this box loads the first pair.
    user_id = gr.Textbox(label="Enter your user ID", interactive=True)
    # Tracks each user's progress through the dataset, starting at row 0.
    user_index = gr.State(0)

    with gr.Row():
        with gr.Column():
            left_output = gr.Textbox(label="Model Output A", lines=10, interactive=False)
        with gr.Column():
            right_output = gr.Textbox(label="Model Output B", lines=10, interactive=False)

    # Hidden field carrying the ID of the pair currently on screen.
    item_id = gr.Textbox(visible=False)

    with gr.Row():
        left_btn = gr.Button("⬅️ A is better")
        right_btn = gr.Button("➡️ B is better")
        tie_btn = gr.Button("🤝 Tie")
        cant_choose_btn = gr.Button("🤔 Can't choose")

    def load_first_pair(user_id):
        """Return the first pair and a reset index, or blanks when no user ID is given."""
        if user_id:
            first_id, first_left, first_right = labeler.get_current_pair(user_id, 0)
            return first_id, first_left, first_right, 0
        return None, None, None, 0

    def judge(choice, user_id, user_index, item_id, left_text, right_text):
        """Forward one verdict to the shared labeler and return the next pair."""
        return labeler.submit_judgment(
            user_id, user_index, item_id, left_text, right_text, choice
        )

    # Every handler refreshes the same four components.
    pair_outputs = [item_id, left_output, right_output, user_index]

    user_id.submit(load_first_pair, inputs=[user_id], outputs=pair_outputs)

    # All four buttons share one handler; only the verdict label differs.
    verdict_buttons = (
        (left_btn, "A is better"),
        (right_btn, "B is better"),
        (tie_btn, "Tie"),
        (cant_choose_btn, "Can't choose"),
    )
    for button, verdict in verdict_buttons:
        button.click(
            judge,
            inputs=[gr.State(verdict), user_id, user_index, item_id, left_output, right_output],
            outputs=pair_outputs,
        )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
  if __name__ == "__main__":
160
  app.launch()