nityathakkar committed
Commit a8d8165 · verified · 1 Parent(s): 1f784b0

added judge to pick best feedback (user can define # iterations)

Files changed (1):
  1. app.py +75 -5
app.py CHANGED
@@ -168,6 +168,67 @@ def create_feedback(review, pdf_text, agent_prompt, model):
 
     return feedback
 
+def pick_best_review(feedback_list, agent_prompt, review, pdf_text, model):
+    prompt = f"You are given a list containing output from multiple iterations of the same agent. The agent was given the following prompt: {agent_prompt}. Your task is to go through each iteration of feedback and pick the best overall feedback for the review. This can be portions of feedback from each iteration, or one set of feedback in its entirety. Please return the feedback in the exact format and wording in which it appears. You will be given the original review, the list of feedback from the different iterations, and the paper the review is about."
+
+    messages = [{
+        "role": "user",
+        "content": [
+            {
+                "type": "text",
+                "text": prompt
+            },
+            {
+                "type": "text",
+                "text": "Here is the ML conference review"
+            },
+            {
+                "type": "text",
+                "text": json.dumps(review)
+            },
+            {
+                "type": "text",
+                "text": "Here is the feedback about the review"
+            },
+            {
+                "type": "text",
+                "text": json.dumps(feedback_list)  # serialize the list; the API expects a string here
+            },
+            {
+                "type": "text",
+                "text": "Finally, read the paper this review was written about"
+            },
+            {
+                "type": "text",
+                "text": pdf_text
+            }
+        ]}]
+
+    if model == 'gpt':
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {openai_api_key}"
+        }
+        payload = {
+            "model": "gpt-4o-mini",
+            "messages": messages,
+            "max_tokens": 1000,
+            "temperature": 0.2
+        }
+
+        try:
+            response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
+            best_feedback = response.json()["choices"][0]["message"]["content"]
+        except Exception as e:
+            print(f"An unexpected error occurred: {e}")
+            best_feedback = "an error occurred"
+
+    else:
+        message = client_anthropic.messages.create(max_tokens=1024, messages=messages, model="claude-3-5-sonnet-20240620")
+        best_feedback = message.content[0].text
+
+    return best_feedback
+
 
 def critic(review, feedback, pdf_text, critic_prompt, model):
@@ -412,7 +473,8 @@ if option == "OpenReview paper ID":
     reviewer_id = st.text_input("Enter reviewer ID (e.g. gNxe):")
     reviewer_id = reviewer_id.strip()
     model = st.text_input("Which model? ('gpt' or 'claude')")
-    if paper_id and reviewer_id and model:
+    iterations = st.text_input("How many iterations to run agent for?")
+    if paper_id and reviewer_id and model and iterations:
         upload_file = False
         user_input = True
         submissions = load_ICLR_submissions()
@@ -423,7 +485,8 @@ elif option == "Upload PDF with Review":
     user_text = st.text_area("Enter review:")
     uploaded_file = st.file_uploader("Upload PDF", type="pdf")
     model = st.text_input("Which model? ('GPT' or 'Claude')")
-    if user_text and uploaded_file and model:
+    iterations = st.text_input("How many iterations to run agent for?")
+    if user_text and uploaded_file and model and iterations:
         upload_file = True
         user_input = True
         review, pdf = user_text, uploaded_file
@@ -436,10 +499,17 @@ elif option == "Upload PDF with Review":
 if user_input:
     annotations = load_annotations()
 
+    feedback_list = []
     # Run your pipeline to generate the dataframe based on user input
-    feedback = create_feedback(review, pdf_text, agent_prompt, model)
-
-    revised_feedback = critic(review, feedback, pdf_text, critic_prompt, model)
+    if int(iterations) > 1:  # st.text_input returns a string
+        for _ in range(int(iterations)):
+            feedback = create_feedback(review, pdf_text, agent_prompt, model)
+            feedback_list.append(feedback)
+        best_feedback = pick_best_review(feedback_list, agent_prompt, review, pdf_text, model)
+    else:
+        best_feedback = create_feedback(review, pdf_text, agent_prompt, model)
+
+    revised_feedback = critic(review, best_feedback, pdf_text, critic_prompt, model)
 
     st.title(f'Review feedback')
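One caveat on the new input: st.text_input returns a string, which is why the comparison above casts with int(...). A slightly more defensive variant (a sketch, not part of this commit) would validate the field up front:

# Hypothetical hardening of the iterations field; not part of this commit.
try:
    iterations = max(1, int(iterations))
except ValueError:
    st.error("Please enter a whole number of iterations.")
    st.stop()  # halt this Streamlit run until the input is corrected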
 
 
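For quick sanity-checking, here is a minimal sketch of how the new judge could be exercised on its own, assuming the client_anthropic client that app.py already configures; the review and candidate feedback strings below are invented:

# Hypothetical smoke test for pick_best_review; all strings are made up.
sample_review = "The method is interesting, but the evaluation lacks strong baselines."
candidates = [
    "Feedback A: name the specific missing baselines and soften the tone.",
    "Feedback B: praise the clarity; recommend an ablation on dataset size.",
]
best = pick_best_review(
    feedback_list=candidates,
    agent_prompt="Give constructive feedback on this peer review.",
    review=sample_review,
    pdf_text="(paper text omitted in this sketch)",
    model="claude",  # any value other than 'gpt' routes to the Anthropic branch
)
print(best)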