Spaces:
Sleeping
Sleeping
Added a judge step to pick the best feedback (the user can define the number of iterations)
Browse files
app.py
CHANGED
@@ -168,6 +168,67 @@ def create_feedback(review, pdf_text, agent_prompt, model):
|
|
168 |
|
169 |
return feedback
|
170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
def critic(review, feedback, pdf_text, critic_prompt, model):
|
173 |
|
@@ -412,7 +473,8 @@ if option == "OpenReview paper ID":
|
|
412 |
reviewer_id = st.text_input("Enter reviewer ID (e.g. gNxe):")
|
413 |
reviewer_id = reviewer_id.strip()
|
414 |
model = st.text_input("Which model? ('gpt' or 'claude')")
|
415 |
-
|
|
|
416 |
upload_file = False
|
417 |
user_input = True
|
418 |
submissions = load_ICLR_submissions()
|
@@ -423,7 +485,8 @@ elif option == "Upload PDF with Review":
|
|
423 |
user_text = st.text_area("Enter review:")
|
424 |
uploaded_file = st.file_uploader("Upload PDF", type="pdf")
|
425 |
model = st.text_input("Which model? ('GPT' or 'Claude')")
|
426 |
-
|
|
|
427 |
upload_file = True
|
428 |
user_input = True
|
429 |
review, pdf = user_text, uploaded_file
|
@@ -436,10 +499,17 @@ elif option == "Upload PDF with Review":
|
|
436 |
if user_input:
|
437 |
annotations = load_annotations()
|
438 |
|
|
|
439 |
# Run your pipeline to generate the dataframe based on user input
|
440 |
-
|
441 |
-
|
442 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
443 |
|
444 |
st.title(f'Review feedback')
|
445 |
|
|
|
168 |
|
169 |
return feedback
|
170 |
|
171 |
+
def pick_best_review(feedback_list, agent_prompt, review, pdf_text, model):
    """Judge step: ask an LLM to select the best feedback among several agent runs.

    Parameters
    ----------
    feedback_list : list[str]
        Feedback strings produced by repeated runs of the feedback agent.
    agent_prompt : str
        The prompt the feedback agent was originally given (shown to the judge
        so it knows what each iteration was asked to do).
    review : str | dict
        The ML-conference review the feedback refers to (JSON-serialized below).
    pdf_text : str
        Full text of the paper the review is about.
    model : str
        'gpt' routes to the OpenAI chat-completions REST API; any other value
        routes to the Anthropic Claude client.

    Returns
    -------
    str
        The judge-selected feedback, or an error message if the OpenAI call failed.
    """
    prompt = f"You are given a list containing output from multiple iterations of the same agent. The agent was given the following prompt: {agent_prompt}. Your task is to go through each iteration of feedback and pick the best overall feedback for the review. This can be portions of feedback from each iteration, or one set of feedback in its entirety. Please return the feedback in the exact format and wording it appears in. You will be given the original review, the list of feedback from the different agents, and the paper the review is about."

    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "text", "text": "Here is the ML conference review"},
            {"type": "text", "text": json.dumps(review)},
            {"type": "text", "text": "Here is the feedback about the review"},
            # BUG FIX: feedback_list is a Python list; the API requires the
            # "text" field to be a string, so serialize it.
            {"type": "text", "text": json.dumps(feedback_list)},
            {"type": "text", "text": "Finally, read the paper this review was written about"},
            {"type": "text", "text": pdf_text},
        ],
    }]

    if model == 'gpt':
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {openai_api_key}",
        }
        payload = {
            # BUG FIX: the original literal `"gpt-4o-mini ,` was missing its
            # closing quote, which made the module unparseable.
            "model": "gpt-4o-mini",
            "messages": messages,
            "max_tokens": 1000,
            "temperature": 0.2,
        }

        try:
            response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
            best_feedback = response.json()["choices"][0]["message"]["content"]
        except Exception as e:
            # Best-effort fallback: surface a placeholder instead of crashing the UI.
            print(f"An unexpected error occurred: {e}")
            best_feedback = "an error occurred"  # fixed typo in user-facing message

    else:
        message = client_anthropic.messages.create(max_tokens=1024, messages=messages, model="claude-3-5-sonnet-20240620")
        best_feedback = message.content[0].text

    return best_feedback
|
231 |
+
|
232 |
|
233 |
def critic(review, feedback, pdf_text, critic_prompt, model):
|
234 |
|
|
|
473 |
reviewer_id = st.text_input("Enter reviewer ID (e.g. gNxe):")
|
474 |
reviewer_id = reviewer_id.strip()
|
475 |
model = st.text_input("Which model? ('gpt' or 'claude')")
|
476 |
+
iterations = st.text_input("How many iterations to run agent for?")
|
477 |
+
if paper_id and reviewer_id and model and iterations:
|
478 |
upload_file = False
|
479 |
user_input = True
|
480 |
submissions = load_ICLR_submissions()
|
|
|
485 |
user_text = st.text_area("Enter review:")
|
486 |
uploaded_file = st.file_uploader("Upload PDF", type="pdf")
|
487 |
model = st.text_input("Which model? ('GPT' or 'Claude')")
|
488 |
+
iterations = st.text_input("How many iterations to run agent for?")
|
489 |
+
if user_text and uploaded_file and model and iterations:
|
490 |
upload_file = True
|
491 |
user_input = True
|
492 |
review, pdf = user_text, uploaded_file
|
|
|
499 |
if user_input:
    annotations = load_annotations()

    # Run your pipeline to generate the dataframe based on user input
    feedback_list = []
    # BUG FIX: st.text_input returns a string; `iterations > 1` and
    # `range(iterations)` would raise TypeError without this conversion.
    iterations = int(iterations)
    if iterations > 1:
        # Run the feedback agent several times, then let the judge pick
        # the best overall feedback across iterations.
        for _ in range(iterations):
            feedback = create_feedback(review, pdf_text, agent_prompt, model)
            feedback_list.append(feedback)
        best_feedback = pick_best_review(feedback_list, agent_prompt, review, pdf_text, model)
    else:
        # Single iteration: skip the judge entirely.
        best_feedback = create_feedback(review, pdf_text, agent_prompt, model)

    # Have the critic refine whichever feedback won.
    revised_feedback = critic(review, best_feedback, pdf_text, critic_prompt, model)

    st.title('Review feedback')  # no placeholders, so no f-string needed
|
515 |
|