Spaces:
Sleeping
Sleeping
Added a judge step to pick the best feedback (the user can define the number of iterations)
Browse files
app.py
CHANGED
@@ -168,6 +168,67 @@ def create_feedback(review, pdf_text, agent_prompt, model):
|
|
168 |
|
169 |
return feedback
|
170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
def critic(review, feedback, pdf_text, critic_prompt, model):
|
173 |
|
@@ -412,7 +473,8 @@ if option == "OpenReview paper ID":
|
|
412 |
reviewer_id = st.text_input("Enter reviewer ID (e.g. gNxe):")
|
413 |
reviewer_id = reviewer_id.strip()
|
414 |
model = st.text_input("Which model? ('gpt' or 'claude')")
|
415 |
-
|
|
|
416 |
upload_file = False
|
417 |
user_input = True
|
418 |
submissions = load_ICLR_submissions()
|
@@ -423,7 +485,8 @@ elif option == "Upload PDF with Review":
|
|
423 |
user_text = st.text_area("Enter review:")
|
424 |
uploaded_file = st.file_uploader("Upload PDF", type="pdf")
|
425 |
model = st.text_input("Which model? ('GPT' or 'Claude')")
|
426 |
-
|
|
|
427 |
upload_file = True
|
428 |
user_input = True
|
429 |
review, pdf = user_text, uploaded_file
|
@@ -436,10 +499,17 @@ elif option == "Upload PDF with Review":
|
|
436 |
if user_input:
|
437 |
annotations = load_annotations()
|
438 |
|
|
|
439 |
# Run your pipeline to generate the dataframe based on user input
|
440 |
-
|
441 |
-
|
442 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
443 |
|
444 |
st.title(f'Review feedback')
|
445 |
|
|
|
168 |
|
169 |
return feedback
|
170 |
|
171 |
+
def pick_best_review(feedback_list, agent_prompt, review, pdf_text, model):
    """Judge step: ask an LLM to select the best feedback among several agent runs.

    Parameters
    ----------
    feedback_list : list[str]
        Feedback strings produced by repeated runs of the feedback agent.
    agent_prompt : str
        The prompt the feedback agent was originally given (shown to the judge
        so it knows what each iteration was asked to do).
    review : str | dict
        The ML-conference review the feedback refers to (JSON-serialized below).
    pdf_text : str
        Full text of the paper the review is about.
    model : str
        'gpt' routes to the OpenAI chat-completions REST API; any other value
        routes to the Anthropic Claude client.

    Returns
    -------
    str
        The judge-selected feedback, or an error message if the OpenAI call failed.
    """
    prompt = f"You are given a list containing output from multiple iterations of the same agent. The agent was given the following prompt: {agent_prompt}. Your task is to go through each iteration of feedback and pick the best overall feedback for the review. This can be portions of feedback from each iteration, or one set of feedback in its entirety. Please return the feedback in the exact format and wording it appears in. You will be given the original review, the list of feedback from the different agents, and the paper the review is about."

    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "text", "text": "Here is the ML conference review"},
            {"type": "text", "text": json.dumps(review)},
            {"type": "text", "text": "Here is the feedback about the review"},
            # BUG FIX: feedback_list is a Python list; the API requires the
            # "text" field to be a string, so serialize it.
            {"type": "text", "text": json.dumps(feedback_list)},
            {"type": "text", "text": "Finally, read the paper this review was written about"},
            {"type": "text", "text": pdf_text},
        ],
    }]

    if model == 'gpt':
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {openai_api_key}",
        }
        payload = {
            # BUG FIX: the original literal `"gpt-4o-mini ,` was missing its
            # closing quote, which made the module unparseable.
            "model": "gpt-4o-mini",
            "messages": messages,
            "max_tokens": 1000,
            "temperature": 0.2,
        }

        try:
            response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
            best_feedback = response.json()["choices"][0]["message"]["content"]
        except Exception as e:
            # Best-effort fallback: surface a placeholder instead of crashing the UI.
            print(f"An unexpected error occurred: {e}")
            best_feedback = "an error occurred"  # fixed typo in user-facing message

    else:
        message = client_anthropic.messages.create(max_tokens=1024, messages=messages, model="claude-3-5-sonnet-20240620")
        best_feedback = message.content[0].text

    return best_feedback
|
231 |
+
|
232 |
|
233 |
def critic(review, feedback, pdf_text, critic_prompt, model):
|
234 |
|
|
|
473 |
reviewer_id = st.text_input("Enter reviewer ID (e.g. gNxe):")
|
474 |
reviewer_id = reviewer_id.strip()
|
475 |
model = st.text_input("Which model? ('gpt' or 'claude')")
|
476 |
+
iterations = st.text_input("How many iterations to run agent for?")
|
477 |
+
if paper_id and reviewer_id and model and iterations:
|
478 |
upload_file = False
|
479 |
user_input = True
|
480 |
submissions = load_ICLR_submissions()
|
|
|
485 |
user_text = st.text_area("Enter review:")
|
486 |
uploaded_file = st.file_uploader("Upload PDF", type="pdf")
|
487 |
model = st.text_input("Which model? ('GPT' or 'Claude')")
|
488 |
+
iterations = st.text_input("How many iterations to run agent for?")
|
489 |
+
if user_text and uploaded_file and model and iterations:
|
490 |
upload_file = True
|
491 |
user_input = True
|
492 |
review, pdf = user_text, uploaded_file
|
|
|
499 |
if user_input:
    annotations = load_annotations()

    # Run your pipeline to generate the dataframe based on user input
    feedback_list = []
    # BUG FIX: st.text_input returns a string; `iterations > 1` and
    # `range(iterations)` would raise TypeError without this conversion.
    iterations = int(iterations)
    if iterations > 1:
        # Run the feedback agent several times, then let the judge pick
        # the best overall feedback across iterations.
        for _ in range(iterations):
            feedback = create_feedback(review, pdf_text, agent_prompt, model)
            feedback_list.append(feedback)
        best_feedback = pick_best_review(feedback_list, agent_prompt, review, pdf_text, model)
    else:
        # Single iteration: skip the judge entirely.
        best_feedback = create_feedback(review, pdf_text, agent_prompt, model)

    # Have the critic refine whichever feedback won.
    revised_feedback = critic(review, best_feedback, pdf_text, critic_prompt, model)

    st.title('Review feedback')  # no placeholders, so no f-string needed
|
515 |
|