Spaces:
Sleeping
Sleeping
add function to check for hallucination stats
Browse files
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
import ast
|
|
|
|
| 4 |
import PyPDF2
|
| 5 |
from PyPDF2 import PdfReader
|
| 6 |
import openreview
|
|
@@ -20,6 +21,16 @@ client_openai = OpenAI(api_key=openai_api_key)
|
|
| 20 |
anthropic_api_key = os.environ["ANTHROPIC_API_KEY"]
|
| 21 |
client_anthropic = Anthropic(api_key=anthropic_api_key)
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
# load manual (human) annotations
|
| 24 |
def load_annotations():
|
| 25 |
path = './annotations_8_26.csv'
|
|
@@ -451,6 +462,9 @@ if user_input:
|
|
| 451 |
best_feedback = create_feedback(review, pdf_text, agent_prompt, model)
|
| 452 |
|
| 453 |
revised_feedback = critic(review, best_feedback, pdf_text, critic_prompt, model)
|
|
|
|
|
|
|
|
|
|
| 454 |
revised_feedback = revised_feedback.replace("<quote>", "'").replace("</quote>", "'")
|
| 455 |
|
| 456 |
st.title(f'Review feedback')
|
|
@@ -489,6 +503,7 @@ if user_input:
|
|
| 489 |
# Column 4: Display revised feedback (from critic)
|
| 490 |
with col4:
|
| 491 |
st.subheader('Revised Feedback')
|
|
|
|
| 492 |
st.write(revised_feedback)
|
| 493 |
else:
|
| 494 |
|
|
@@ -512,6 +527,7 @@ if user_input:
|
|
| 512 |
# Column 3: Display revised feedback (from critic)
|
| 513 |
with col3:
|
| 514 |
st.subheader('Revised Feedback')
|
|
|
|
| 515 |
st.write(revised_feedback)
|
| 516 |
|
| 517 |
else:
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
import ast
|
| 4 |
+
import re
|
| 5 |
import PyPDF2
|
| 6 |
from PyPDF2 import PdfReader
|
| 7 |
import openreview
|
|
|
|
| 21 |
anthropic_api_key = os.environ["ANTHROPIC_API_KEY"]
|
| 22 |
client_anthropic = Anthropic(api_key=anthropic_api_key)
|
| 23 |
|
| 24 |
+
def parse_quotes(input_string, pdf_text):
    """Count quoted spans in *input_string* and how many appear verbatim in *pdf_text*.

    A quoted span is the text between a ``<quote>`` tag and the nearest
    following ``</quote>`` tag.

    Args:
        input_string: Text that may contain ``<quote>...</quote>`` spans.
        pdf_text: Text to search for each extracted quote.

    Returns:
        A ``(count, match_count)`` tuple: the number of quoted spans found,
        and the number of those spans that occur as an exact substring of
        *pdf_text*.
    """
    # re.DOTALL lets '.' match newlines, so a quote that spans several lines
    # is still extracted instead of being silently left out of the total.
    quotes = re.findall(r'<quote>(.*?)</quote>', input_string, flags=re.DOTALL)

    # Exact substring test: any difference in whitespace or characters
    # between the quote and pdf_text makes the quote count as not found.
    match_count = sum(1 for quote in quotes if quote in pdf_text)

    return len(quotes), match_count
|
| 33 |
+
|
| 34 |
# load manual (human) annotations
|
| 35 |
def load_annotations():
|
| 36 |
path = './annotations_8_26.csv'
|
|
|
|
| 462 |
best_feedback = create_feedback(review, pdf_text, agent_prompt, model)
|
| 463 |
|
| 464 |
revised_feedback = critic(review, best_feedback, pdf_text, critic_prompt, model)
|
| 465 |
+
|
| 466 |
+
count, match_count = parse_quotes(revised_feedback, pdf_text)
|
| 467 |
+
|
| 468 |
revised_feedback = revised_feedback.replace("<quote>", "'").replace("</quote>", "'")
|
| 469 |
|
| 470 |
st.title(f'Review feedback')
|
|
|
|
| 503 |
# Column 4: Display revised feedback (from critic)
|
| 504 |
with col4:
|
| 505 |
st.subheader('Revised Feedback')
|
| 506 |
+
st.write(f"Quotes found verbatim in pdf text: {match_count}/{count}")
|
| 507 |
st.write(revised_feedback)
|
| 508 |
else:
|
| 509 |
|
|
|
|
| 527 |
# Column 3: Display revised feedback (from critic)
|
| 528 |
with col3:
|
| 529 |
st.subheader('Revised Feedback')
|
| 530 |
+
st.write(f"Quotes found verbatim in pdf text: {match_count}/{count}")
|
| 531 |
st.write(revised_feedback)
|
| 532 |
|
| 533 |
else:
|