Spaces:
Running
Running
Commit
·
ce5740f
1
Parent(s):
223116a
Added API key warning
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ import shutil
|
|
10 |
|
11 |
import pickle
|
12 |
docs = None
|
|
|
13 |
|
14 |
#title
|
15 |
st.title("Encode knowledge from papers with cited references")
|
@@ -20,11 +21,11 @@ api_key_url = 'https://help.openai.com/en/articles/4936850-where-do-i-find-my-se
|
|
20 |
api_key = st.text_input('OpenAI API Key',
|
21 |
placeholder='sk-...',
|
22 |
help=f"['What is that?']({api_key_url})",
|
23 |
-
type="password")
|
24 |
-
|
25 |
-
# st.write('The current movie title is', title)
|
26 |
-
api_key = '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'
|
27 |
os.environ["OPENAI_API_KEY"] = f"{api_key}" #
|
|
|
|
|
28 |
import paperqa
|
29 |
|
30 |
|
@@ -83,10 +84,10 @@ def call_arXiv_API(search_query, search_by='all', sort_by='relevance', max_resul
|
|
83 |
pdf_info=list(zip(pdf_titles, pdf_urls, pdf_authors, pdf_categories, folder_names, pdf_citation))
|
84 |
|
85 |
# Check number of available files
|
86 |
-
print('Requesting {max_results} files'.format(max_results=max_results))
|
87 |
if len(pdf_urls)<int(max_results):
|
88 |
matching_pdf_num=len(pdf_urls)
|
89 |
-
print('Only {matching_pdf_num} files available'.format(matching_pdf_num=matching_pdf_num))
|
90 |
return pdf_info, pdf_citation
|
91 |
|
92 |
|
@@ -95,7 +96,7 @@ def download_pdf(pdf_info):
|
|
95 |
# if len(os.listdir(f'./{folder_name}') ) != 0:
|
96 |
# check folder is empty to avoid using papers from old runs:
|
97 |
# os.remove(f'./{folder_name}/*')
|
98 |
-
|
99 |
for i,p in enumerate(stqdm(pdf_info, desc='Searching and downloading papers')):
|
100 |
|
101 |
pdf_title=p[0]
|
@@ -115,18 +116,15 @@ def download_pdf(pdf_info):
|
|
115 |
currP.write(r.content)
|
116 |
if i == 0:
|
117 |
st.markdown("###### Papers found:")
|
118 |
-
st.markdown(f
|
119 |
-
|
120 |
-
|
|
|
|
|
121 |
|
|
|
|
|
122 |
|
123 |
-
# #subtitle
|
124 |
-
# st.markdown("## Optical Character Recognition - Using `easyocr`, `streamlit` - hosted on 🤗 Spaces")
|
125 |
-
|
126 |
-
# st.markdown("Link to the app - [image-to-text-app on 🤗 Spaces](https://huggingface.co/spaces/Amrrs/image-to-text-app)")
|
127 |
-
|
128 |
-
# #image uploader
|
129 |
-
# image = st.file_uploader(label = "Upload your image here",type=['png','jpg','jpeg'])
|
130 |
|
131 |
max_results_current = 1
|
132 |
max_results = max_results_current
|
@@ -144,7 +142,7 @@ def search_click_callback(search_query, max_results):
|
|
144 |
with st.form(key='columns_in_form', clear_on_submit = False):
|
145 |
c1, c2 = st.columns([8,1])
|
146 |
with c1:
|
147 |
-
search_query = st.text_input("Input search query here:", placeholder='Keywords for most relevant search...', value='
|
148 |
)#search_query, max_results_current))
|
149 |
|
150 |
with c2:
|
@@ -158,7 +156,7 @@ if searchButton:
|
|
158 |
pdf_info = search_click_callback(search_query, max_results)
|
159 |
if 'pdf_info' not in st.session_state:
|
160 |
st.session_state['pdf_info'] = pdf_info
|
161 |
-
print(f'This is PDF info from search:{pdf_info}')
|
162 |
|
163 |
|
164 |
# def tokenize_callback():
|
@@ -196,14 +194,14 @@ def answer_callback(question_query):
|
|
196 |
if docs is None:
|
197 |
# my_bar.progress(0.2, "Please wait...")
|
198 |
pdf_info = st.session_state['pdf_info']
|
199 |
-
print('buliding docs')
|
200 |
docs = paperqa.Docs()
|
201 |
pdf_paths = [f"{p[4]}/{p[0]}.pdf" for p in pdf_info]
|
202 |
pdf_citations = [p[5] for p in pdf_info]
|
203 |
print(list(zip(pdf_paths, pdf_citations)))
|
204 |
|
205 |
-
for d, c in
|
206 |
-
print(d,c)
|
207 |
docs.add(d, c)
|
208 |
# docs._build_faiss_index()
|
209 |
answer = docs.query(question_query)
|
@@ -216,10 +214,12 @@ def answer_callback(question_query):
|
|
216 |
|
217 |
form = st.form(key='question_form')
|
218 |
question_query = form.text_input("What do you wanna know from these papers?", placeholder='Input questions here...',
|
219 |
-
value='
|
220 |
submitButton = form.form_submit_button('Submit')
|
221 |
|
222 |
if submitButton:
|
|
|
|
|
223 |
st.text_area("Answer:", answer_callback(question_query), height=600)
|
224 |
|
225 |
# with st.form(key='question_form', clear_on_submit = False):
|
|
|
10 |
|
11 |
import pickle
|
12 |
docs = None
|
13 |
+
api_key = ''
|
14 |
|
15 |
#title
|
16 |
st.title("Encode knowledge from papers with cited references")
|
|
|
21 |
api_key = st.text_input('OpenAI API Key',
|
22 |
placeholder='sk-...',
|
23 |
help=f"['What is that?']({api_key_url})",
|
24 |
+
type="password")
|
25 |
+
|
|
|
|
|
26 |
os.environ["OPENAI_API_KEY"] = f"{api_key}" #
|
27 |
+
if len(api_key) != 51:
|
28 |
+
st.warning('Please enter a valid OpenAI API key.', icon="⚠️")
|
29 |
import paperqa
|
30 |
|
31 |
|
|
|
84 |
pdf_info=list(zip(pdf_titles, pdf_urls, pdf_authors, pdf_categories, folder_names, pdf_citation))
|
85 |
|
86 |
# Check number of available files
|
87 |
+
# print('Requesting {max_results} files'.format(max_results=max_results))
|
88 |
if len(pdf_urls)<int(max_results):
|
89 |
matching_pdf_num=len(pdf_urls)
|
90 |
+
# print('Only {matching_pdf_num} files available'.format(matching_pdf_num=matching_pdf_num))
|
91 |
return pdf_info, pdf_citation
|
92 |
|
93 |
|
|
|
96 |
# if len(os.listdir(f'./{folder_name}') ) != 0:
|
97 |
# check folder is empty to avoid using papers from old runs:
|
98 |
# os.remove(f'./{folder_name}/*')
|
99 |
+
all_reference_text = []
|
100 |
for i,p in enumerate(stqdm(pdf_info, desc='Searching and downloading papers')):
|
101 |
|
102 |
pdf_title=p[0]
|
|
|
116 |
currP.write(r.content)
|
117 |
if i == 0:
|
118 |
st.markdown("###### Papers found:")
|
119 |
+
st.markdown(f"{i+1}. {pdf_citation}")
|
120 |
+
time.sleep(0.15)
|
121 |
+
all_reference_text.append(f"{i+1}. {pdf_citation}\n")
|
122 |
+
if 'all_reference_text' not in st.session_state:
|
123 |
+
st.session_state['all_reference_text'] = ' '.join(all_reference_text)
|
124 |
|
125 |
+
# print(all_reference_text)
|
126 |
+
|
127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
max_results_current = 1
|
130 |
max_results = max_results_current
|
|
|
142 |
with st.form(key='columns_in_form', clear_on_submit = False):
|
143 |
c1, c2 = st.columns([8,1])
|
144 |
with c1:
|
145 |
+
search_query = st.text_input("Input search query here:", placeholder='Keywords for most relevant search...', value=''
|
146 |
)#search_query, max_results_current))
|
147 |
|
148 |
with c2:
|
|
|
156 |
pdf_info = search_click_callback(search_query, max_results)
|
157 |
if 'pdf_info' not in st.session_state:
|
158 |
st.session_state['pdf_info'] = pdf_info
|
159 |
+
# print(f'This is PDF info from search:{pdf_info}')
|
160 |
|
161 |
|
162 |
# def tokenize_callback():
|
|
|
194 |
if docs is None:
|
195 |
# my_bar.progress(0.2, "Please wait...")
|
196 |
pdf_info = st.session_state['pdf_info']
|
197 |
+
# print('buliding docs')
|
198 |
docs = paperqa.Docs()
|
199 |
pdf_paths = [f"{p[4]}/{p[0]}.pdf" for p in pdf_info]
|
200 |
pdf_citations = [p[5] for p in pdf_info]
|
201 |
print(list(zip(pdf_paths, pdf_citations)))
|
202 |
|
203 |
+
for d, c in zip(pdf_paths, pdf_citations):
|
204 |
+
# print(d,c)
|
205 |
docs.add(d, c)
|
206 |
# docs._build_faiss_index()
|
207 |
answer = docs.query(question_query)
|
|
|
214 |
|
215 |
form = st.form(key='question_form')
|
216 |
question_query = form.text_input("What do you wanna know from these papers?", placeholder='Input questions here...',
|
217 |
+
value='')
|
218 |
submitButton = form.form_submit_button('Submit')
|
219 |
|
220 |
if submitButton:
|
221 |
+
with st.expander("Found papers:", expanded=True):
|
222 |
+
st.write(f"{st.session_state['all_reference_text']}")
|
223 |
st.text_area("Answer:", answer_callback(question_query), height=600)
|
224 |
|
225 |
# with st.form(key='question_form', clear_on_submit = False):
|