Spaces:
Running
Running
File size: 3,537 Bytes
409fff7 df44d29 51c1624 75c3b48 df44d29 b6f12dc 1ef9098 409fff7 9580320 51c1624 02129a7 51c1624 409fff7 06d5048 c4550f6 38a5648 1b7fefd df44d29 ce5740f df44d29 ce5740f a2f7c22 df44d29 76d5fd8 df44d29 b6f12dc df44d29 ce5740f df44d29 b6f12dc 1ef9098 a63b5cf df44d29 a2f7c22 b6f12dc b0f64e3 48bab4b 223116a b6f12dc 48bab4b b6f12dc ce5740f b6f12dc ce5740f b6f12dc df44d29 a63b5cf 48bab4b 0286a86 df44d29 ce5740f df44d29 ce5740f 4128cf7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import streamlit as st #Web App
import os
from PIL import Image
from utils import *
import pickle
# ---------------------------------------------------------------------------
# Page setup: module-level state, banner, intro text and the API-key input.
# ---------------------------------------------------------------------------

# Document collection used by answer_callback; None until it is first built.
docs = None
# Placeholder value; replaced by the text input further down.
api_key = ' '

# Kept ahead of every other Streamlit call, as in the original ordering.
st.set_page_config(layout="wide")
image = Image.open('arxiv_decode.png')
st.image(image, width=1000)

#title
st.title("Answering questions from scientific papers")
st.markdown("##### This tool will allow you to ask questions and get answers based on scientific papers. It uses OpenAI's GPT models, and you must have your own API key. Each query is about 10k tokens, which costs about only $0.20 on your own API key which is charged by OpenAI.")
st.markdown("##### Current version searches on [ArXiv](https://arxiv.org) papers only. 🚧Under development🚧")
st.markdown("Used libraries:\n * [PaperQA](https://github.com/whitead/paper-qa) \n* [langchain](https://github.com/hwchase17/langchain)")

api_key_url = 'https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key'
api_key = st.text_input(
    'OpenAI API Key',
    placeholder='sk-...',
    help=f"['What is that?']({api_key_url})",
    type="password",
)
# Exported through the environment so the OpenAI-backed libraries can read it.
os.environ["OPENAI_API_KEY"] = f"{api_key}" #

# The original check required exactly 51 characters, which matches only the
# legacy key format; newer project-scoped keys are longer and were wrongly
# flagged. Validate the "sk-" prefix and treat 51 as a minimum length instead.
# NOTE(review): confirm the minimum against current OpenAI key formats.
if not api_key.startswith('sk-') or len(api_key) < 51:
    st.warning('Please enter a valid OpenAI API key.', icon="⚠️")

# Default number of papers requested from the search; the form below reads
# max_results_current as the initial value of its "Max papers" box.
max_results_current = 5
max_results = max_results_current
def search_click_callback(search_query, max_results):
    """Run an arXiv search and download the PDFs it returns.

    The two values returned by ``call_arXiv_API`` are stored in the
    module-level globals ``pdf_info`` and ``pdf_citation``, and ``pdf_info``
    is then handed to ``download_pdf``.

    Args:
        search_query: Query text; formatted into a string before the call.
        max_results: Forwarded unchanged as the ``max_results`` keyword.
            NOTE(review): the form supplies this as text-input content, so it
            presumably arrives as a string - confirm the helper accepts that.

    Returns:
        The ``pdf_info`` value produced by ``call_arXiv_API``.
    """
    global pdf_info, pdf_citation
    search_result = call_arXiv_API(f'{search_query}', max_results=max_results)
    pdf_info, pdf_citation = search_result
    download_pdf(pdf_info)
    return pdf_info
# Search form: a wide query box beside a narrow "Max papers" box, submitted
# together by the "Search" button.
with st.form(key='columns_in_form', clear_on_submit=False):
    c1, c2 = st.columns([8, 1])
    search_query = c1.text_input(
        "Input search query here:",
        placeholder='Keywords for most relevant search...',
        value='',
    )
    max_results = c2.text_input("Max papers", value=max_results_current)
    searchButton = st.form_submit_button(label='Search')

# On submit, run the search and keep its result in the session state so the
# question form can read it on a later rerun.
if searchButton:
    pdf_info = search_click_callback(search_query, max_results)
    if 'pdf_info' not in st.session_state:
        # Stores the literal string 'pdf_info' under the session entry named
        # "key", exactly as the original attribute assignment did.
        # NOTE(review): this looks like an initialisation leftover - confirm
        # nothing reads st.session_state.key before removing it.
        st.session_state['key'] = 'pdf_info'
    st.session_state['pdf_info'] = pdf_info
def answer_callback(question_query):
    """Answer ``question_query`` from the papers found by the last search.

    When the module-level ``docs`` is still ``None``, a ``paperqa.Docs``
    collection is built from the ``pdf_info`` records kept in
    ``st.session_state`` and stored back into ``docs``. The question is then
    passed to ``docs.query``.

    Args:
        question_query: Question text handed to ``docs.query``.

    Returns:
        The ``formatted_answer`` attribute of the query result, or an empty
        string when no search result is available to build the collection.
    """
    # Imported inside the function, as in the original.
    import paperqa
    global docs

    must_build = docs is None
    # Guard: without a prior search there is no 'pdf_info' entry, and reading
    # it would raise KeyError. Tell the user instead of crashing.
    if must_build and 'pdf_info' not in st.session_state:
        st.warning('Please search for papers before asking a question.', icon="⚠️")
        return ''

    st.info('Please wait...', icon="🔥")
    if must_build:
        pdf_info = st.session_state['pdf_info']
        docs = paperqa.Docs()
        # Each record supplies a PDF path built from fields 4 and 0 and a
        # citation from field 5.
        # NOTE(review): presumably field 4 is the download directory and
        # field 0 the paper id - confirm against utils.
        sources = [(f"{p[4]}/{p[0]}.pdf", p[5]) for p in pdf_info]
        print(sources)
        for pdf_path, citation in sources:
            docs.add(pdf_path, citation)

    answer = docs.query(question_query)
    st.success('Voila!')
    return answer.formatted_answer
# Question form: one text box plus a "Submit" button.
form = st.form(key='question_form')
question_query = form.text_input(
    "What do you wanna know from these papers?",
    placeholder='Input questions here...',
    value='',
)
submitButton = form.form_submit_button('Submit')

# On submit, show the reference text stored in the session state, then the
# answer produced for the question.
if submitButton:
    papers_panel = st.expander("Found papers:", expanded=True)
    papers_panel.write(f"{st.session_state['all_reference_text']}")
    answer_text = answer_callback(question_query)
    st.text_area("Answer:", answer_text, height=600)
|