Spaces:
Runtime error
Runtime error
File size: 4,091 Bytes
c42ad4e 0d406fd 4f91ef7 5c37675 c42ad4e 5f00b64 4f91ef7 bed055f 0d406fd 486c2a5 0d406fd 274b11e c42ad4e cfb8ed0 c42ad4e 0d406fd c42ad4e 0d406fd 1e17bf9 1fb5180 1e17bf9 0d406fd 1e17bf9 5f00b64 5a69600 0d406fd 5a69600 0d406fd e8ec388 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import os
import tempfile

import PyPDF2
import requests
import streamlit as st
from streamlit.components.v1 import html
from transformers import pipeline
def get_pdf_text(pdf_path):
    """Extract the text of every page of a PDF and join it into one string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages, in page order, separated by single spaces.
    """
    # The context manager closes the file even when parsing or text
    # extraction raises; the previous manual open()/close() leaked the handle
    # in that case.
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Extraction must finish while the file is still open, so the join is
        # evaluated inside the `with` block. `or ""` keeps the join safe for a
        # page that yields no text.
        return " ".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )
# Commented-out local summarization pipeline, kept for reference:
# sum_model = pipeline("text2text-generation", model="yasminesarraj/flan-t5-small-samsum")

# Authorization header passed to requests.post by create_tags and
# summarize_text; the bearer token is read from the Streamlit secret HF_AUTH.
headers = {"Authorization": "Bearer {}".format(st.secrets["HF_AUTH"])}
def create_tags(payload, timeout=60):
    """Request topic tags for a text from the hosted tag-generation model.

    Args:
        payload: JSON-serializable request body; the app sends
            ``{"inputs": <text>}``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would block the
            Streamlit run on a stalled connection.

    Returns:
        The decoded JSON body of the response, returned as-is (the HTTP
        status is not checked, so error payloads are returned too).

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    API_URL_TAGS = "https://api-inference.huggingface.co/models/fabiochiu/t5-base-tag-generation"
    response = requests.post(
        API_URL_TAGS, headers=headers, json=payload, timeout=timeout
    )
    return response.json()
def summarize_text(payload, timeout=60):
    """Request a summary of a text from the hosted summarization model.

    Args:
        payload: JSON-serializable request body; the app sends
            ``{"inputs": "Summarize: " + <text>}``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would block the
            Streamlit run on a stalled connection.

    Returns:
        The decoded JSON body of the response, returned as-is (the HTTP
        status is not checked, so error payloads are returned too).

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    API_URL = "https://api-inference.huggingface.co/models/yasminesarraj/flan-t5-small-samsum"
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    return response.json()
# Start of the app code


def _first_generated_text(result, keys):
    """Return the first non-empty text found under ``keys`` in an API result.

    Args:
        result: Decoded JSON returned by create_tags / summarize_text.
        keys: Field names to try, in order, on the first list element.

    Returns:
        The text, or None when ``result`` is not a non-empty list of dicts
        (for example an error object) or none of the fields holds text.
    """
    if not isinstance(result, list) or not result:
        return None
    first = result[0]
    if not isinstance(first, dict):
        return None
    for key in keys:
        text = first.get(key)
        if text:
            return text
    return None


tab_your_paper, tab_general_topics = st.tabs(["Summarize your paper(s)", "Research topics"])

with tab_your_paper:
    html("", height=10)
    st.markdown("\n### Simply upload one or multiple PDFs and we summarize the content for you!\n")
    pdf_files = st.file_uploader(
        "Upload your paper as a pdf",
        type=[".pdf"],
        accept_multiple_files=True,
        help="You can summarize one or also multiple papers at once. The file format needs to be a pdf.",
    )

    if pdf_files:
        pdfs_content_list = []
        for pdf in pdf_files:
            # get_pdf_text needs a path, so each upload is spooled to its own
            # temporary file. The client-supplied file name is never used as a
            # path: writing it into the working directory could overwrite (and
            # then delete) application files and would collide when two
            # uploads share a name.
            tmp_pdf = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
            try:
                # Close the handle before the file is reopened for reading.
                with tmp_pdf:
                    tmp_pdf.write(pdf.getvalue())
                pdfs_content_list.append(get_pdf_text(tmp_pdf.name))
            finally:
                # Remove the temporary copy even when extraction fails.
                os.remove(tmp_pdf.name)

        all_text_together = " ".join(pdfs_content_list)

        # A request failure or an unusable response only disables the affected
        # column; it must not take down the whole page.
        try:
            tags = _first_generated_text(
                create_tags({"inputs": all_text_together}),
                ("generated_text",),
            )
        except (requests.RequestException, ValueError):
            tags = None

        # NOTE(review): the field name depends on how the hosted model is
        # served; "generated_text" is accepted in addition to "summary_text"
        # because the commented-out local pipeline in this file loads the same
        # model as text2text-generation. Confirm against the live API.
        try:
            summary = _first_generated_text(
                summarize_text({"inputs": "Summarize: " + all_text_together}),
                ("summary_text", "generated_text"),
            )
        except (requests.RequestException, ValueError):
            summary = None

        paper_label = "paper(s)" if len(pdf_files) > 1 else "paper"
        col1, col2 = st.columns(2)

        with col1:
            if summary:
                st.markdown(f"#### Summary of your {paper_label}:")
                st.write(summary)
            else:
                st.markdown("#### Summary currently unavailable.")

        with col2:
            if tags:
                st.markdown(f"#### Identified topics of your {paper_label}:")
                st.write(tags)
            else:
                st.markdown("#### Topics currently unavailable")

        with st.expander("See your total text"):
            st.write(all_text_together)

with tab_general_topics:
    html("", height=10)
    st.header("See the status of a research topic through a summary of the most cited papers")
    st.selectbox("Select a research topic", ["Artificial Intelligence", "Sustainability", "Cooking"])