Upload 2 files

- app.py +68 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,68 @@
import gradio as gr
import fitz  # PyMuPDF
import os
from openai import OpenAI

# openai 1.x client; the key comes from the OPENAI_API_KEY environment variable (Space secret).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def extract_text_from_pdf(pdf_path):
    # gr.File in gradio 4.x passes the uploaded file to the handler as a filepath string.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)

def clean_text(text):
    # Collapse newlines and repeated whitespace into single spaces.
    text = " ".join(text.split())
    return text.strip()

def split_text(text, max_tokens=1500):
    # Rough chunking: word count is used as a cheap proxy for tokens.
    words = text.split()
    return [" ".join(words[i:i + max_tokens]) for i in range(0, len(words), max_tokens)]

def summarize_chunk(chunk, prompt):
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": chunk}
        ],
        temperature=0.5,
        max_tokens=500
    )
    return response.choices[0].message.content.strip()

def summarize_paper(file):
    text = extract_text_from_pdf(file)
    cleaned_text = clean_text(text)
    chunks = split_text(cleaned_text)

    full_summary = ""
    for chunk in chunks:
        summary = summarize_chunk(chunk, "Summarize this academic research paper for a general audience:")
        full_summary += summary + "\n\n"

    # These three calls send the entire cleaned text, so very long papers may exceed
    # the gpt-3.5-turbo context window.
    eli5 = summarize_chunk(cleaned_text, "Explain this paper to a 10-year-old in simple terms:")
    why_matters = summarize_chunk(cleaned_text, "Why does this research matter? Explain its significance.")
    tldr = summarize_chunk(cleaned_text, "Give a 1-line TL;DR summary of this paper.")

    return full_summary, eli5, why_matters, tldr

with gr.Blocks() as demo:
    gr.Markdown("**Paper News Summarizer** - Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.")
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
            clear_btn = gr.Button("Clear")
            submit_btn = gr.Button("Submit")
        with gr.Column(scale=2):
            summary_output = gr.Textbox(label="Full Summary", lines=10, show_copy_button=True)
            eli5_output = gr.Textbox(label="ELI5 Summary", lines=2, show_copy_button=True)
            why_output = gr.Textbox(label="Why It Matters", lines=2, show_copy_button=True)
            tldr_output = gr.Textbox(label="TL;DR", lines=2, show_copy_button=True)

    def process(file):
        return summarize_paper(file)

    submit_btn.click(fn=process, inputs=file_input, outputs=[summary_output, eli5_output, why_output, tldr_output])
    clear_btn.click(fn=lambda: ("", "", "", ""), inputs=[], outputs=[summary_output, eli5_output, why_output, tldr_output])

demo.launch()
requirements.txt
ADDED
@@ -0,0 +1,4 @@
openai==1.3.5
PyMuPDF==1.22.0
gradio==4.18.0
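For a quick local sanity check of the same pipeline outside the Gradio UI, here is a minimal standalone sketch. It is illustrative only and not part of this commit: it assumes OPENAI_API_KEY is exported in the environment and that paper.pdf is a local file (the filename is hypothetical), and it summarizes only the first chunk.

# Illustrative smoke test, not part of the Space.
# Assumes OPENAI_API_KEY is set and paper.pdf exists locally (hypothetical filename).
import os
import fitz  # PyMuPDF
from openai import OpenAI

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Extract and clean the text, then take one ~1500-word chunk (word count as a rough token proxy).
with fitz.open("paper.pdf") as doc:
    text = " ".join(page.get_text() for page in doc)
chunk = " ".join(text.split()[:1500])

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "Give a 1-line TL;DR summary of this paper."},
        {"role": "user", "content": chunk},
    ],
    max_tokens=100,
)
print(response.choices[0].message.content.strip())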