Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,68 +1,99 @@
|
|
1 |
-
|
2 |
import gradio as gr
|
3 |
import fitz # PyMuPDF
|
4 |
-
import openai
|
5 |
import os
|
|
|
|
|
6 |
|
|
|
7 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
12 |
|
13 |
def clean_text(text):
|
14 |
-
|
15 |
-
return text.strip()
|
16 |
|
17 |
def split_text(text, max_tokens=1500):
|
18 |
words = text.split()
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
-
|
|
|
22 |
response = openai.ChatCompletion.create(
|
23 |
model="gpt-3.5-turbo",
|
24 |
messages=[
|
25 |
{"role": "system", "content": prompt},
|
26 |
-
{"role": "user", "content":
|
27 |
],
|
28 |
temperature=0.5,
|
29 |
max_tokens=500
|
30 |
)
|
31 |
-
return response.choices[0].message
|
32 |
|
33 |
-
def
|
34 |
-
|
35 |
-
|
36 |
-
chunks = split_text(
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
tldr =
|
|
|
46 |
|
47 |
-
return full_summary,
|
48 |
|
49 |
with gr.Blocks() as demo:
|
50 |
-
gr.Markdown("
|
|
|
|
|
|
|
|
|
51 |
with gr.Row():
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
submit_btn = gr.Button("Submit")
|
56 |
-
with gr.Column(scale=2):
|
57 |
-
summary_output = gr.Textbox(label="π Full Summary", lines=10, show_copy_button=True)
|
58 |
-
elim5_output = gr.Textbox(label="π§ ELI5 Summary", lines=2, show_copy_button=True)
|
59 |
-
why_output = gr.Textbox(label="π― Why It Matters", lines=2, show_copy_button=True)
|
60 |
-
tldr_output = gr.Textbox(label="β‘ TL;DR", lines=2, show_copy_button=True)
|
61 |
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
-
submit_btn.click(fn=
|
66 |
-
clear_btn.click(fn=lambda: (
|
67 |
|
68 |
-
|
|
|
|
1 |
+
# app.py
import os

import fitz  # PyMuPDF
import gradio as gr
import openai
from dotenv import load_dotenv

# BUG FIX: load_dotenv was imported but never called, so a local .env file
# was silently ignored. Calling it is a no-op when no .env exists; on
# Hugging Face Spaces the key still arrives via the Secrets environment.
load_dotenv()

# Load secret from environment variable (make sure OPENAI_API_KEY is set in Hugging Face Secrets)
openai.api_key = os.getenv("OPENAI_API_KEY")
|
12 |
+
def extract_text_from_pdf(file):
    """Return the concatenated text of every page in the uploaded PDF.

    `file` is a file-like object (as handed over by the Gradio File input);
    its raw bytes are passed to PyMuPDF as an in-memory stream, so nothing
    is written to disk.
    """
    pdf_bytes = file.read()
    with fitz.open(stream=pdf_bytes, filetype="pdf") as document:
        page_texts = [page.get_text() for page in document]
    return "".join(page_texts)
|
18 |
|
19 |
def clean_text(text):
    """Collapse all whitespace runs (spaces, tabs, newlines) into single spaces.

    Simplified: str.split() with no arguments already splits on every kind of
    whitespace, so the previous `text.replace("\\n", " ")` pre-pass was redundant.
    Returns the normalized string ("" for empty/whitespace-only input).
    """
    return " ".join(text.split())
|
|
|
21 |
|
22 |
def split_text(text, max_tokens=1500):
    """Split `text` into chunks of at most `max_tokens` whitespace-separated words.

    NOTE: "tokens" here means words, not model tokens — this is a rough
    budget so each chunk stays inside the model's context window.

    Replaces the manual counter/accumulator loop with stride slicing over the
    word list: identical output (full chunks of max_tokens words, plus a final
    partial chunk), no mutable bookkeeping. Returns [] for empty input.
    """
    words = text.split()
    return [
        " ".join(words[start:start + max_tokens])
        for start in range(0, len(words), max_tokens)
    ]
|
37 |
|
38 |
+
# OpenAI call
|
39 |
+
def summarize_text(text, prompt):
    """Ask gpt-3.5-turbo to transform `text` according to the system `prompt`.

    Uses the legacy openai.ChatCompletion interface (pre-1.0 SDK) with a
    moderate temperature and a 500-token response cap; returns the stripped
    assistant reply.
    """
    chat_messages = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": text},
    ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
        temperature=0.5,
        max_tokens=500,
    )
    return response.choices[0].message.content.strip()
|
50 |
|
51 |
+
def generate_summaries(pdf):
    """Produce four views of an uploaded research-paper PDF.

    Pipeline: extract text -> normalize whitespace -> split into
    word-budgeted chunks, then query the model once per chunk for the full
    summary and once over the whole text for each short-form view.

    Returns (full_summary, eli5, tldr, why_matters) — the order the Gradio
    outputs are wired in.
    """
    cleaned = clean_text(extract_text_from_pdf(pdf))
    chunks = split_text(cleaned)

    # System prompts for each output style.
    prompts = {
        "summary": """
Summarize this academic text in a clear, concise way suitable for a general audience.
""",
        "eli5": """
Explain the key ideas in the following text like I'm 5 years old.
""",
        "tldr": """
Give a 1-line TL;DR of the research paper.
""",
        "importance": """
Why is this research important? Mention societal or technological relevance in a short paragraph.
""",
    }

    # Full summary is built chunk-by-chunk so long papers fit the context window.
    full_summary = "\n\n".join(
        summarize_text(chunk, prompts["summary"]) for chunk in chunks
    )
    eli5 = summarize_text(cleaned, prompts["eli5"])
    tldr = summarize_text(cleaned, prompts["tldr"])
    why_matters = summarize_text(cleaned, prompts["importance"])

    return full_summary, eli5, tldr, why_matters
|
75 |
|
76 |
# Gradio UI: file upload on the left, four read-only summary boxes below.
# NOTE(review): emoji in the scraped source were mojibake (UTF-8 read as
# latin-1); reconstructed here from the byte patterns — confirm against the
# deployed Space.
with gr.Blocks() as demo:
    gr.Markdown("""
# 🧠 Paper News Summarizer
Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.
""")

    with gr.Row():
        pdf_input = gr.File(label="Upload Research Paper (PDF)", file_types=['.pdf'])
        clear_btn = gr.Button("Clear")
        submit_btn = gr.Button("Submit", variant="primary")

    with gr.Row():
        with gr.Column():
            full_summary_output = gr.Textbox(label="📝 Full Summary", lines=12, interactive=False)
            eli5_output = gr.Textbox(label="🧒 ELI5 Explanation", lines=4, interactive=False)
        with gr.Column():
            tldr_output = gr.Textbox(label="⚡ TL;DR (1-line)", lines=2, interactive=False)
            why_output = gr.Textbox(label="🎯 Why It Matters", lines=6, interactive=False)

    submit_btn.click(
        fn=generate_summaries,
        inputs=[pdf_input],
        outputs=[full_summary_output, eli5_output, tldr_output, why_output],
    )
    # BUG FIX: Clear previously reset only 4 components and left why_output
    # showing stale text; it now blanks all four summary boxes plus the file input.
    clear_btn.click(
        fn=lambda: (None, "", "", "", ""),
        inputs=[],
        outputs=[pdf_input, full_summary_output, eli5_output, tldr_output, why_output],
    )

if __name__ == "__main__":
    demo.launch()
|