Shuja1401 committed on
Commit
abd6513
·
verified ·
1 Parent(s): ad67130

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +68 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import fitz # PyMuPDF
4
+ import openai
5
+ import os
6
+
7
+ openai.api_key = os.getenv("OPENAI_API_KEY")  # Key comes from the OPENAI_API_KEY environment variable; os.getenv yields None when it is unset.
8
+
9
def extract_text_from_pdf(pdf_file):
    """Return the concatenated plain text of every page of a PDF.

    Args:
        pdf_file: The PDF to read. May be a binary file-like object exposing
            ``read()``, raw ``bytes``/``bytearray``, a filesystem path
            (``str`` or ``os.PathLike``), or an object whose ``name``
            attribute is such a path.

    Returns:
        The text of all pages joined in page order with no separator.
    """
    if hasattr(pdf_file, "read"):
        # File-like upload: parse the PDF straight from memory.
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    elif isinstance(pdf_file, (bytes, bytearray)):
        doc = fitz.open(stream=bytes(pdf_file), filetype="pdf")
    else:
        # Gradio 4.x passes the upload as a path string (a ``str`` subclass)
        # rather than an open file, so there is no ``read()`` to call.
        if isinstance(pdf_file, (str, os.PathLike)):
            path = pdf_file
        else:
            path = pdf_file.name
        doc = fitz.open(os.fspath(path))

    with doc:
        return "".join(page.get_text() for page in doc)
12
+
13
def clean_text(text):
    """Normalise whitespace in extracted PDF text.

    Every run of whitespace (newlines, tabs, repeated spaces) is collapsed to
    a single space, and leading/trailing whitespace is removed.

    Args:
        text: Raw text, typically as extracted from PDF pages.

    Returns:
        The text as a single line with words separated by exactly one space.
    """
    # split() with no argument splits on any whitespace run and drops empty
    # pieces, so re-joining collapses and strips in a single pass.
    return " ".join(text.split())
16
+
17
def split_text(text, max_tokens=1500):
    """Split text into consecutive chunks of at most ``max_tokens`` words.

    Despite the parameter name, the unit is whitespace-separated words, not
    model tokens.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunks in original order, each with its words joined by a
        single space. Empty or whitespace-only text yields an empty list.

    Raises:
        ValueError: If ``max_tokens`` is less than 1.
    """
    if max_tokens < 1:
        # A negative step would silently produce no chunks (dropping the whole
        # document) and zero would fail inside range(); reject both up front.
        raise ValueError("max_tokens must be a positive integer")

    words = text.split()
    return [
        " ".join(words[start:start + max_tokens])
        for start in range(0, len(words), max_tokens)
    ]
20
+
21
def summarize_chunk(chunk, prompt):
    """Ask the chat model to process one piece of text under an instruction.

    Args:
        chunk: The text sent as the user message.
        prompt: The instruction sent as the system message.

    Returns:
        The model's reply with surrounding whitespace stripped, or an empty
        string if the reply carries no text content.
    """
    # The pinned openai 1.x client removed ``openai.ChatCompletion``; chat
    # completions are reached through ``openai.chat.completions`` instead.
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": chunk},
        ],
        temperature=0.5,
        max_tokens=500,
    )
    # In 1.x the message is an object, not a dict, and its content may be None.
    content = response.choices[0].message.content
    return (content or "").strip()
32
+
33
def summarize_paper(file):
    """Produce four views of an uploaded research paper.

    The paper is extracted, whitespace-normalised and split into chunks; each
    chunk is summarised separately and the pieces are joined into the full
    summary. The three short views (ELI5, significance, TL;DR) are generated
    from a single chunk-sized input rather than from the entire paper.

    Args:
        file: The uploaded PDF as accepted by ``extract_text_from_pdf``, or
            ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple of strings: ``(full_summary, eli5, why_it_matters, tldr)``.
        When no file is given or no text can be extracted, the first element
        holds an explanatory message and the other three are empty.
    """
    if file is None:
        return "Please upload a PDF file first.", "", "", ""

    cleaned_text = clean_text(extract_text_from_pdf(file))
    chunks = split_text(cleaned_text)
    if not chunks:
        # E.g. a scanned PDF with no text layer: nothing to send to the model.
        return "No text could be extracted from this PDF.", "", "", ""

    chunk_summaries = [
        summarize_chunk(chunk, "Summarize this academic research paper for a general audience:")
        for chunk in chunks
    ]
    full_summary = "\n\n".join(chunk_summaries)

    # Sending the whole paper in one request defeats the chunking above and
    # overflows the model's context window on long papers. A paper that fits
    # in one chunk is sent as-is; otherwise the joined summaries stand in.
    # NOTE(review): for extremely long papers the joined summaries could
    # themselves grow large - consider a second summarisation pass.
    digest = chunks[0] if len(chunks) == 1 else full_summary

    elim5 = summarize_chunk(digest, "Explain this paper to a 10-year-old in simple terms:")
    why_matters = summarize_chunk(digest, "Why does this research matter? Explain its significance.")
    tldr = summarize_chunk(digest, "Give a 1-line TL;DR summary of this paper.")

    return full_summary, elim5, why_matters, tldr
48
+
49
# Gradio UI: upload and buttons in the left column, the four result boxes in
# the right column. Emoji and the em dash below replace mis-decoded UTF-8 byte
# sequences that rendered as garbage in the labels.
with gr.Blocks() as demo:
    gr.Markdown("📄 **Paper News Summarizer** — Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.")
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
            clear_btn = gr.Button("Clear")
            submit_btn = gr.Button("Submit")
        with gr.Column(scale=2):
            summary_output = gr.Textbox(label="📘 Full Summary", lines=10, show_copy_button=True)
            elim5_output = gr.Textbox(label="🧒 ELI5 Summary", lines=2, show_copy_button=True)
            why_output = gr.Textbox(label="🎯 Why It Matters", lines=2, show_copy_button=True)
            tldr_output = gr.Textbox(label="⚡ TL;DR", lines=2, show_copy_button=True)

    def process(file):
        """Run the summarisation pipeline on the uploaded file."""
        return summarize_paper(file)

    # Submit fills all four boxes from the pipeline's 4-tuple, in order.
    submit_btn.click(fn=process, inputs=file_input, outputs=[summary_output, elim5_output, why_output, tldr_output])
    # Clear blanks the four result boxes; the uploaded file is left in place.
    clear_btn.click(fn=lambda: ("", "", "", ""), inputs=[], outputs=[summary_output, elim5_output, why_output, tldr_output])

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+
2
+ openai==1.3.5
3
+ PyMuPDF==1.22.0
4
+ gradio==4.18.0