Shuja1401 committed on
Commit
41e443c
·
verified ·
1 Parent(s): abd6513

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -38
app.py CHANGED
@@ -1,68 +1,99 @@
1
-
2
  import gradio as gr
3
  import fitz # PyMuPDF
4
- import openai
5
  import os
 
 
6
 
 
7
  openai.api_key = os.getenv("OPENAI_API_KEY")
8
 
9
- def extract_text_from_pdf(pdf_file):
10
- with fitz.open("pdf", pdf_file.read()) as doc:
11
- return "".join([page.get_text() for page in doc])
 
 
 
 
12
 
13
  def clean_text(text):
14
- text = text.replace("\n", " ").replace(" ", " ")
15
- return text.strip()
16
 
17
  def split_text(text, max_tokens=1500):
18
  words = text.split()
19
- return [" ".join(words[i:i + max_tokens]) for i in range(0, len(words), max_tokens)]
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- def summarize_chunk(chunk, prompt):
 
22
  response = openai.ChatCompletion.create(
23
  model="gpt-3.5-turbo",
24
  messages=[
25
  {"role": "system", "content": prompt},
26
- {"role": "user", "content": chunk}
27
  ],
28
  temperature=0.5,
29
  max_tokens=500
30
  )
31
- return response.choices[0].message["content"].strip()
32
 
33
- def summarize_paper(file):
34
- text = extract_text_from_pdf(file)
35
- cleaned_text = clean_text(text)
36
- chunks = split_text(cleaned_text)
37
 
38
- full_summary = ""
39
- for chunk in chunks:
40
- summary = summarize_chunk(chunk, "Summarize this academic research paper for a general audience:")
41
- full_summary += summary + "\n\n"
 
 
 
 
 
 
 
 
42
 
43
- elim5 = summarize_chunk(cleaned_text, "Explain this paper to a 10-year-old in simple terms:")
44
- why_matters = summarize_chunk(cleaned_text, "Why does this research matter? Explain its significance.")
45
- tldr = summarize_chunk(cleaned_text, "Give a 1-line TL;DR summary of this paper.")
 
46
 
47
- return full_summary, elim5, why_matters, tldr
48
 
49
  with gr.Blocks() as demo:
50
- gr.Markdown("πŸ“„ **Paper News Summarizer** β€” Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.")
 
 
 
 
51
  with gr.Row():
52
- with gr.Column(scale=1):
53
- file_input = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
54
- clear_btn = gr.Button("Clear")
55
- submit_btn = gr.Button("Submit")
56
- with gr.Column(scale=2):
57
- summary_output = gr.Textbox(label="πŸ“˜ Full Summary", lines=10, show_copy_button=True)
58
- elim5_output = gr.Textbox(label="πŸ§’ ELI5 Summary", lines=2, show_copy_button=True)
59
- why_output = gr.Textbox(label="🎯 Why It Matters", lines=2, show_copy_button=True)
60
- tldr_output = gr.Textbox(label="⚑ TL;DR", lines=2, show_copy_button=True)
61
 
62
- def process(file):
63
- return summarize_paper(file)
 
 
 
 
 
64
 
65
- submit_btn.click(fn=process, inputs=file_input, outputs=[summary_output, elim5_output, why_output, tldr_output])
66
- clear_btn.click(fn=lambda: ("", "", "", ""), inputs=[], outputs=[summary_output, elim5_output, why_output, tldr_output])
67
 
68
- demo.launch()
 
 
1
+ # app.py
2
  import gradio as gr
3
  import fitz # PyMuPDF
 
4
  import os
5
+ import openai
6
+ from dotenv import load_dotenv
7
 
8
+ # Load secret from environment variable (make sure OPENAI_API_KEY is set in Hugging Face Secrets)
9
  openai.api_key = os.getenv("OPENAI_API_KEY")
10
 
11
+ # Extract, clean, and split PDF text
12
def extract_text_from_pdf(file):
    """Return the plain text of every page of a PDF, concatenated in page order.

    Args:
        file: Either a filesystem path (``str`` or ``os.PathLike``) or a
            binary file-like object exposing ``read()``.
            NOTE(review): depending on the Gradio version and the ``type``
            setting of ``gr.File``, the callback may receive a path string
            rather than an open file -- confirm against the pinned Gradio.

    Returns:
        A single string holding the text of all pages with no separator
        inserted between pages.
    """
    if isinstance(file, (str, os.PathLike)):
        # A path has no read(); let PyMuPDF open it from disk directly.
        doc = fitz.open(os.fspath(file))
    else:
        # In-memory bytes carry no file extension, so the type must be given.
        doc = fitz.open(stream=file.read(), filetype="pdf")

    # The context manager closes the document even if a page fails to parse.
    with doc:
        return "".join(page.get_text() for page in doc)
18
 
19
def clean_text(text):
    """Collapse every run of whitespace in *text* into a single space.

    Leading and trailing whitespace is removed as a consequence.

    Args:
        text: The raw string to normalise.

    Returns:
        The words of *text* joined by single spaces; ``""`` when *text*
        contains only whitespace.
    """
    # str.split() with no argument already treats newlines, tabs and repeated
    # spaces as separators, so no prior newline replacement is needed.
    return " ".join(text.split())
 
21
 
22
def split_text(text, max_tokens=1500):
    """Split *text* into consecutive chunks of at most *max_tokens* words.

    "Tokens" here are whitespace-separated words as produced by
    ``str.split()``; no model tokenizer is involved.

    Args:
        text: The string to split.
        max_tokens: Maximum number of words per chunk. Values below 1 are
            treated as 1, so each word becomes its own chunk.

    Returns:
        A list of chunks, each a space-joined run of words. Empty when
        *text* contains no words.
    """
    words = text.split()
    # range() rejects a zero step and a negative one yields nothing; one word
    # per chunk is the smallest meaningful size.
    step = max(1, max_tokens)
    return [" ".join(words[i:i + step]) for i in range(0, len(words), step)]
37
 
38
+ # OpenAI call
39
def summarize_text(text, prompt):
    """Ask the chat model to process *text* under the instruction *prompt*.

    Args:
        text: Content sent as the user message.
        prompt: Instruction sent as the system message.

    Returns:
        The content of the first returned choice, stripped of surrounding
        whitespace.
    """
    conversation = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": text},
    ]
    # NOTE(review): `openai.ChatCompletion` is the pre-1.0 SDK interface --
    # confirm the openai version pinned for this app.
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        temperature=0.5,
        max_tokens=500,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
50
 
51
def generate_summaries(pdf):
    """Produce the four texts shown in the UI for an uploaded paper.

    The paper is extracted, whitespace-normalised and split into chunks;
    each chunk is summarised separately and the results are joined into the
    full summary. The ELI5, TL;DR and importance texts are then generated
    from a single piece of text.

    Args:
        pdf: The uploaded PDF as passed on by the ``gr.File`` input, or
            ``None`` when nothing has been uploaded.

    Returns:
        A tuple ``(full_summary, eli5, tldr, why_matters)`` of strings. When
        there is nothing to summarise, the first element carries an
        explanatory message and the others are empty.
    """
    if pdf is None:
        return "Please upload a PDF file first.", "", "", ""

    raw_text = extract_text_from_pdf(pdf)
    text = clean_text(raw_text)
    chunks = split_text(text)

    if not chunks:
        # Nothing was extracted (e.g. no text layer); avoid sending empty
        # requests to the model.
        return "No extractable text was found in this PDF.", "", "", ""

    summary_prompt = """
    Summarize this academic text in a clear, concise way suitable for a general audience.
    """
    eli5_prompt = """
    Explain the key ideas in the following text like I'm 5 years old.
    """
    tldr_prompt = """
    Give a 1-line TL;DR of the research paper.
    """
    importance_prompt = """
    Why is this research important? Mention societal or technological relevance in a short paragraph.
    """

    full_summary = "\n\n".join(
        summarize_text(chunk, summary_prompt) for chunk in chunks
    )

    # The paper is chunked so that no single request carries the whole text.
    # Sending the whole text for the remaining prompts would undo that, so a
    # multi-chunk paper is represented by its already-condensed summary.
    # NOTE(review): a very long paper may still yield a summary that needs a
    # further reduction pass -- confirm against the model's context limit.
    overview_source = text if len(chunks) == 1 else full_summary

    eli5 = summarize_text(overview_source, eli5_prompt)
    tldr = summarize_text(overview_source, tldr_prompt)
    why_matters = summarize_text(overview_source, importance_prompt)

    return full_summary, eli5, tldr, why_matters
75
 
76
# UI layout: upload and action buttons on top, the four result boxes below.
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🧠 Paper News Summarizer
    Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.
    """)

    with gr.Row():
        pdf_input = gr.File(label="Upload Research Paper (PDF)", file_types=['.pdf'])
        clear_btn = gr.Button("Clear")
        submit_btn = gr.Button("Submit", variant="primary")

    with gr.Row():
        with gr.Column():
            full_summary_output = gr.Textbox(label="📘 Full Summary", lines=12, interactive=False)
            eli5_output = gr.Textbox(label="🧒 ELI5 Explanation", lines=4, interactive=False)
        with gr.Column():
            tldr_output = gr.Textbox(label="⚡ TL;DR (1-line)", lines=2, interactive=False)
            why_output = gr.Textbox(label="🎯 Why It Matters", lines=6, interactive=False)

    # Order matches the tuple returned by generate_summaries.
    result_boxes = [full_summary_output, eli5_output, tldr_output, why_output]

    submit_btn.click(fn=generate_summaries, inputs=[pdf_input], outputs=result_boxes)
    # Clear resets the upload and all four result boxes: one value per output.
    clear_btn.click(
        fn=lambda: (None, "", "", "", ""),
        inputs=[],
        outputs=[pdf_input, *result_boxes],
    )

if __name__ == "__main__":
    demo.launch()