Shuja1401 committed on
Commit
84f0807
·
verified ·
1 Parent(s): 41e443c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -73
app.py CHANGED
@@ -1,14 +1,12 @@
1
- # app.py
2
  import gradio as gr
3
  import fitz # PyMuPDF
4
- import os
5
  import openai
6
- from dotenv import load_dotenv
7
 
8
- # Load secret from environment variable (make sure OPENAI_API_KEY is set in Hugging Face Secrets)
9
  openai.api_key = os.getenv("OPENAI_API_KEY")
10
 
11
- # Clean and split PDF text
12
  def extract_text_from_pdf(file):
13
  text = ""
14
  with fitz.open(stream=file.read(), filetype="pdf") as doc:
@@ -16,84 +14,78 @@ def extract_text_from_pdf(file):
16
  text += page.get_text()
17
  return text
18
 
19
- def clean_text(text):
20
- return ' '.join(text.replace("\n", " ").split())
21
-
22
  def split_text(text, max_tokens=1500):
23
- words = text.split()
24
- chunks = []
25
- chunk = []
26
- tokens = 0
27
- for word in words:
28
- tokens += 1
29
- chunk.append(word)
30
- if tokens >= max_tokens:
31
- chunks.append(" ".join(chunk))
32
- chunk = []
33
- tokens = 0
34
- if chunk:
35
- chunks.append(" ".join(chunk))
36
- return chunks
37
-
38
- # OpenAI call
39
- def summarize_text(text, prompt):
40
- response = openai.ChatCompletion.create(
41
- model="gpt-3.5-turbo",
42
- messages=[
43
- {"role": "system", "content": prompt},
44
- {"role": "user", "content": text},
45
- ],
46
- temperature=0.5,
47
- max_tokens=500
48
- )
49
- return response.choices[0].message.content.strip()
50
 
51
- def generate_summaries(pdf):
52
- raw_text = extract_text_from_pdf(pdf)
53
- text = clean_text(raw_text)
54
  chunks = split_text(text)
 
 
 
 
 
 
 
 
 
 
55
 
56
- summary_prompt = """
57
- Summarize this academic text in a clear, concise way suitable for a general audience.
58
- """
59
- eli5_prompt = """
60
- Explain the key ideas in the following text like I'm 5 years old.
61
- """
62
- tldr_prompt = """
63
- Give a 1-line TL;DR of the research paper.
64
- """
65
- importance_prompt = """
66
- Why is this research important? Mention societal or technological relevance in a short paragraph.
67
- """
68
 
69
- full_summary = "\n\n".join([summarize_text(chunk, summary_prompt) for chunk in chunks])
70
- eli5 = summarize_text(text, eli5_prompt)
71
- tldr = summarize_text(text, tldr_prompt)
72
- why_matters = summarize_text(text, importance_prompt)
 
 
 
 
73
 
74
- return full_summary, eli5, tldr, why_matters
 
 
 
 
 
 
 
75
 
76
- with gr.Blocks() as demo:
77
- gr.Markdown("""
78
- # 🧠 Paper News Summarizer
79
- Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.
80
- """)
 
 
 
 
 
 
 
81
 
 
 
 
82
  with gr.Row():
83
- pdf_input = gr.File(label="Upload Research Paper (PDF)", file_types=['.pdf'])
84
- clear_btn = gr.Button("Clear")
85
- submit_btn = gr.Button("Submit", variant="primary")
86
 
87
  with gr.Row():
88
- with gr.Column():
89
- full_summary_output = gr.Textbox(label="📘 Full Summary", lines=12, interactive=False)
90
- eli5_output = gr.Textbox(label="🧒 ELI5 Explanation", lines=4, interactive=False)
91
- with gr.Column():
92
- tldr_output = gr.Textbox(label="⚡ TL;DR (1-line)", lines=2, interactive=False)
93
- why_output = gr.Textbox(label="🎯 Why It Matters", lines=6, interactive=False)
94
 
95
- submit_btn.click(fn=generate_summaries, inputs=[pdf_input], outputs=[full_summary_output, eli5_output, tldr_output, why_output])
96
- clear_btn.click(fn=lambda: (None, "", "", ""), inputs=[], outputs=[pdf_input, full_summary_output, eli5_output, tldr_output])
97
 
98
- if __name__ == "__main__":
99
- demo.launch()
 
 
1
  import gradio as gr
2
  import fitz # PyMuPDF
 
3
  import openai
4
+ import os
5
 
6
+ # Load API key from Hugging Face secret
7
  openai.api_key = os.getenv("OPENAI_API_KEY")
8
 
9
+ # Extract text from PDF
10
  def extract_text_from_pdf(file):
11
  text = ""
12
  with fitz.open(stream=file.read(), filetype="pdf") as doc:
 
14
  text += page.get_text()
15
  return text
16
 
17
+ # Split text into chunks
 
 
18
  def split_text(text, max_tokens=1500):
19
+ import textwrap
20
+ return textwrap.wrap(text, max_tokens)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
+ # Generate summary
23
+ def generate_summary(text):
 
24
  chunks = split_text(text)
25
+ summaries = []
26
+ for i, chunk in enumerate(chunks):
27
+ print(f"⏳ Summarizing chunk {i + 1}/{len(chunks)}...")
28
+ response = openai.ChatCompletion.create(
29
+ model="gpt-3.5-turbo",
30
+ messages=[{"role": "user", "content": f"Summarize the following:\n{chunk}"}],
31
+ temperature=0.5,
32
+ )
33
+ summaries.append(response['choices'][0]['message']['content'].strip())
34
+ return "\n\n".join(summaries)
35
 
36
+ # Generate all formats
37
+ def summarize_paper(pdf_file):
38
+ try:
39
+ raw_text = extract_text_from_pdf(pdf_file)
40
+ summary = generate_summary(raw_text)
 
 
 
 
 
 
 
41
 
42
+ response = openai.ChatCompletion.create(
43
+ model="gpt-3.5-turbo",
44
+ messages=[
45
+ {"role": "user", "content": f"Make an ELI5 version of this:\n{summary}"}
46
+ ],
47
+ temperature=0.7,
48
+ )
49
+ eli5 = response['choices'][0]['message']['content'].strip()
50
 
51
+ response = openai.ChatCompletion.create(
52
+ model="gpt-3.5-turbo",
53
+ messages=[
54
+ {"role": "user", "content": f"Write a TL;DR of this:\n{summary}"}
55
+ ],
56
+ temperature=0.7,
57
+ )
58
+ tldr = response['choices'][0]['message']['content'].strip()
59
 
60
+ response = openai.ChatCompletion.create(
61
+ model="gpt-3.5-turbo",
62
+ messages=[
63
+ {"role": "user", "content": f"Why does this research matter? {summary}"}
64
+ ],
65
+ temperature=0.7,
66
+ )
67
+ why_it_matters = response['choices'][0]['message']['content'].strip()
68
+
69
+ return summary, eli5, tldr, why_it_matters
70
+ except Exception as e:
71
+ return f"❌ Error: {str(e)}", "", "", ""
72
 
73
+ # Gradio Interface
74
+ with gr.Blocks() as demo:
75
+ gr.Markdown("📄 **Paper News Summarizer**\nUpload a research paper PDF and get a human-friendly summary, ELI5, TL;DR, and why it matters.")
76
  with gr.Row():
77
+ pdf_file = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
78
+ submit_btn = gr.Button("Submit")
79
+ clear_btn = gr.Button("Clear")
80
 
81
  with gr.Row():
82
+ full_summary = gr.Textbox(label="📘 Full Summary", lines=10, interactive=False)
83
+ eli5_summary = gr.Textbox(label="🧒 ELI5", lines=5, interactive=False)
84
+ with gr.Row():
85
+ tldr_summary = gr.Textbox(label="⚡ TL;DR", lines=2, interactive=False)
86
+ importance = gr.Textbox(label="🎯 Why It Matters", lines=5, interactive=False)
 
87
 
88
+ submit_btn.click(summarize_paper, inputs=pdf_file, outputs=[full_summary, eli5_summary, tldr_summary, importance])
89
+ clear_btn.click(lambda: ("", "", "", ""), outputs=[full_summary, eli5_summary, tldr_summary, importance])
90
 
91
+ demo.launch()