Spaces:

Shuja1401
/

paper-news-summarizer

Sleeping

App Files Files Community

Shuja1401 committed 26 days ago

Commit

41e443c

verified ·

1 Parent(s): abd6513

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -38

app.py CHANGED Viewed

@@ -1,68 +1,99 @@
 import gradio as gr
 import fitz  # PyMuPDF
-import openai
 import os
 openai.api_key = os.getenv("OPENAI_API_KEY")
-def extract_text_from_pdf(pdf_file):
-    with fitz.open("pdf", pdf_file.read()) as doc:
-        return "".join([page.get_text() for page in doc])
 def clean_text(text):
-    text = text.replace("\n", " ").replace("  ", " ")
-    return text.strip()
 def split_text(text, max_tokens=1500):
     words = text.split()
-    return [" ".join(words[i:i + max_tokens]) for i in range(0, len(words), max_tokens)]
-def summarize_chunk(chunk, prompt):
     response = openai.ChatCompletion.create(
         model="gpt-3.5-turbo",
         messages=[
             {"role": "system", "content": prompt},
-            {"role": "user", "content": chunk}
         ],
         temperature=0.5,
         max_tokens=500
     )
-    return response.choices[0].message["content"].strip()
-def summarize_paper(file):
-    text = extract_text_from_pdf(file)
-    cleaned_text = clean_text(text)
-    chunks = split_text(cleaned_text)
-    full_summary = ""
-    for chunk in chunks:
-        summary = summarize_chunk(chunk, "Summarize this academic research paper for a general audience:")
-        full_summary += summary + "\n\n"
-    elim5 = summarize_chunk(cleaned_text, "Explain this paper to a 10-year-old in simple terms:")
-    why_matters = summarize_chunk(cleaned_text, "Why does this research matter? Explain its significance.")
-    tldr = summarize_chunk(cleaned_text, "Give a 1-line TL;DR summary of this paper.")
-    return full_summary, elim5, why_matters, tldr
 with gr.Blocks() as demo:
-    gr.Markdown("📄 **Paper News Summarizer** — Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.")
     with gr.Row():
-        with gr.Column(scale=1):
-            file_input = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
-            clear_btn = gr.Button("Clear")
-            submit_btn = gr.Button("Submit")
-        with gr.Column(scale=2):
-            summary_output = gr.Textbox(label="📘 Full Summary", lines=10, show_copy_button=True)
-            elim5_output = gr.Textbox(label="🧒 ELI5 Summary", lines=2, show_copy_button=True)
-            why_output = gr.Textbox(label="🎯 Why It Matters", lines=2, show_copy_button=True)
-            tldr_output = gr.Textbox(label="⚡ TL;DR", lines=2, show_copy_button=True)
-    def process(file):
-        return summarize_paper(file)
-    submit_btn.click(fn=process, inputs=file_input, outputs=[summary_output, elim5_output, why_output, tldr_output])
-    clear_btn.click(fn=lambda: ("", "", "", ""), inputs=[], outputs=[summary_output, elim5_output, why_output, tldr_output])
-demo.launch()

+# app.py
 import gradio as gr
 import fitz  # PyMuPDF
 import os
+import openai
+from dotenv import load_dotenv
+# Load secret from environment variable (make sure OPENAI_API_KEY is set in Hugging Face Secrets)
 openai.api_key = os.getenv("OPENAI_API_KEY")
+# Extract, clean, and split PDF text
+def extract_text_from_pdf(file):
+    text = ""
+    with fitz.open(stream=file.read(), filetype="pdf") as doc:
+        for page in doc:
+            text += page.get_text()
+    return text
 def clean_text(text):
+    return ' '.join(text.replace("\n", " ").split())
 def split_text(text, max_tokens=1500):
     words = text.split()
+    chunks = []
+    chunk = []
+    tokens = 0
+    for word in words:
+        tokens += 1
+        chunk.append(word)
+        if tokens >= max_tokens:
+            chunks.append(" ".join(chunk))
+            chunk = []
+            tokens = 0
+    if chunk:
+        chunks.append(" ".join(chunk))
+    return chunks
+# OpenAI call
+def summarize_text(text, prompt):
     response = openai.ChatCompletion.create(
         model="gpt-3.5-turbo",
         messages=[
             {"role": "system", "content": prompt},
+            {"role": "user", "content": text},
         ],
         temperature=0.5,
         max_tokens=500
     )
+    return response.choices[0].message.content.strip()
+def generate_summaries(pdf):
+    raw_text = extract_text_from_pdf(pdf)
+    text = clean_text(raw_text)
+    chunks = split_text(text)
+    summary_prompt = """
+        Summarize this academic text in a clear, concise way suitable for a general audience.
+    """
+    eli5_prompt = """
+        Explain the key ideas in the following text like I'm 5 years old.
+    """
+    tldr_prompt = """
+        Give a 1-line TL;DR of the research paper.
+    """
+    importance_prompt = """
+        Why is this research important? Mention societal or technological relevance in a short paragraph.
+    """
+    full_summary = "\n\n".join([summarize_text(chunk, summary_prompt) for chunk in chunks])
+    eli5 = summarize_text(text, eli5_prompt)
+    tldr = summarize_text(text, tldr_prompt)
+    why_matters = summarize_text(text, importance_prompt)
+    return full_summary, eli5, tldr, why_matters
 with gr.Blocks() as demo:
+    gr.Markdown("""
+    # 🧠 Paper News Summarizer
+    Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.
+    """)
     with gr.Row():
+        pdf_input = gr.File(label="Upload Research Paper (PDF)", file_types=['.pdf'])
+        clear_btn = gr.Button("Clear")
+        submit_btn = gr.Button("Submit", variant="primary")
+    with gr.Row():
+        with gr.Column():
+            full_summary_output = gr.Textbox(label="📘 Full Summary", lines=12, interactive=False)
+            eli5_output = gr.Textbox(label="🧒 ELI5 Explanation", lines=4, interactive=False)
+        with gr.Column():
+            tldr_output = gr.Textbox(label="⚡ TL;DR (1-line)", lines=2, interactive=False)
+            why_output = gr.Textbox(label="🎯 Why It Matters", lines=6, interactive=False)
+    submit_btn.click(fn=generate_summaries, inputs=[pdf_input], outputs=[full_summary_output, eli5_output, tldr_output, why_output])
+    clear_btn.click(fn=lambda: (None, "", "", ""), inputs=[], outputs=[pdf_input, full_summary_output, eli5_output, tldr_output])
+if __name__ == "__main__":
+    demo.launch()