Spaces:

Shuja1401
/

paper-news-summarizer

Sleeping

App Files Files Community

Shuja1401 committed 18 days ago

Commit

84f0807

verified ·

1 Parent(s): 41e443c

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -73

app.py CHANGED Viewed

@@ -1,14 +1,12 @@
-# app.py
 import gradio as gr
 import fitz  # PyMuPDF
-import os
 import openai
-from dotenv import load_dotenv
-# Load secret from environment variable (make sure OPENAI_API_KEY is set in Hugging Face Secrets)
 openai.api_key = os.getenv("OPENAI_API_KEY")
-# Clean and split PDF text
 def extract_text_from_pdf(file):
     text = ""
     with fitz.open(stream=file.read(), filetype="pdf") as doc:
@@ -16,84 +14,78 @@ def extract_text_from_pdf(file):
             text += page.get_text()
     return text
-def clean_text(text):
-    return ' '.join(text.replace("\n", " ").split())
 def split_text(text, max_tokens=1500):
-    words = text.split()
-    chunks = []
-    chunk = []
-    tokens = 0
-    for word in words:
-        tokens += 1
-        chunk.append(word)
-        if tokens >= max_tokens:
-            chunks.append(" ".join(chunk))
-            chunk = []
-            tokens = 0
-    if chunk:
-        chunks.append(" ".join(chunk))
-    return chunks
-# OpenAI call
-def summarize_text(text, prompt):
-    response = openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {"role": "system", "content": prompt},
-            {"role": "user", "content": text},
-        ],
-        temperature=0.5,
-        max_tokens=500
-    )
-    return response.choices[0].message.content.strip()
-def generate_summaries(pdf):
-    raw_text = extract_text_from_pdf(pdf)
-    text = clean_text(raw_text)
     chunks = split_text(text)
-    summary_prompt = """
-        Summarize this academic text in a clear, concise way suitable for a general audience.
-    """
-    eli5_prompt = """
-        Explain the key ideas in the following text like I'm 5 years old.
-    """
-    tldr_prompt = """
-        Give a 1-line TL;DR of the research paper.
-    """
-    importance_prompt = """
-        Why is this research important? Mention societal or technological relevance in a short paragraph.
-    """
-    full_summary = "\n\n".join([summarize_text(chunk, summary_prompt) for chunk in chunks])
-    eli5 = summarize_text(text, eli5_prompt)
-    tldr = summarize_text(text, tldr_prompt)
-    why_matters = summarize_text(text, importance_prompt)
-    return full_summary, eli5, tldr, why_matters
-with gr.Blocks() as demo:
-    gr.Markdown("""
-    # 🧠 Paper News Summarizer
-    Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.
-    """)
     with gr.Row():
-        pdf_input = gr.File(label="Upload Research Paper (PDF)", file_types=['.pdf'])
-        clear_btn = gr.Button("Clear")
-        submit_btn = gr.Button("Submit", variant="primary")
     with gr.Row():
-        with gr.Column():
-            full_summary_output = gr.Textbox(label="📘 Full Summary", lines=12, interactive=False)
-            eli5_output = gr.Textbox(label="🧒 ELI5 Explanation", lines=4, interactive=False)
-        with gr.Column():
-            tldr_output = gr.Textbox(label="⚡ TL;DR (1-line)", lines=2, interactive=False)
-            why_output = gr.Textbox(label="🎯 Why It Matters", lines=6, interactive=False)
-    submit_btn.click(fn=generate_summaries, inputs=[pdf_input], outputs=[full_summary_output, eli5_output, tldr_output, why_output])
-    clear_btn.click(fn=lambda: (None, "", "", ""), inputs=[], outputs=[pdf_input, full_summary_output, eli5_output, tldr_output])
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import fitz  # PyMuPDF
 import openai
+import os
+# Load API key from Hugging Face secret
 openai.api_key = os.getenv("OPENAI_API_KEY")
+# Extract text from PDF
 def extract_text_from_pdf(file):
     text = ""
     with fitz.open(stream=file.read(), filetype="pdf") as doc:
             text += page.get_text()
     return text
+# Split text into chunks
 def split_text(text, max_tokens=1500):
+    import textwrap
+    return textwrap.wrap(text, max_tokens)
+# Generate summary
+def generate_summary(text):
     chunks = split_text(text)
+    summaries = []
+    for i, chunk in enumerate(chunks):
+        print(f"⏳ Summarizing chunk {i + 1}/{len(chunks)}...")
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": f"Summarize the following:\n{chunk}"}],
+            temperature=0.5,
+        )
+        summaries.append(response['choices'][0]['message']['content'].strip())
+    return "\n\n".join(summaries)
+# Generate all formats
+def summarize_paper(pdf_file):
+    try:
+        raw_text = extract_text_from_pdf(pdf_file)
+        summary = generate_summary(raw_text)
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "user", "content": f"Make an ELI5 version of this:\n{summary}"}
+            ],
+            temperature=0.7,
+        )
+        eli5 = response['choices'][0]['message']['content'].strip()
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "user", "content": f"Write a TL;DR of this:\n{summary}"}
+            ],
+            temperature=0.7,
+        )
+        tldr = response['choices'][0]['message']['content'].strip()
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "user", "content": f"Why does this research matter? {summary}"}
+            ],
+            temperature=0.7,
+        )
+        why_it_matters = response['choices'][0]['message']['content'].strip()
+        return summary, eli5, tldr, why_it_matters
+    except Exception as e:
+        return f"❌ Error: {str(e)}", "", "", ""
+# Gradio Interface
+with gr.Blocks() as demo:
+    gr.Markdown("📄 **Paper News Summarizer**\nUpload a research paper PDF and get a human-friendly summary, ELI5, TL;DR, and why it matters.")
     with gr.Row():
+        pdf_file = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
+    submit_btn = gr.Button("Submit")
+    clear_btn = gr.Button("Clear")
     with gr.Row():
+        full_summary = gr.Textbox(label="📘 Full Summary", lines=10, interactive=False)
+        eli5_summary = gr.Textbox(label="🧒 ELI5", lines=5, interactive=False)
+    with gr.Row():
+        tldr_summary = gr.Textbox(label="⚡ TL;DR", lines=2, interactive=False)
+        importance = gr.Textbox(label="🎯 Why It Matters", lines=5, interactive=False)
+    submit_btn.click(summarize_paper, inputs=pdf_file, outputs=[full_summary, eli5_summary, tldr_summary, importance])
+    clear_btn.click(lambda: ("", "", "", ""), outputs=[full_summary, eli5_summary, tldr_summary, importance])
+demo.launch()