Shuja1401 committed on
Commit
e1e7abc
Β·
verified Β·
1 Parent(s): f40dac9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -0
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Paper_News_Gradio_App.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1_JHJRpT4KWmECR-ep25CGZ0eM55Bm7TK
8
+ """
9
+
10
# Dependencies: gradio, PyMuPDF, tiktoken, openai.
# Fix: the original kept a Colab shell-magic line (`!pip install ...`) here,
# which is a SyntaxError in a plain .py file — dependencies belong in
# requirements.txt for a Space, not in the module.

import os
import re
import time

import fitz  # PyMuPDF
import gradio as gr
import tiktoken
from openai import OpenAI

# SECURITY fix: the original hard-coded a live OpenAI API key into this file.
# A key committed to a public repo must be treated as compromised — revoke it
# and supply a fresh one via the OPENAI_API_KEY environment variable
# (e.g. a Hugging Face Space secret). The OpenAI client reads
# OPENAI_API_KEY from the environment automatically.
client = OpenAI()
23
+
24
# --- Step 3: Extract and clean PDF text ---
def extract_text_from_pdf(pdf_file_path):
    """Return the plain text of every page of the PDF at *pdf_file_path*, concatenated in order."""
    pages = []
    with fitz.open(pdf_file_path) as doc:
        for page in doc:
            pages.append(page.get_text())
    return "".join(pages)
31
+
32
+
33
def clean_text(text):
    """Normalize extracted PDF text.

    Collapses every whitespace run to a single space, strips all characters
    outside printable ASCII (0x20-0x7E), and trims leading/trailing blanks.
    """
    collapsed = re.sub(r'\s+', ' ', text)
    printable = re.sub(r'[^\x20-\x7E]+', '', collapsed)
    return printable.strip()
37
+
38
def split_into_chunks(text, max_tokens=1000):
    """Greedily pack whitespace-separated words into chunks of at most *max_tokens* tokens.

    Token counts use the ``cl100k_base`` encoding (the GPT-3.5 tokenizer).
    Each chunk is returned as a single space-joined string.
    """
    enc = tiktoken.get_encoding("cl100k_base")
    chunks = []
    current, used = [], 0

    for word in text.split():
        cost = len(enc.encode(word))
        # Flush the running chunk once adding this word would exceed the budget.
        if used + cost > max_tokens:
            chunks.append(" ".join(current))
            current, used = [word], cost
        else:
            current.append(word)
            used += cost

    if current:
        chunks.append(" ".join(current))

    return chunks
56
+
57
def summarize_chunk(chunk):
    """Summarize *chunk* with GPT-3.5; on any API failure return an ``Error: ...`` string instead of raising."""
    conversation = [
        {"role": "system", "content": "You are a helpful assistant that summarizes documents."},
        {"role": "user", "content": f"Summarize the following text:\n\n{chunk}"},
    ]
    try:
        reply = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=conversation,
            temperature=0.3,
        )
        return reply.choices[0].message.content
    except Exception as e:
        # Best-effort: surface the failure as text so the pipeline keeps going.
        return f"Error: {e}"
70
+
71
def generate_special_summaries(summary_text):
    """Ask GPT-3.5 for three alternate views of *summary_text* in one call.

    The model is prompted to produce an ELI5 explanation, a "Why It Matters"
    note, and a one-line TL;DR; the raw (stripped) reply is returned and the
    caller is responsible for splitting it into sections.
    """
    prompt = f"""
    From the text below, generate the following:
    1. ELI5 (Explain Like I’m 5)
    2. Why It Matters
    3. TL;DR (One-line summary)

    Text:
    \"\"\"
    {summary_text}
    \"\"\"
    """
    reply = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are an expert summarizer."},
            {"role": "user", "content": prompt},
        ],
    )
    return reply.choices[0].message.content.strip()
95
+
96
def _parse_special_sections(special):
    """Split the combined special-summary reply into (eli5, why_matters, tldr).

    Relies on the model numbering its sections ("1. ELI5", "2. Why ...",
    "3. TL;DR") separated by blank lines; any section that cannot be matched
    comes back as an empty string.
    """
    eli5, why_matters, tldr = "", "", ""
    for section in special.split("\n\n"):
        lowered = section.lower()
        if lowered.startswith("1. eli5"):
            eli5 = section.replace("1. ELI5:", "").strip()
        elif lowered.startswith("2. why"):
            why_matters = section.replace("2. Why It Matters:", "").strip()
        elif lowered.startswith("3. tl;dr") or lowered.startswith("3. tldr"):
            tldr = section.replace("3. TL;DR:", "").replace("3. Tldr:", "").strip()
    return eli5, why_matters, tldr


def process_pdf(pdf_file):
    """End-to-end pipeline: PDF -> cleaned text -> per-chunk summaries -> special summaries.

    Returns a 4-tuple ``(full_summary, eli5, why_matters, tldr)``. On any
    failure the same error message is returned in all four slots so every
    output box in the UI displays it.
    """
    try:
        raw_text = extract_text_from_pdf(pdf_file)
        cleaned_text = clean_text(raw_text)
        chunks = split_into_chunks(cleaned_text)

        summaries = []
        for i, chunk in enumerate(chunks):
            # Crude rate limiting between API calls. Fix: the original also
            # slept 1.5 s *after* the final chunk, delaying the response for
            # no benefit — pause only between requests.
            if i:
                time.sleep(1.5)
            summaries.append(summarize_chunk(chunk))

        full_summary = "\n\n".join(summaries)
        special = generate_special_summaries(full_summary)
        eli5, why_matters, tldr = _parse_special_sections(special)
        return full_summary, eli5, why_matters, tldr

    except Exception as e:
        error_msg = f"❌ Error: {str(e)}"
        return error_msg, error_msg, error_msg, error_msg
126
+
127
# --- Gradio UI: upload a PDF, show the four summary views ---
with gr.Blocks() as demo:
    gr.Markdown("### 📚 Paper News Summarizer")
    gr.Markdown("Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.")

    with gr.Row():
        pdf_input = gr.File(label="📄 Upload Research Paper (PDF)", file_types=[".pdf"])
        submit_btn = gr.Button("Submit", variant="primary")
        clear_btn = gr.Button("Clear")

    # The four output boxes, in the same order process_pdf returns its tuple.
    result_boxes = [
        gr.Textbox(label="📘 Full Summary", lines=10),
        gr.Textbox(label="🧒 ELI5", lines=3),
        gr.Textbox(label="🎯 Why It Matters", lines=3),
        gr.Textbox(label="⚡ TL;DR", lines=2),
    ]

    submit_btn.click(fn=process_pdf, inputs=pdf_input, outputs=result_boxes)
    clear_btn.click(lambda: ("", "", "", ""), outputs=result_boxes)

demo.launch(debug=True)