Spaces:

Shuja1401
/

paper-news-summarizer

Sleeping

App Files Files Community

Shuja1401 committed 29 days ago

Commit

04a1c23

verified ·

1 Parent(s): d27cd71

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -146

app.py DELETED Viewed

@@ -1,146 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Paper_News_Gradio_App.ipynb
-Automatically generated by Colab.
-Original file is located at
-    https://colab.research.google.com/drive/1_JHJRpT4KWmECR-ep25CGZ0eM55Bm7TK
-"""
-# NOTE: "!pip install ..." is notebook-only syntax and a SyntaxError in a plain
-# .py file. Declare the packages in requirements.txt instead:
-#   gradio PyMuPDF tiktoken openai
-import os
-import re
-import time
-
-import fitz  # PyMuPDF
-import gradio as gr
-import tiktoken
-from openai import OpenAI
-
-# The API key must come from the environment (for example a Space secret).
-# Never hard-code it: a key committed to a repository is public and has to be
-# revoked and replaced.
-if not os.environ.get("OPENAI_API_KEY"):
-    raise RuntimeError(
-        "OPENAI_API_KEY is not set; provide it as an environment variable."
-    )
-# OpenAI() picks the key up from OPENAI_API_KEY.
-client = OpenAI()
-# --- Step 3: Extract and clean PDF text ---
-def extract_text_from_pdf(pdf_file_path):
-    """Return the text of every page of a PDF, concatenated in page order.
-
-    Args:
-        pdf_file_path: Location of the PDF; handed unchanged to ``fitz.open``.
-
-    Returns:
-        One string made of each page's ``get_text()`` output, with nothing
-        inserted between pages.
-    """
-    with fitz.open(pdf_file_path) as document:
-        page_texts = [page.get_text() for page in document]
-    return "".join(page_texts)
-def clean_text(text):
-    """Normalize extracted text before it is chunked.
-
-    The substitutions run in order: whitespace is collapsed first, so removing
-    a non-ASCII character that sat between two spaces can still leave a double
-    space behind.
-
-    Args:
-        text: Raw text to clean.
-
-    Returns:
-        The text with whitespace runs collapsed, characters outside printable
-        ASCII removed, and leading/trailing whitespace stripped.
-    """
-    substitutions = (
-        (r'\s+', ' '),           # every whitespace run becomes one space
-        (r'[^\x20-\x7E]+', ''),  # drop anything outside printable ASCII
-    )
-    for pattern, replacement in substitutions:
-        text = re.sub(pattern, replacement, text)
-    return text.strip()
-def split_into_chunks(text, max_tokens=1000):
-    """Split text on whitespace into chunks of at most ``max_tokens`` tokens.
-
-    Each word is encoded on its own with the ``cl100k_base`` encoding and the
-    per-word counts are summed, so a chunk's total may differ from the token
-    count of the joined chunk text.
-
-    Args:
-        text: Text to split; words are taken from ``text.split()``.
-        max_tokens: Token budget per chunk.
-
-    Returns:
-        A list of chunks, each a space-joined run of words. Empty input gives
-        an empty list. A single word that exceeds ``max_tokens`` by itself
-        becomes its own oversized chunk.
-    """
-    encoding = tiktoken.get_encoding("cl100k_base")
-    chunks, current_chunk, current_tokens = [], [], 0
-    for word in text.split():
-        tokens = len(encoding.encode(word))
-        # Flush only a non-empty chunk. Without this guard an oversized word
-        # arriving while the chunk is empty would emit an empty "" chunk.
-        if current_chunk and current_tokens + tokens > max_tokens:
-            chunks.append(" ".join(current_chunk))
-            current_chunk, current_tokens = [], 0
-        current_chunk.append(word)
-        current_tokens += tokens
-    if current_chunk:
-        chunks.append(" ".join(current_chunk))
-    return chunks
-def summarize_chunk(chunk):
-    """Ask the chat model for a summary of one text chunk.
-
-    Args:
-        chunk: Text to summarize; it is embedded in the user message.
-
-    Returns:
-        The content of the first choice in the reply. If anything raises, the
-        exception is not propagated; the string ``"Error: <exception>"`` is
-        returned instead, so the caller receives it like a normal summary.
-    """
-    try:
-        conversation = [
-            {"role": "system", "content": "You are a helpful assistant that summarizes documents."},
-            {"role": "user", "content": f"Summarize the following text:\n\n{chunk}"},
-        ]
-        completion = client.chat.completions.create(
-            model="gpt-3.5-turbo",
-            messages=conversation,
-            temperature=0.3,
-        )
-        return completion.choices[0].message.content
-    except Exception as error:
-        return f"Error: {error}"
-def generate_special_summaries(summary_text):
-    """Request ELI5, Why-It-Matters and TL;DR versions of a summary.
-
-    Args:
-        summary_text: The summary to rework; it is embedded in the prompt.
-
-    Returns:
-        The content of the first choice in the model reply, stripped of
-        surrounding whitespace. The reply is not split into sections here.
-
-    Raises:
-        Whatever the API call raises; nothing is caught in this function.
-    """
-    prompt = f"""
-From the text below, generate the following:
-1. ELI5 (Explain Like I’m 5)
-2. Why It Matters
-3. TL;DR (One-line summary)
-Text:
-\"\"\"
-{summary_text}
-\"\"\"
-    """
-    conversation = [
-        {"role": "system", "content": "You are an expert summarizer."},
-        {"role": "user", "content": prompt},
-    ]
-    completion = client.chat.completions.create(
-        model="gpt-3.5-turbo",
-        messages=conversation,
-    )
-    # Returned as one block of text; any splitting is left to the caller.
-    return completion.choices[0].message.content.strip()
-# Matches a section heading at the start of a line: optional markdown/list
-# markers and numbering, one of the three section names, an optional
-# parenthetical such as "(Explain Like I'm 5)", and an optional ":" or "-".
-_SECTION_HEADER = re.compile(
-    r'^[ \t*#_>-]*(?:\d+[.)][ \t]*)?[*_ \t]*'
-    r'(eli5|why it matters|tl;?dr)'
-    r'(?:[ \t]*\([^)\n]*\))?[*_ \t]*[:\-]?[*_ \t]*',
-    re.IGNORECASE | re.MULTILINE,
-)
-
-
-def _split_special_sections(special):
-    """Return the (eli5, why_matters, tldr) bodies found in the model reply."""
-    sections = {"eli5": "", "why it matters": "", "tldr": ""}
-    headers = list(_SECTION_HEADER.finditer(special))
-    for position, header in enumerate(headers):
-        # A section body runs from the end of its heading to the next heading.
-        if position + 1 < len(headers):
-            body_end = headers[position + 1].start()
-        else:
-            body_end = len(special)
-        key = header.group(1).lower().replace(";", "")
-        sections[key] = special[header.end():body_end].strip()
-    return sections["eli5"], sections["why it matters"], sections["tldr"]
-
-
-def process_pdf(pdf_file):
-    """Summarize an uploaded PDF and derive ELI5 / Why It Matters / TL;DR.
-
-    Args:
-        pdf_file: The value delivered by the file-upload component.
-
-    Returns:
-        A 4-tuple ``(full_summary, eli5, why_matters, tldr)``. A section the
-        reply does not contain is returned as an empty string. On any failure
-        all four items are the same ``"❌ Error: ..."`` message, so every
-        output box shows it.
-    """
-    try:
-        if pdf_file is None:
-            raise ValueError("Please upload a PDF file first.")
-        # NOTE(review): the upload may arrive as a path string or as an object
-        # exposing the path as ``.name`` depending on the Gradio version --
-        # confirm against the version in use.
-        pdf_path = getattr(pdf_file, "name", pdf_file)
-        raw_text = extract_text_from_pdf(pdf_path)
-        chunks = split_into_chunks(clean_text(raw_text))
-        if not chunks:
-            # Nothing to summarize; avoid prompting the model with empty text.
-            raise ValueError("No extractable text was found in the PDF.")
-        summaries = []
-        for index, chunk in enumerate(chunks):
-            if index:
-                # Pause between consecutive API calls, not after the last one.
-                time.sleep(1.5)
-            summaries.append(summarize_chunk(chunk))
-        full_summary = "\n\n".join(summaries)
-        special = generate_special_summaries(full_summary)
-        eli5, why_matters, tldr = _split_special_sections(special)
-        return full_summary, eli5, why_matters, tldr
-    except Exception as e:
-        # UI boundary: report the problem in the output boxes, do not crash.
-        error_msg = f"❌ Error: {str(e)}"
-        return error_msg, error_msg, error_msg, error_msg
-# Gradio UI: a row with the upload widget and two buttons, then four text boxes.
-with gr.Blocks() as demo:
-    gr.Markdown("### 📚 Paper News Summarizer")
-    gr.Markdown("Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.")
-    with gr.Row():
-        pdf_input = gr.File(label="📄 Upload Research Paper (PDF)", file_types=[".pdf"])
-        submit_btn = gr.Button("Submit", variant="primary")
-        clear_btn = gr.Button("Clear")
-    summary_output = gr.Textbox(label="📘 Full Summary", lines=10)
-    eli5_output = gr.Textbox(label="🧒 ELI5", lines=3)
-    why_output = gr.Textbox(label="🎯 Why It Matters", lines=3)
-    tldr_output = gr.Textbox(label="⚡ TL;DR", lines=2)
-    # Both buttons write to the same four boxes, in this order.
-    result_boxes = [summary_output, eli5_output, why_output, tldr_output]
-    # Submit runs the whole pipeline on the uploaded file.
-    submit_btn.click(fn=process_pdf, inputs=pdf_input, outputs=result_boxes)
-    # Clear blanks the four result boxes; the upload widget is not an output.
-    clear_btn.click(lambda: ("", "", "", ""), outputs=result_boxes)
-demo.launch(debug=True)