File size: 2,696 Bytes
abd6513
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69

import gradio as gr
import fitz  # PyMuPDF
import openai
import os

# Configure the OpenAI client from the OPENAI_API_KEY environment variable.
# os.getenv returns None when the variable is unset, so a missing key is not
# detected here.
openai.api_key = os.getenv("OPENAI_API_KEY")

def extract_text_from_pdf(pdf_file):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or a
            binary file-like object exposing ``read()``.
            NOTE(review): depending on the installed Gradio version, the
            upload component hands the callback a path string or a temp-file
            wrapper -- both are accepted here; confirm which one applies.

    Returns:
        The text of all pages joined without any separator.
    """
    if isinstance(pdf_file, (str, os.PathLike)):
        # Path input: let PyMuPDF open the file directly.
        doc = fitz.open(pdf_file)
    else:
        # File-like input: read the raw bytes and open them as an in-memory
        # PDF, naming the stream and file type explicitly.
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")

    # The document is used as a context manager so it is closed afterwards.
    with doc:
        return "".join(page.get_text() for page in doc)

def clean_text(text):
    """Collapse every run of whitespace in *text* into a single space.

    Newlines, tabs and repeated spaces are all treated as separators, and
    leading/trailing whitespace is removed. A single ``replace("  ", " ")``
    pass would leave runs of three or more spaces only partly collapsed, so
    the text is split on whitespace and re-joined instead.

    Args:
        text: Raw text, e.g. as extracted from PDF pages.

    Returns:
        The text with words separated by exactly one space; ``""`` when the
        input is empty or whitespace-only.
    """
    return " ".join(text.split())

def split_text(text, max_tokens=1500):
    """Break *text* into consecutive chunks of at most *max_tokens* words.

    Despite the parameter name, the limit counts whitespace-separated words,
    not model tokens.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk.

    Returns:
        A list of chunk strings, each with its words joined by single spaces;
        an empty list when *text* contains no words.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), max_tokens):
        window = tokens[start:start + max_tokens]
        pieces.append(" ".join(window))
    return pieces

def summarize_chunk(chunk, prompt):
    """Send *chunk* to the chat model under the instruction *prompt*.

    Args:
        chunk: The text passed as the user message.
        prompt: The instruction passed as the system message.

    Returns:
        The content of the first returned choice, stripped of surrounding
        whitespace.
    """
    conversation = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": chunk},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        temperature=0.5,
        max_tokens=500,
    )
    reply = completion.choices[0].message["content"]
    return reply.strip()

def summarize_paper(file):
    """Produce four summaries of an uploaded research-paper PDF.

    The paper text is extracted, cleaned and split into chunks; each chunk is
    summarized separately and the results are joined into the full summary.
    The ELI5, significance and TL;DR prompts are then run on a bounded
    digest: the whole text when it fits in one chunk, otherwise the first
    chunk-sized portion of the combined summary. Sending the entire
    un-chunked paper would bypass the size limit that the chunking enforces
    (presumably to stay within the model's context window -- confirm).

    Args:
        file: The uploaded PDF, in whatever form ``extract_text_from_pdf``
            accepts, or ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple ``(full_summary, eli5, why_it_matters, tldr)`` of strings.
        When no file is given or no text can be extracted, the first element
        carries a short notice and the other three are empty.
    """
    if file is None:
        return "Please upload a PDF file first.", "", "", ""

    cleaned_text = clean_text(extract_text_from_pdf(file))
    chunks = split_text(cleaned_text)
    if not chunks:
        # E.g. a PDF with no text layer: there is nothing to summarize.
        return "No extractable text was found in this PDF.", "", "", ""

    chunk_summaries = [
        summarize_chunk(chunk, "Summarize this academic research paper for a general audience:")
        for chunk in chunks
    ]
    full_summary = "\n\n".join(chunk_summaries)

    if len(chunks) == 1:
        # Short paper: the whole text already fits in a single chunk.
        digest = cleaned_text
    else:
        # Long paper: condense via the per-chunk summaries, capped at one
        # chunk; fall back to the first chunk if the summaries are empty.
        digest_parts = split_text(full_summary)
        digest = digest_parts[0] if digest_parts else chunks[0]

    elim5 = summarize_chunk(digest, "Explain this paper to a 10-year-old in simple terms:")
    why_matters = summarize_chunk(digest, "Why does this research matter? Explain its significance.")
    tldr = summarize_chunk(digest, "Give a 1-line TL;DR summary of this paper.")

    return full_summary, elim5, why_matters, tldr

# Gradio UI: upload column on the left, four read-only result boxes on the
# right. The emoji and em dash in the labels are written as real UTF-8
# characters (they had been garbled by a wrong-codec decode).
with gr.Blocks() as demo:
    gr.Markdown("📄 **Paper News Summarizer** — Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.")
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
            clear_btn = gr.Button("Clear")
            submit_btn = gr.Button("Submit")
        with gr.Column(scale=2):
            summary_output = gr.Textbox(label="📘 Full Summary", lines=10, show_copy_button=True)
            elim5_output = gr.Textbox(label="🧒 ELI5 Summary", lines=2, show_copy_button=True)
            why_output = gr.Textbox(label="🎯 Why It Matters", lines=2, show_copy_button=True)
            tldr_output = gr.Textbox(label="⚡ TL;DR", lines=2, show_copy_button=True)

    # The four result boxes, in the order summarize_paper returns its values.
    result_boxes = [summary_output, elim5_output, why_output, tldr_output]

    def process(file):
        """Summarize the uploaded PDF, or show an error if none was uploaded."""
        if file is None:
            # Surface a readable message instead of an opaque traceback.
            raise gr.Error("Please upload a PDF file before submitting.")
        return summarize_paper(file)

    submit_btn.click(fn=process, inputs=file_input, outputs=result_boxes)
    # Clear resets the uploaded file as well as the four result boxes.
    clear_btn.click(
        fn=lambda: (None, "", "", "", ""),
        inputs=[],
        outputs=[file_input] + result_boxes,
    )

demo.launch()