# Hugging Face Spaces page header captured along with the source (status: Sleeping); not part of the program.
import gradio as gr | |
import fitz # PyMuPDF | |
import openai | |
import os | |
# Read the OpenAI credential from the environment; the value is None when
# OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: The PDF to read. May be a filesystem path (``str`` or
            ``os.PathLike``), the raw PDF bytes, or any object exposing a
            ``read()`` method that returns the PDF bytes.

    Returns:
        The text of all pages joined in page order, with no separator
        inserted between pages.
    """
    if isinstance(pdf_file, (str, os.PathLike)):
        # A path has no .read(); let PyMuPDF open the file itself.
        document = fitz.open(os.fspath(pdf_file), filetype="pdf")
    elif isinstance(pdf_file, (bytes, bytearray)):
        document = fitz.open(stream=bytes(pdf_file), filetype="pdf")
    else:
        # File-like object: same in-memory open the code always performed,
        # spelled with keywords instead of the positional ("pdf", data) form.
        document = fitz.open(stream=pdf_file.read(), filetype="pdf")

    with document as doc:
        return "".join(page.get_text() for page in doc)
def clean_text(text):
    """Normalise whitespace in *text*.

    Every run of whitespace (spaces, newlines, tabs) is collapsed to a single
    space and leading/trailing whitespace is removed.

    Args:
        text: The raw text to normalise.

    Returns:
        The text with words separated by exactly one space.
    """
    # split() with no argument splits on any whitespace run and drops empty
    # pieces, so re-joining collapses runs of any length in one pass.
    return " ".join(text.split())
def split_text(text, max_tokens=1500):
    """Break *text* into consecutive pieces of at most ``max_tokens`` words.

    Despite the parameter name, the limit counts whitespace-separated words,
    not model tokens.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per piece.

    Returns:
        A list of strings, each holding up to ``max_tokens`` words joined by
        single spaces; an empty list when *text* contains no words.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), max_tokens):
        window = tokens[start:start + max_tokens]
        pieces.append(" ".join(window))
    return pieces
def summarize_chunk(chunk, prompt):
    """Send one piece of text to the chat model and return its reply.

    Args:
        chunk: Text sent as the user message.
        prompt: Instruction sent as the system message.

    Returns:
        The content of the first returned choice, with surrounding
        whitespace stripped.
    """
    # NOTE(review): openai.ChatCompletion is the interface of the pre-1.0
    # openai SDK — confirm the version this app is pinned to.
    conversation = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": chunk},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        temperature=0.5,
        max_tokens=500,
    )
    first_choice = completion.choices[0]
    reply = first_choice.message["content"]
    return reply.strip()
def summarize_paper(file):
    """Produce four summaries of an uploaded research paper.

    The paper text is extracted, whitespace-normalised and split into
    word-limited chunks; each chunk is summarised separately and the results
    are joined into the full summary. The three whole-paper prompts (ELI5,
    significance, TL;DR) are then run on a digest of the paper.

    Args:
        file: The uploaded PDF, in any form ``extract_text_from_pdf`` accepts.

    Returns:
        A 4-tuple of strings: ``(full_summary, eli5, why_it_matters, tldr)``.

    Raises:
        ValueError: If no file was supplied or the PDF has no extractable
            text.
    """
    if file is None:
        raise ValueError("No PDF file was provided.")

    cleaned_text = clean_text(extract_text_from_pdf(file))
    chunks = split_text(cleaned_text)
    if not chunks:
        # Nothing to send to the model; asking it to summarise an empty
        # message would only produce invented content.
        raise ValueError("No extractable text was found in the PDF.")

    chunk_summaries = [
        summarize_chunk(chunk, "Summarize this academic research paper for a general audience:")
        for chunk in chunks
    ]
    full_summary = "\n\n".join(chunk_summaries)

    # The whole-paper prompts must not receive the entire paper: that
    # bypasses the chunking above and can exceed the model's context window.
    # A paper that fits in one chunk is sent verbatim; a longer one is
    # represented by its already-condensed chunk summaries.
    digest = chunks[0] if len(chunks) == 1 else full_summary

    elim5 = summarize_chunk(digest, "Explain this paper to a 10-year-old in simple terms:")
    why_matters = summarize_chunk(digest, "Why does this research matter? Explain its significance.")
    tldr = summarize_chunk(digest, "Give a 1-line TL;DR summary of this paper.")
    return full_summary, elim5, why_matters, tldr
# Gradio UI: an upload column on the left and four read-only result boxes on
# the right, wired to summarize_paper.
# NOTE(review): the emoji in the page title and the "Full Summary" label were
# only partially recoverable from the mis-encoded source; the glyphs used
# here are stand-ins — confirm against the intended design.
with gr.Blocks() as demo:
    gr.Markdown("📄 **Paper News Summarizer** — Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.")
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
            clear_btn = gr.Button("Clear")
            submit_btn = gr.Button("Submit")
        with gr.Column(scale=2):
            summary_output = gr.Textbox(label="📝 Full Summary", lines=10, show_copy_button=True)
            elim5_output = gr.Textbox(label="🧠 ELI5 Summary", lines=2, show_copy_button=True)
            why_output = gr.Textbox(label="🎯 Why It Matters", lines=2, show_copy_button=True)
            tldr_output = gr.Textbox(label="⚡ TL;DR", lines=2, show_copy_button=True)

    def process(file):
        """Run the summarisation pipeline for the Submit button."""
        if file is None:
            # Show a readable message in the UI instead of a bare traceback
            # when Submit is pressed before anything was uploaded.
            raise gr.Error("Please upload a PDF before submitting.")
        return summarize_paper(file)

    submit_btn.click(
        fn=process,
        inputs=file_input,
        outputs=[summary_output, elim5_output, why_output, tldr_output],
    )
    # Clear resets the uploaded file as well as the four result boxes.
    clear_btn.click(
        fn=lambda: (None, "", "", "", ""),
        inputs=[],
        outputs=[file_input, summary_output, elim5_output, why_output, tldr_output],
    )

demo.launch()