Shuja1401's picture
Upload 2 files
abd6513 verified
raw
history blame
2.7 kB
import gradio as gr
import fitz # PyMuPDF
import openai
import os
# Configure the OpenAI client from the OPENAI_API_KEY environment variable.
# os.getenv returns None when the variable is unset.
openai.api_key = os.getenv("OPENAI_API_KEY")
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: The PDF to read. Accepted forms are a filesystem path
            (``str`` or ``os.PathLike``), raw ``bytes``/``bytearray``, or a
            file-like object exposing ``read()``.

    Returns:
        The text of all pages joined together with no separator.

    Raises:
        ValueError: If ``pdf_file`` is ``None``.
    """
    if pdf_file is None:
        raise ValueError("No PDF file was provided.")

    if isinstance(pdf_file, (str, os.PathLike)):
        # A path was handed over (e.g. an upload widget configured to return
        # file paths); let PyMuPDF open it directly.
        doc = fitz.open(os.fspath(pdf_file))
    else:
        if isinstance(pdf_file, (bytes, bytearray)):
            data = bytes(pdf_file)
        else:
            data = pdf_file.read()

        backing_path = getattr(pdf_file, "name", None)
        if not data and isinstance(backing_path, str):
            # NOTE(review): presumably a temp-file wrapper whose read position
            # is already at EOF; fall back to the file on disk. Confirm against
            # the Gradio version in use.
            doc = fitz.open(backing_path)
        else:
            # Keyword form makes it explicit that the bytes are a PDF stream.
            doc = fitz.open(stream=data, filetype="pdf")

    with doc:
        return "".join(page.get_text() for page in doc)
def clean_text(text):
    """Normalise whitespace in extracted PDF text.

    Newlines, tabs and runs of consecutive spaces are each collapsed to a
    single space, and leading/trailing whitespace is removed.

    Args:
        text: Raw text to clean.

    Returns:
        The text with every whitespace run replaced by one space.
    """
    # The previous chained ``replace`` swapped a single space for a single
    # space (a no-op), so "a\n\nb" came out with a double space. Splitting on
    # arbitrary whitespace and re-joining collapses every run in one pass.
    return " ".join(text.split())
def split_text(text, max_tokens=1500):
    """Split text into consecutive chunks of at most ``max_tokens`` words.

    Despite the parameter name, the unit counted here is whitespace-separated
    words, not model tokens.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk. Must be positive.

    Returns:
        A list of chunks, each a space-joined run of words. Empty or
        whitespace-only input yields an empty list.

    Raises:
        ValueError: If ``max_tokens`` is not a positive integer.
    """
    if max_tokens <= 0:
        # Zero would make range() fail with an unrelated message and a
        # negative step would silently produce no chunks at all.
        raise ValueError("max_tokens must be a positive integer")

    words = text.split()
    return [
        " ".join(words[start:start + max_tokens])
        for start in range(0, len(words), max_tokens)
    ]
def summarize_chunk(chunk, prompt):
    """Run one piece of text through the chat model under an instruction.

    Args:
        chunk: The text sent as the user message.
        prompt: The instruction sent as the system message.

    Returns:
        The content of the first returned choice, stripped of surrounding
        whitespace.
    """
    conversation = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": chunk},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        temperature=0.5,
        max_tokens=500,
    )
    first_choice = completion.choices[0]
    return first_choice.message["content"].strip()
def summarize_paper(file):
    """Produce four summaries of an uploaded research-paper PDF.

    The paper text is extracted, cleaned and split into chunks; each chunk is
    summarised separately and the results are joined into the full summary.
    The ELI5, significance and TL;DR texts are then generated from a bounded
    context rather than from the whole paper.

    Args:
        file: The uploaded PDF, in whatever form ``extract_text_from_pdf``
            accepts, or ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple of strings: ``(full_summary, elim5, why_matters, tldr)``.
        When no file is given or no text can be extracted, the first element
        carries an explanatory message and the other three are empty.
    """
    if file is None:
        return "Please upload a PDF file before submitting.", "", "", ""

    cleaned_text = clean_text(extract_text_from_pdf(file))
    chunks = split_text(cleaned_text)
    if not chunks:
        # E.g. a scanned PDF with no text layer; avoid sending empty prompts.
        return "No extractable text was found in this PDF.", "", "", ""

    chunk_summaries = [
        summarize_chunk(
            chunk,
            "Summarize this academic research paper for a general audience:",
        )
        for chunk in chunks
    ]
    # Joined with blank lines between chunk summaries (no trailing blank line).
    full_summary = "\n\n".join(chunk_summaries)

    # Sending the entire paper for the derived prompts defeats the chunking
    # above and overflows the model's context window on long papers. When the
    # paper needed more than one chunk, work from the combined chunk summaries
    # instead; a single-chunk paper is already small enough to send as-is.
    context = cleaned_text if len(chunks) == 1 else full_summary

    elim5 = summarize_chunk(context, "Explain this paper to a 10-year-old in simple terms:")
    why_matters = summarize_chunk(context, "Why does this research matter? Explain its significance.")
    tldr = summarize_chunk(context, "Give a 1-line TL;DR summary of this paper.")
    return full_summary, elim5, why_matters, tldr
# Gradio UI: a PDF upload with Submit/Clear buttons on the left and the four
# generated texts on the right.
with gr.Blocks() as demo:
    # The emoji and em dash below were mis-encoded (mojibake) and are restored
    # to the intended characters.
    gr.Markdown("📄 **Paper News Summarizer** — Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.")
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
            clear_btn = gr.Button("Clear")
            submit_btn = gr.Button("Submit")
        with gr.Column(scale=2):
            summary_output = gr.Textbox(label="📘 Full Summary", lines=10, show_copy_button=True)
            elim5_output = gr.Textbox(label="🧒 ELI5 Summary", lines=2, show_copy_button=True)
            why_output = gr.Textbox(label="🎯 Why It Matters", lines=2, show_copy_button=True)
            tldr_output = gr.Textbox(label="⚡ TL;DR", lines=2, show_copy_button=True)

    def process(file):
        """Forward the uploaded file to ``summarize_paper`` and return its four outputs."""
        return summarize_paper(file)

    submit_btn.click(
        fn=process,
        inputs=file_input,
        outputs=[summary_output, elim5_output, why_output, tldr_output],
    )
    # Clear resets the uploaded file as well as the four text boxes, so a
    # stale upload cannot be resubmitted by accident.
    clear_btn.click(
        fn=lambda: (None, "", "", "", ""),
        inputs=[],
        outputs=[file_input, summary_output, elim5_output, why_output, tldr_output],
    )

demo.launch()