Spaces:
Sleeping
Sleeping
File size: 2,696 Bytes
abd6513 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import gradio as gr
import fitz # PyMuPDF
import openai
import os
# Configure the OpenAI client from the environment. os.getenv returns None
# when OPENAI_API_KEY is not set, in which case api_key is assigned None here.
openai.api_key = os.getenv("OPENAI_API_KEY")
def extract_text_from_pdf(pdf_file):
    """Return the concatenated plain text of every page of a PDF.

    Args:
        pdf_file: The document to read. Accepted forms:

            * ``bytes`` / ``bytearray`` holding the raw PDF data;
            * a filesystem path (``str`` or ``os.PathLike``);
            * an object whose ``name`` attribute is the path of an existing
              file (for example an upload wrapper);
            * any other object with a ``read()`` method returning the PDF
              bytes.

    Returns:
        The text of all pages, in page order, joined with no separator.

    Raises:
        AttributeError: If ``pdf_file`` is none of the accepted forms
            (for example ``None``).
    """
    if isinstance(pdf_file, (bytes, bytearray)):
        source = {"stream": bytes(pdf_file)}
    elif isinstance(pdf_file, (str, os.PathLike)):
        source = {"filename": os.fspath(pdf_file)}
    else:
        # Prefer the on-disk path when the object advertises one.
        # NOTE(review): some Gradio releases hand callbacks a temp-file
        # wrapper whose own handle is empty and whose ``.name`` points at the
        # uploaded file -- confirm against the installed Gradio version.
        path = getattr(pdf_file, "name", None)
        if isinstance(path, str) and os.path.isfile(path):
            source = {"filename": path}
        else:
            source = {"stream": pdf_file.read()}

    # Explicit keywords make the intent clear: ``filetype`` tells PyMuPDF the
    # data is a PDF regardless of file extension or when reading from memory.
    with fitz.open(filetype="pdf", **source) as doc:
        return "".join(page.get_text() for page in doc)
def clean_text(text):
    """Collapse every run of whitespace in *text* to one space and trim it.

    Newlines, tabs, carriage returns and repeated spaces are all treated as
    whitespace, so text extracted from a PDF comes out as a single line with
    words separated by exactly one space.

    Args:
        text: The raw string to normalize.

    Returns:
        The normalized string; ``""`` when *text* is empty or only whitespace.
    """
    # str.split() with no argument splits on any whitespace run and discards
    # leading/trailing whitespace, so re-joining yields single-space text.
    return " ".join(text.split())
def split_text(text, max_tokens=1500):
    """Break *text* into consecutive pieces of at most *max_tokens* words.

    The unit is whitespace-delimited words (``str.split()``), not model
    tokens. Each piece is re-joined with single spaces, and the last piece
    may be shorter than the others.

    Args:
        text: The string to split.
        max_tokens: Maximum number of words per piece.

    Returns:
        A list of strings in original order; empty when *text* has no words.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), max_tokens):
        window = tokens[start:start + max_tokens]
        pieces.append(" ".join(window))
    return pieces
def summarize_chunk(chunk, prompt):
    """Ask the chat model to process *chunk* under the instruction *prompt*.

    The instruction is sent as the system message and the chunk as the user
    message, using model ``gpt-3.5-turbo`` with temperature 0.5 and a reply
    limit of 500 tokens.

    Args:
        chunk: The text to send as the user message.
        prompt: The instruction to send as the system message.

    Returns:
        The content of the first returned choice, stripped of surrounding
        whitespace.
    """
    # NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 interface of the
    # openai package -- confirm the pinned package version still provides it.
    conversation = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": chunk},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        temperature=0.5,
        max_tokens=500,
    )
    reply = completion.choices[0].message["content"]
    return reply.strip()
def summarize_paper(file):
    """Produce four views of a research paper PDF.

    The paper is extracted, cleaned and split into word-bounded chunks. Each
    chunk is summarized separately, and the chunk summaries form the full
    summary. The ELI5, significance and TL;DR texts are then generated from a
    bounded input: the paper itself when it fits in one chunk, otherwise a
    digest of the chunk summaries capped at one chunk's worth of words, so a
    long paper is never sent to the model in a single request.

    Args:
        file: The uploaded PDF in any form ``extract_text_from_pdf`` accepts,
            or ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple of strings ``(full_summary, eli5, why_it_matters, tldr)``.
        When no file is given or the PDF has no extractable text, the first
        element carries a short explanatory message and the rest are empty.
    """
    if file is None:
        # Submit was pressed with no upload; report it instead of crashing.
        return "Please upload a PDF file.", "", "", ""

    cleaned_text = clean_text(extract_text_from_pdf(file))
    chunks = split_text(cleaned_text)
    if not chunks:
        # Nothing to summarize (e.g. an image-only PDF); skip the model calls.
        return "No extractable text was found in this PDF.", "", "", ""

    summaries = [
        summarize_chunk(chunk, "Summarize this academic research paper for a general audience:")
        for chunk in chunks
    ]
    # Each chunk summary is followed by a blank line, as in the original output.
    full_summary = "".join(f"{summary}\n\n" for summary in summaries)

    if len(chunks) == 1:
        overview = cleaned_text
    else:
        # Re-chunk the combined summaries and keep only the first piece so the
        # follow-up prompts stay within a single chunk's size.
        overview = next(iter(split_text(" ".join(summaries))), "")

    elim5 = summarize_chunk(overview, "Explain this paper to a 10-year-old in simple terms:")
    why_matters = summarize_chunk(overview, "Why does this research matter? Explain its significance.")
    tldr = summarize_chunk(overview, "Give a 1-line TL;DR summary of this paper.")
    return full_summary, elim5, why_matters, tldr
# Gradio UI: an upload column with Clear/Submit buttons on the left and four
# read-only result boxes on the right.
with gr.Blocks() as demo:
    gr.Markdown("π **Paper News Summarizer** β Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.")

    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
            clear_btn = gr.Button("Clear")
            submit_btn = gr.Button("Submit")

        with gr.Column(scale=2):
            summary_output = gr.Textbox(label="π Full Summary", lines=10, show_copy_button=True)
            elim5_output = gr.Textbox(label="π§ ELI5 Summary", lines=2, show_copy_button=True)
            why_output = gr.Textbox(label="π― Why It Matters", lines=2, show_copy_button=True)
            tldr_output = gr.Textbox(label="β‘ TL;DR", lines=2, show_copy_button=True)

    def process(file):
        """Submit callback: forward the uploaded file to summarize_paper."""
        return summarize_paper(file)

    # Both buttons write to the same four textboxes, in this order.
    result_boxes = [summary_output, elim5_output, why_output, tldr_output]

    submit_btn.click(fn=process, inputs=file_input, outputs=result_boxes)
    # Clear blanks the four result boxes only; the file input is not an output.
    clear_btn.click(fn=lambda: ("",) * 4, inputs=[], outputs=result_boxes)

demo.launch()
|