# Captured from a Hugging Face Spaces page (Space status at capture: Sleeping).
import os
import textwrap

import fitz  # PyMuPDF
import gradio as gr
import openai
# Load API key from Hugging Face secret.
# The key is read from the OPENAI_API_KEY environment variable; os.getenv
# returns None when the variable is unset.
openai.api_key = os.getenv("OPENAI_API_KEY")
# Extract text from PDF
def extract_text_from_pdf(file):
    """Return the concatenated text of every page in a PDF.

    Args:
        file: Either a binary file-like object exposing ``read()`` or a
            filesystem path to the PDF.

    Returns:
        The text of all pages joined in page order (an empty string when
        no page yields any text).

    Raises:
        ValueError: If ``file`` is ``None``.
    """
    if file is None:
        raise ValueError("No PDF file was provided.")

    if hasattr(file, "read"):
        # File-like input: hand the raw bytes to PyMuPDF.
        doc = fitz.open(stream=file.read(), filetype="pdf")
    else:
        # Path-like input: let PyMuPDF open the file from disk.
        doc = fitz.open(file, filetype="pdf")

    with doc:
        # join() avoids rebuilding the string on every page as += would.
        return "".join(page.get_text() for page in doc)
# Split text into chunks
def split_text(text, max_tokens=1500):
    """Split text into chunks on whitespace boundaries.

    Args:
        text: The text to split.
        max_tokens: Maximum chunk length. Despite the name, this is a
            character count: it is passed as the ``width`` argument of
            ``textwrap.wrap``.

    Returns:
        A list of chunks, each at most ``max_tokens`` characters long.
        Empty or whitespace-only text yields an empty list.
    """
    return textwrap.wrap(text, max_tokens)
# Generate summary
def generate_summary(text):
    """Summarize text chunk by chunk with the OpenAI chat API.

    The text is split with ``split_text`` and every chunk is summarized by
    its own ``gpt-3.5-turbo`` request; progress is printed per chunk.

    Args:
        text: The full text to summarize.

    Returns:
        The per-chunk summaries joined by blank lines, or an empty string
        when ``split_text`` yields no chunks.
    """
    chunks = split_text(text)
    total = len(chunks)
    summaries = []
    for index, chunk in enumerate(chunks, start=1):
        print(f"⏳ Summarizing chunk {index}/{total}...")
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": f"Summarize the following:\n{chunk}"}],
            temperature=0.5,
        )
        summaries.append(response["choices"][0]["message"]["content"].strip())
    return "\n\n".join(summaries)
def _ask_model(prompt, temperature=0.7):
    """Send one user prompt to gpt-3.5-turbo and return the stripped reply."""
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
    )
    return response["choices"][0]["message"]["content"].strip()


# Generate all formats
def summarize_paper(pdf_file):
    """Produce a summary, ELI5, TL;DR and "why it matters" for a PDF.

    Args:
        pdf_file: The uploaded PDF, passed through to
            ``extract_text_from_pdf``.

    Returns:
        A 4-tuple ``(summary, eli5, tldr, why_it_matters)``. On any
        failure the first element is an error message and the other three
        are empty strings, so the result always fits the four output boxes.
    """
    try:
        raw_text = extract_text_from_pdf(pdf_file)
        if not raw_text.strip():
            # Without this guard the model would be prompted with no
            # content at all and would invent a summary.
            return "❌ Error: No text could be extracted from the PDF.", "", "", ""

        summary = generate_summary(raw_text)
        eli5 = _ask_model(f"Make an ELI5 version of this:\n{summary}")
        tldr = _ask_model(f"Write a TL;DR of this:\n{summary}")
        why_it_matters = _ask_model(f"Why does this research matter? {summary}")
        return summary, eli5, tldr, why_it_matters
    except Exception as exc:
        # UI boundary: report the failure in the first output box instead
        # of letting the exception escape the click handler.
        return f"❌ Error: {exc}", "", "", ""
# Gradio Interface
# NOTE(review): the original indentation was lost, so the row nesting below
# (upload and both buttons sharing the first row) is an assumption - confirm.
# NOTE(review): the emoji in the title and "Full Summary" label were
# unrecoverable from the garbled source and are best guesses - confirm.
with gr.Blocks() as demo:
    gr.Markdown(
        "📄 **Paper News Summarizer**\n"
        "Upload a research paper PDF and get a human-friendly summary, "
        "ELI5, TL;DR, and why it matters."
    )

    with gr.Row():
        pdf_file = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
        submit_btn = gr.Button("Submit")
        clear_btn = gr.Button("Clear")

    with gr.Row():
        full_summary = gr.Textbox(label="📝 Full Summary", lines=10, interactive=False)
        eli5_summary = gr.Textbox(label="🧠 ELI5", lines=5, interactive=False)

    with gr.Row():
        tldr_summary = gr.Textbox(label="⚡ TL;DR", lines=2, interactive=False)
        importance = gr.Textbox(label="🎯 Why It Matters", lines=5, interactive=False)

    # Both handlers write to the same four boxes, in the order
    # summarize_paper returns its values.
    output_boxes = [full_summary, eli5_summary, tldr_summary, importance]

    submit_btn.click(summarize_paper, inputs=pdf_file, outputs=output_boxes)
    # Clear resets only the outputs; the uploaded file is left in place.
    clear_btn.click(lambda: ("", "", "", ""), outputs=output_boxes)

demo.launch()