Spaces:
Sleeping
Sleeping
import gradio as gr | |
from summarize import summarize_text | |
from pdf2text import convert_pdf_to_text | |
from PyPDF2 import PdfReader | |
def is_scanned_pdf(pdf_path):
    """Return True when the PDF at *pdf_path* appears to have no text layer.

    Each page is asked for its extractable text; the first page that yields
    any non-whitespace text marks the document as text-based.

    Args:
        pdf_path: Path to the PDF file, passed straight to ``PdfReader``.

    Returns:
        ``False`` if at least one page yields non-blank text; ``True`` if no
        page does, or if the file cannot be read at all.
    """
    try:
        reader = PdfReader(pdf_path)
        # ``or ""`` tolerates a page that returns None, and ``strip`` keeps
        # whitespace-only output from being mistaken for a real text layer.
        has_text = any(
            (page.extract_text() or "").strip() for page in reader.pages
        )
    except Exception:
        # Deliberate best-effort fallback: an unreadable PDF is treated as
        # scanned. ``Exception`` (not a bare ``except``) is caught so that
        # KeyboardInterrupt and SystemExit still propagate.
        return True
    return not has_text
def summarize_from_text(text):
    """Summarize *text* by delegating to ``summarize_text``.

    Thin adapter wired to the "Summarize" button of the text tab.

    Args:
        text: The value of the input textbox.

    Returns:
        Whatever ``summarize_text`` returns for *text*.
    """
    summary = summarize_text(text)
    return summary
def summarize_from_pdf(pdf_file):
    """Extract the text of an uploaded PDF and return its summary.

    Args:
        pdf_file: The value delivered by the upload component. This may be
            ``None`` (nothing uploaded), a plain path string, or an object
            exposing the path as ``.name``.

    Returns:
        The result of ``summarize_text`` on the converted text, or a short
        prompt string when no file was provided.
    """
    # The button can be clicked before anything is uploaded; answer with a
    # readable message instead of failing on ``None.name``.
    if pdf_file is None:
        return "Please upload a PDF file first."

    # Accept both a path string and a file wrapper with a ``.name`` path.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    scanned = is_scanned_pdf(pdf_path)
    # The scanned flag is forwarded as ``ocr_model`` — presumably switching
    # the converter to OCR; confirm against pdf2text.
    text_dict = convert_pdf_to_text(pdf_path, ocr_model=scanned)
    return summarize_text(text_dict["converted_text"])
# Build the two-tab interface. Components are created in display order, so
# the statement order inside each tab is significant.
with gr.Blocks() as demo:
    # NOTE(review): the characters after "DocSummarizer" look like a
    # mis-decoded emoji pair — confirm the file's encoding.
    gr.Markdown(
        "# DocSummarizer πβ¨\n"
        "Upload a PDF or paste text to summarize"
    )

    # Tab 1: summarize text pasted into a textbox.
    with gr.Tab("Summarize Text"):
        input_text = gr.Textbox(lines=20, label="Text to Summarize")
        output_text = gr.Textbox(lines=10, label="Summarized Text")
        summarize_button = gr.Button("Summarize")
        summarize_button.click(summarize_from_text, input_text, output_text)

    # Tab 2: summarize an uploaded PDF.
    with gr.Tab("Summarize PDF"):
        pdf_file = gr.File(file_types=[".pdf"], label="Upload PDF")
        output_summary = gr.Textbox(lines=10, label="Summarized Text")
        pdf_button = gr.Button("Summarize PDF")
        pdf_button.click(summarize_from_pdf, pdf_file, output_summary)

# Start the Gradio server.
demo.launch()