"""Gradio front end that summarizes pasted text or an uploaded PDF."""

import logging

import gradio as gr
from PyPDF2 import PdfReader

from pdf2text import convert_pdf_to_text
from summarize import summarize_text

logger = logging.getLogger(__name__)

# Messages shown in the output box when there is nothing to summarize.
EMPTY_TEXT_MESSAGE = "Please enter some text to summarize."
NO_FILE_MESSAGE = "Please upload a PDF file first."
NO_TEXT_MESSAGE = "No text could be extracted from the uploaded PDF."


def is_scanned_pdf(pdf_path) -> bool:
    """Return True when no page of the PDF yields extractable text.

    Args:
        pdf_path: Path of the PDF, handed directly to ``PdfReader``.

    Returns:
        False as soon as one page produces non-blank text; True when no page
        does, or when the file cannot be read or parsed at all.
    """
    try:
        reader = PdfReader(pdf_path)
        # Whitespace-only output is not usable text, so strip before testing.
        # `or ""` tolerates an extractor that returns None for a page.
        has_text = any(
            (page.extract_text() or "").strip() for page in reader.pages
        )
    except Exception:
        # Deliberate best-effort fallback: an unreadable PDF is treated as
        # scanned so the OCR path is tried. Log it instead of hiding it.
        logger.warning(
            "Could not inspect %r for extractable text; assuming scanned",
            pdf_path,
            exc_info=True,
        )
        return True
    return not has_text


def summarize_from_text(text):
    """Summarize text pasted into the "Summarize Text" tab.

    Args:
        text: Contents of the input textbox.

    Returns:
        The result of ``summarize_text(text)``, or a short prompt message
        when the input is empty or only whitespace.
    """
    if not text or not text.strip():
        return EMPTY_TEXT_MESSAGE
    return summarize_text(text)


def summarize_from_pdf(pdf_file):
    """Convert an uploaded PDF to text and summarize it.

    Args:
        pdf_file: Value delivered by the ``gr.File`` component. It may be
            None when nothing was uploaded, an object exposing the temporary
            file path as ``.name``, or a plain path string.

    Returns:
        The result of ``summarize_text`` on the converted text, or a short
        message when no file was uploaded or no text was extracted.
    """
    if pdf_file is None:
        return NO_FILE_MESSAGE

    # Works for both a file-like upload (path in `.name`) and a bare path.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    scanned = is_scanned_pdf(pdf_path)
    # NOTE(review): `ocr_model` receives a bool here; confirm that
    # pdf2text.convert_pdf_to_text expects a flag rather than a model object.
    text_dict = convert_pdf_to_text(pdf_path, ocr_model=scanned)
    text = text_dict["converted_text"]
    if not text:
        return NO_TEXT_MESSAGE
    return summarize_text(text)


with gr.Blocks() as demo:
    gr.Markdown("# DocSummarizer 📄✨\nUpload a PDF or paste text to summarize")

    with gr.Tab("Summarize Text"):
        input_text = gr.Textbox(label="Text to Summarize", lines=20)
        output_text = gr.Textbox(label="Summarized Text", lines=10)
        summarize_button = gr.Button("Summarize")
        summarize_button.click(
            fn=summarize_from_text, inputs=input_text, outputs=output_text
        )

    with gr.Tab("Summarize PDF"):
        pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
        output_summary = gr.Textbox(label="Summarized Text", lines=10)
        pdf_button = gr.Button("Summarize PDF")
        pdf_button.click(
            fn=summarize_from_pdf, inputs=pdf_file, outputs=output_summary
        )

# Launched at module level, as in the original script.
demo.launch()