# Jimmy0866's picture
# Upload 3 files
# 04dd521 verified
import gradio as gr
from summarize import summarize_text
from pdf2text import convert_pdf_to_text
from PyPDF2 import PdfReader
def is_scanned_pdf(pdf_path):
    """Return True when the PDF at *pdf_path* yields no extractable text.

    Each page is probed with ``extract_text()``; the first page that returns
    non-whitespace text marks the document as text-based and ends the scan.

    Args:
        pdf_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        False if any page yields text, True otherwise. True is also returned
        when opening or parsing the file fails, i.e. the document is then
        assumed to be scanned.
    """
    try:
        reader = PdfReader(pdf_path)
        for page in reader.pages:
            # Whitespace-only (or missing) output carries no usable text, so
            # it does not count as a text layer.
            if (page.extract_text() or "").strip():
                return False
        return True
    except Exception:
        # Deliberate best-effort fallback: assume scanned on any error.
        # Catching ``Exception`` instead of using a bare ``except`` lets
        # KeyboardInterrupt and SystemExit propagate.
        return True
def summarize_from_text(text):
    """Summarize a piece of raw text.

    Thin adapter: forwards *text* unchanged to ``summarize_text`` and hands
    back whatever that helper returns.
    """
    summary = summarize_text(text)
    return summary
def summarize_from_pdf(pdf_file):
    """Extract the text of an uploaded PDF and return its summary.

    Args:
        pdf_file: The upload value handed over by the UI. Either an object
            exposing the file path as ``.name`` or a plain path string;
            ``None`` when nothing was uploaded.

    Returns:
        The result of ``summarize_text`` on the converted text, or a short
        prompt string when no file was provided.
    """
    if pdf_file is None:
        # Without this guard a click with no upload fails on ``None.name``.
        return "Please upload a PDF file first."

    # Accept both upload objects (path in ``.name``) and plain path strings.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # The scanned check decides the ``ocr_model`` flag passed to the converter.
    scanned = is_scanned_pdf(pdf_path)
    text_dict = convert_pdf_to_text(pdf_path, ocr_model=scanned)
    return summarize_text(text_dict["converted_text"])
# Build the two-tab UI: one tab summarizes pasted text, the other an uploaded
# PDF. Each tab wires its button to the matching handler defined in this file.
with gr.Blocks() as demo:
    # Title emoji restored; the literal previously held mis-decoded UTF-8 bytes.
    gr.Markdown("# DocSummarizer 📄✨\nUpload a PDF or paste text to summarize")

    with gr.Tab("Summarize Text"):
        input_text = gr.Textbox(label="Text to Summarize", lines=20)
        output_text = gr.Textbox(label="Summarized Text", lines=10)
        summarize_button = gr.Button("Summarize")
        summarize_button.click(
            fn=summarize_from_text,
            inputs=input_text,
            outputs=output_text,
        )

    with gr.Tab("Summarize PDF"):
        pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
        output_summary = gr.Textbox(label="Summarized Text", lines=10)
        pdf_button = gr.Button("Summarize PDF")
        pdf_button.click(
            fn=summarize_from_pdf,
            inputs=pdf_file,
            outputs=output_summary,
        )

# Start the app as soon as the script is executed.
demo.launch()