Spaces:
Running
Running
File size: 1,431 Bytes
04dd521 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import gradio as gr
from summarize import summarize_text
from pdf2text import convert_pdf_to_text
from PyPDF2 import PdfReader
def is_scanned_pdf(pdf_path):
    """Return True when the PDF at *pdf_path* appears to have no text layer.

    Each page is probed with ``extract_text()``; the first page that yields
    non-whitespace text marks the document as text-based (not scanned).

    Args:
        pdf_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        False if any page yields extractable text. True if no page does, or
        if the file cannot be opened or parsed at all.
    """
    try:
        reader = PdfReader(pdf_path)
        # Whitespace-only (or None) results carry no usable text, so they
        # are treated the same as an empty page.
        has_text = any(
            (page.extract_text() or "").strip() for page in reader.pages
        )
    except Exception:
        # Best-effort fallback: an unreadable file is assumed to be scanned.
        # Only ``Exception`` is caught so that KeyboardInterrupt/SystemExit
        # still propagate instead of being silently swallowed.
        return True
    return not has_text
def summarize_from_text(text):
    """Summarize text supplied directly by the user.

    Hands *text* to ``summarize_text`` and returns whatever it produces,
    unchanged.
    """
    summary = summarize_text(text)
    return summary
def summarize_from_pdf(pdf_file):
    """Extract the text of an uploaded PDF and return its summary.

    Args:
        pdf_file: The uploaded file. Either an object exposing the file's
            location as ``.name`` or a plain path string. ``None`` means
            nothing was uploaded.

    Returns:
        The result of ``summarize_text`` for the extracted text, or a short
        prompt message when no file was provided.
    """
    if pdf_file is None:
        # Pressing the button before uploading would otherwise crash with
        # AttributeError on ``pdf_file.name``.
        return "Please upload a PDF file first."

    # Accept both file-like wrappers (with ``.name``) and bare path strings.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Only enable OCR when the PDF has no extractable text layer.
    scanned = is_scanned_pdf(pdf_path)
    text_dict = convert_pdf_to_text(pdf_path, ocr_model=scanned)
    return summarize_text(text_dict["converted_text"])
# Build the two-tab UI: one tab for pasted text, one for PDF upload.
with gr.Blocks() as demo:
    # The heading emoji were mis-encoded ("πβ¨"); restored to the intended
    # UTF-8 characters.
    gr.Markdown("# DocSummarizer 📄✨\nUpload a PDF or paste text to summarize")

    with gr.Tab("Summarize Text"):
        input_text = gr.Textbox(label="Text to Summarize", lines=20)
        output_text = gr.Textbox(label="Summarized Text", lines=10)
        summarize_button = gr.Button("Summarize")
        # Pasted text goes straight to the summarizer.
        summarize_button.click(
            fn=summarize_from_text,
            inputs=input_text,
            outputs=output_text,
        )

    with gr.Tab("Summarize PDF"):
        pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
        output_summary = gr.Textbox(label="Summarized Text", lines=10)
        pdf_button = gr.Button("Summarize PDF")
        # The uploaded PDF is converted to text (with OCR if needed) and
        # then summarized.
        pdf_button.click(
            fn=summarize_from_pdf,
            inputs=pdf_file,
            outputs=output_summary,
        )

# Start the web server for the app.
demo.launch()
|