File size: 1,431 Bytes
04dd521
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import gradio as gr
from summarize import summarize_text
from pdf2text import convert_pdf_to_text
from PyPDF2 import PdfReader

def is_scanned_pdf(pdf_path):
    """Return True when the PDF at *pdf_path* has no extractable text.

    Every page is probed with ``extract_text()``; as soon as one page yields
    non-whitespace text the document is considered text-based and False is
    returned. A document whose pages all yield nothing (or only whitespace)
    is reported as scanned.

    If the file cannot be opened or parsed, the function also returns True,
    i.e. the PDF is assumed to be scanned.
    """
    try:
        reader = PdfReader(pdf_path)
        # Whitespace-only (or missing) output is not a usable text layer, so
        # strip before testing truthiness.
        return not any(
            (page.extract_text() or "").strip() for page in reader.pages
        )
    except Exception:
        # Best-effort fallback: assume scanned if anything goes wrong.
        # `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate normally.
        return True

def summarize_from_text(text):
    """Summarize raw text supplied by the user.

    Args:
        text: The text to summarize; may be None or blank when the input
            box was left empty.

    Returns:
        The result of ``summarize_text(text)``, or an empty string when
        there is nothing to summarize.
    """
    # Skip the summarizer entirely for empty / whitespace-only input.
    if text is None or not text.strip():
        return ""
    return summarize_text(text)

def summarize_from_pdf(pdf_file):
    """Extract the text of an uploaded PDF and summarize it.

    Args:
        pdf_file: The value delivered by the upload component. Either a
            filesystem path string or an object exposing the path as
            ``.name``; None when nothing was uploaded.

    Returns:
        The summary produced by ``summarize_text``, or a short prompt
        asking for a file when no upload is present.
    """
    if pdf_file is None:
        # Button clicked without an upload: avoid AttributeError on None.
        return "Please upload a PDF file."

    # Accept both a plain path string and a file wrapper with `.name`.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    # Enable OCR only when the PDF has no extractable text layer.
    scanned = is_scanned_pdf(pdf_path)
    text_dict = convert_pdf_to_text(pdf_path, ocr_model=scanned)
    text = text_dict["converted_text"]
    return summarize_text(text)

# Build the two-tab UI: one tab for pasted text, one for uploaded PDFs.
with gr.Blocks() as demo:
    gr.Markdown("# DocSummarizer 📄✨\nUpload a PDF or paste text to summarize")

    # Tab 1: summarize text typed or pasted into a textbox.
    with gr.Tab("Summarize Text"):
        input_text = gr.Textbox(label="Text to Summarize", lines=20)
        output_text = gr.Textbox(label="Summarized Text", lines=10)
        summarize_button = gr.Button("Summarize")
        summarize_button.click(fn=summarize_from_text, inputs=input_text, outputs=output_text)

    # Tab 2: summarize the contents of an uploaded PDF file.
    with gr.Tab("Summarize PDF"):
        pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
        output_summary = gr.Textbox(label="Summarized Text", lines=10)
        pdf_button = gr.Button("Summarize PDF")
        pdf_button.click(fn=summarize_from_pdf, inputs=pdf_file, outputs=output_summary)

# Start the app when this module is executed.
demo.launch()