|
import json
import logging

import gradio as gr
import PyPDF2
|
|
|
|
|
def extract_text_from_pdf(pdf_file, *, preview_chars=500):
    """Summarize a PDF as a JSON string for display in the UI.

    Opens ``pdf_file`` with ``PyPDF2.PdfReader``, concatenates the text
    extracted from every page, and reports the page count together with the
    leading ``preview_chars`` characters of that text.

    Args:
        pdf_file: The value handed to ``PyPDF2.PdfReader`` (a path or a
            binary stream), or ``None`` when no file was provided.
            NOTE(review): the exact object Gradio passes for a ``gr.File``
            input depends on the installed Gradio version -- confirm.
        preview_chars: Number of leading characters of the extracted text to
            include in the preview; expected to be non-negative.

    Returns:
        A JSON document (2-space indent) with the keys ``"Total Pages"`` and
        ``"Extracted Text Preview"``. On any failure, a plain string that
        starts with ``"Error processing file: "`` is returned instead; the
        function does not propagate exceptions to the caller.
    """
    # Fail fast with a readable message instead of letting the reader choke
    # on None and surface an opaque library error.
    if pdf_file is None:
        return "Error processing file: no file was uploaded"

    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        pages = pdf_reader.pages
        # Guard against a falsy result (e.g. None) for pages without
        # extractable text so one such page does not abort the whole file.
        extracted_text = "".join(page.extract_text() or "" for page in pages)

        parsed_data = {
            "Total Pages": len(pages),
            "Extracted Text Preview": extracted_text[:preview_chars],
        }
        return json.dumps(parsed_data, indent=2)
    except Exception as e:
        # This is the UI boundary: report the problem as text, but keep the
        # traceback in the logs so the failure can still be diagnosed.
        logging.getLogger(__name__).exception("Failed to process PDF")
        return f"Error processing file: {str(e)}"
|
|
|
|
|
# Build the page: a title, a short instruction line, the upload widget next to
# the result box, and a button that triggers the extraction.
# NOTE(review): the original indentation was lost; the nesting below (Row holds
# the File and Textbox, button and click wiring live inside Blocks, launch is
# at module level) follows the usual Gradio layout -- confirm.
with gr.Blocks() as demo:
    gr.Markdown(value="# Curify Parse Prototype")
    gr.Markdown(
        value="Upload a PDF document to extract and view structured data."
    )

    with gr.Row():
        pdf_input = gr.File(file_types=[".pdf"], label="Upload PDF")
        result_output = gr.Textbox(lines=20, label="Extracted Data")

    extract_button = gr.Button(value="Extract Data")
    # On click, pass the uploaded file to the extractor and show the string
    # it returns in the result box.
    extract_button.click(
        fn=extract_text_from_pdf,
        inputs=pdf_input,
        outputs=result_output,
    )

# share=True is passed through to Gradio's launch() unchanged.
demo.launch(share=True)
|
|