"""Gradio prototype: upload a PDF and view a JSON summary of its text."""

import json

import gradio as gr
import PyPDF2

# Number of leading characters of the extracted text included in the preview.
PREVIEW_CHARS = 500


# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Extract the text of a PDF and return a JSON summary string.

    Args:
        pdf_file: The value delivered by the ``gr.File`` input. It is passed
            to ``PyPDF2.PdfReader``. ``None`` is treated as "nothing was
            uploaded".

    Returns:
        A JSON-formatted string with the keys ``"Total Pages"`` and
        ``"Extracted Text Preview"`` (the first ``PREVIEW_CHARS`` characters
        of the concatenated page text), or a message starting with
        ``"Error processing file:"`` when no file was given or the PDF could
        not be read.
    """
    # Clicking the button without uploading anything would otherwise surface
    # an opaque library error to the user.
    if pdf_file is None:
        return "Error processing file: no PDF was uploaded."

    # NOTE(review): depending on the installed Gradio version the callback may
    # receive a path string or a temp-file wrapper exposing ``.name`` -- TODO
    # confirm. Prefer the on-disk path when one is available so PdfReader
    # opens the file itself; otherwise pass the object through unchanged.
    path = getattr(pdf_file, "name", None)
    source = path if isinstance(path, str) else pdf_file

    try:
        pdf_reader = PyPDF2.PdfReader(source)
        pages = pdf_reader.pages
        # ``or ""`` guards against a page yielding no text object at all;
        # join avoids repeated string concatenation on large documents.
        extracted_text = "".join(page.extract_text() or "" for page in pages)

        # Basic parsing example (can be expanded for specific fields like
        # "Invoice Number")
        parsed_data = {
            "Total Pages": len(pages),
            "Extracted Text Preview": extracted_text[:PREVIEW_CHARS],
        }
        # ensure_ascii=False keeps non-ASCII text readable in the textbox
        # instead of rendering it as \uXXXX escapes.
        return json.dumps(parsed_data, indent=2, ensure_ascii=False)
    except Exception as e:
        # UI boundary: report any parsing failure to the user as text rather
        # than letting the callback raise.
        return f"Error processing file: {e}"


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Curify Parse Prototype")
    gr.Markdown("Upload a PDF document to extract and view structured data.")

    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        result_output = gr.Textbox(label="Extracted Data", lines=20)

    extract_button = gr.Button("Extract Data")
    extract_button.click(
        extract_text_from_pdf,
        inputs=pdf_input,
        outputs=result_output,
    )

# Only start the server when executed as a script, so importing this module
# (e.g. to reuse ``extract_text_from_pdf`` or ``demo``) has no side effects.
if __name__ == "__main__":
    demo.launch(share=True)