parse / app.py
qqwjq1981's picture
Upload 2 files
fbd6bc6 verified
raw
history blame
1.19 kB
import gradio as gr
import PyPDF2
import json
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file) -> str:
    """Summarize a PDF as a pretty-printed JSON string.

    The PDF is opened with ``PyPDF2.PdfReader``, the text of every page is
    concatenated, and a small report is built from the result.

    Args:
        pdf_file: The uploaded PDF, handed straight to ``PyPDF2.PdfReader``.
            ``None`` is treated as "nothing was uploaded".

    Returns:
        On success, a JSON document (2-space indent) with two keys:
        ``"Total Pages"`` (page count) and ``"Extracted Text Preview"``
        (the first 500 characters of the concatenated page text).
        On failure, a plain string starting with ``"Error processing file: "``
        followed by the reason. This function never raises, so the message
        can be shown directly in the output widget.
    """
    # Presumably the UI passes None when the button is clicked before a file
    # is chosen; report that plainly instead of echoing a reader exception.
    if pdf_file is None:
        return "Error processing file: no file was uploaded."

    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        pages = pdf_reader.pages
        # Join once instead of growing a string page by page; `or ""` keeps
        # the join safe should a page yield no text object at all.
        extracted_text = "".join(page.extract_text() or "" for page in pages)
        # Basic parsing example (can be expanded for specific fields like
        # "Invoice Number").
        parsed_data = {
            "Total Pages": len(pages),
            "Extracted Text Preview": extracted_text[:500],  # first 500 chars
        }
        # ensure_ascii=False keeps non-ASCII document text readable in the
        # preview instead of turning it into \uXXXX escape sequences.
        return json.dumps(parsed_data, indent=2, ensure_ascii=False)
    except Exception as e:
        # UI boundary: any reader/parsing failure becomes a visible message.
        return f"Error processing file: {e}"
# Gradio interface
# NOTE(review): the nesting below is reconstructed from a flattened source.
# Presumed intent: the two widgets sit side by side in the Row, the button
# and its click wiring live in the Blocks context, and the launch call is at
# module level -- confirm against the deployed app.
with gr.Blocks() as demo:
    # Page heading and a one-line instruction for the user.
    gr.Markdown(value="# Curify Parse Prototype")
    gr.Markdown(
        value="Upload a PDF document to extract and view structured data."
    )

    # Upload widget (restricted to .pdf) next to the read-only result area.
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        result_output = gr.Textbox(label="Extracted Data", lines=20)

    # Clicking the button feeds the uploaded file to the extractor and shows
    # whatever string it returns in the textbox.
    extract_button = gr.Button(value="Extract Data")
    extract_button.click(
        fn=extract_text_from_pdf,
        inputs=pdf_input,
        outputs=result_output,
    )

# NOTE(review): share=True requests a public share link from Gradio --
# confirm that exposing the app publicly is intended.
demo.launch(share=True)