qqwjq1981 committed on
Commit
fbd6bc6
·
verified ·
1 Parent(s): 90f30e2

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +34 -0
  2. requirements.txt +20 -0
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import PyPDF2
3
+ import json
4
+
5
+ # Function to extract text from PDF
6
def extract_text_from_pdf(pdf_file):
    """Extract text from an uploaded PDF and return a JSON summary string.

    Args:
        pdf_file: The value supplied by the ``gr.File`` input. It is handed
            unchanged to ``PyPDF2.PdfReader``. May be ``None`` (presumably
            when the button is clicked before any file is uploaded).

    Returns:
        A JSON-formatted string with the keys ``"Total Pages"`` and
        ``"Extracted Text Preview"`` (the first 500 characters of the
        extracted text), or a string starting with
        ``"Error processing file: "`` if anything goes wrong.
    """
    # Fail with a clear message instead of an opaque library error when
    # nothing was uploaded.
    if pdf_file is None:
        return "Error processing file: no file was uploaded"

    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        pages = pdf_reader.pages

        # A page may yield no text (e.g. scanned/image-only pages); treat a
        # falsy result as empty so concatenation can never fail on None.
        extracted_text = "".join(page.extract_text() or "" for page in pages)

        # Basic parsing example (can be expanded for specific fields like
        # "Invoice Number").
        parsed_data = {
            "Total Pages": len(pages),
            "Extracted Text Preview": extracted_text[:500],  # first 500 chars
        }
        # ensure_ascii=False keeps non-ASCII text readable in the output box
        # instead of rendering it as \uXXXX escapes.
        return json.dumps(parsed_data, indent=2, ensure_ascii=False)
    except Exception as e:
        # UI boundary: report any parsing failure to the user as text rather
        # than letting the exception escape.
        return f"Error processing file: {e}"
21
+
22
+ # Gradio interface
23
with gr.Blocks() as demo:
    # Page heading and a one-line usage hint.
    gr.Markdown("# Curify Parse Prototype")
    gr.Markdown("Upload a PDF document to extract and view structured data.")

    # Upload widget and result box side by side.
    with gr.Row():
        pdf_input = gr.File(
            label="Upload PDF",
            file_types=[".pdf"],
        )
        result_output = gr.Textbox(
            label="Extracted Data",
            lines=20,
        )

    # Clicking the button runs the extractor on the uploaded file and shows
    # the returned string in the text box.
    extract_button = gr.Button("Extract Data")
    extract_button.click(
        fn=extract_text_from_pdf,
        inputs=pdf_input,
        outputs=result_output,
    )

# Start the app; share=True asks Gradio for a public share link.
demo.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ qrcode
2
+ PyPDF2
3
+ flask
4
+ # json is part of the Python standard library and must not be listed as a pip requirement
5
+ gradio
6
+ newspaper3k
7
+ transformers
8
+ sentence-transformers
9
+ openai
10
+ todoist-api-python
11
+ flask
12
+ twilio
13
+ fastapi
14
+ uvicorn
15
+ ffmpy
16
+ google-cloud-storage
17
+ fpdf
18
+ markdown
19
+ nest_asyncio
20
+ reportlab