Upload 2 files
Browse files- app.py +34 -0
- requirements.txt +20 -0
app.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import PyPDF2
|
3 |
+
import json
|
4 |
+
|
5 |
+
# Function to extract text from PDF
|
6 |
+
def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF and return a JSON summary string.

    Args:
        pdf_file: The PDF to read; it is passed unchanged to
            ``PyPDF2.PdfReader``. ``None`` is treated as "no file provided".

    Returns:
        A JSON-formatted string holding the page count ("Total Pages") and
        the first 500 characters of the extracted text ("Extracted Text
        Preview"). If the input is missing or reading fails, a plain string
        starting with "Error processing file:" is returned instead; this
        function never raises.
    """
    # Guard the no-upload case explicitly so the user gets a clear message
    # rather than whatever the PDF reader raises for a None input.
    if pdf_file is None:
        return "Error processing file: no PDF file was provided"

    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        pages = pdf_reader.pages

        # A page without extractable text may yield an empty or None result;
        # `or ""` keeps the join from failing on it.
        extracted_text = "".join(page.extract_text() or "" for page in pages)

        # Basic parsing example (can be expanded for specific fields like
        # "Invoice Number").
        parsed_data = {
            "Total Pages": len(pages),
            # Show only the first 500 characters.
            "Extracted Text Preview": extracted_text[:500],
        }
        # ensure_ascii=False keeps non-ASCII PDF text readable in the output
        # instead of turning it into \uXXXX escape sequences.
        return json.dumps(parsed_data, indent=2, ensure_ascii=False)
    except Exception as e:
        # Top-level UI boundary: report the failure as text instead of raising.
        return f"Error processing file: {e}"
|
21 |
+
|
22 |
+
# Gradio interface
|
23 |
+
with gr.Blocks() as demo:
    # Page header and short usage hint.
    gr.Markdown("# Curify Parse Prototype")
    gr.Markdown("Upload a PDF document to extract and view structured data.")

    # Upload widget and result box side by side.
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        result_output = gr.Textbox(label="Extracted Data", lines=20)

    # The button feeds the uploaded file to the extractor and shows the
    # returned string in the textbox.
    extract_button = gr.Button("Extract Data")
    extract_button.click(
        fn=extract_text_from_pdf,
        inputs=pdf_input,
        outputs=result_output,
    )

# Start the app with a shareable link requested.
demo.launch(share=True)
|
requirements.txt
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
qrcode
|
2 |
+
PyPDF2
|
3 |
+
flask
|
4 |
+
# json is part of the Python standard library; it is not a pip-installable package
|
5 |
+
gradio
|
6 |
+
newspaper3k
|
7 |
+
transformers
|
8 |
+
sentence-transformers
|
9 |
+
openai
|
10 |
+
todoist-api-python
|
11 |
+
flask
|
12 |
+
twilio
|
13 |
+
fastapi
|
14 |
+
uvicorn
|
15 |
+
ffmpy
|
16 |
+
google-cloud-storage
|
17 |
+
fpdf
|
18 |
+
markdown
|
19 |
+
nest_asyncio
|
20 |
+
reportlab
|