LEAHWA committed on
Commit
1aca446
·
verified ·
1 Parent(s): 1196bc4

Upload 2 files

Browse files
Files changed (2) hide show
  1. OCR.py +50 -0
  2. app.py +64 -0
OCR.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import io
import os
import re

import torch
from PIL import Image
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
+
7
+ # Set environment variable
8
+ os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'
9
+
10
+ # Model and device setup
11
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
12
+ model_id = "google/paligemma-3b-mix-224"
13
+
14
+ # Load model and processor
15
+ model = PaliGemmaForConditionalGeneration.from_pretrained(model_id).to(device)
16
+ processor = AutoProcessor.from_pretrained(model_id)
17
+
18
+ def extract_text_from_image(image_content):
19
+ image = Image.open(io.BytesIO(image_content))
20
+
21
+ # Prompt for detecting text
22
+ prompt = "Extract all relevant details from this invoice."
23
+
24
+ # Prepare inputs for the model
25
+ inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)
26
+ input_len = inputs["input_ids"].shape[-1]
27
+
28
+ with torch.inference_mode():
29
+ # Generate the output
30
+ generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)
31
+ generation = generation[0][input_len:]
32
+ decoded = processor.decode(generation, skip_special_tokens=True)
33
+
34
+ return decoded
35
+
36
+ def extract_text_from_pdf(pdf_content):
37
+ # For simplicity, let's assume you're converting the PDF to images first
38
+ # You may use libraries like pdf2image to convert PDF pages to images
39
+ # Then call extract_text_from_image for each image
40
+ pass
41
+
42
+ def extract_invoice_details(text):
43
+ # Implement your logic to extract invoice details from the text
44
+ details = {}
45
+ # Example extraction logic
46
+ details['Invoice Number'] = re.search(r'Invoice Number: (\S+)', text).group(1) if re.search(r'Invoice Number: (\S+)', text) else 'N/A'
47
+ details['Amount'] = re.search(r'Total Amount Due: (\S+)', text).group(1) if re.search(r'Total Amount Due: (\S+)', text) else 'N/A'
48
+ details['Invoice Date'] = re.search(r'Invoice Date: (\S+)', text).group(1) if re.search(r'Invoice Date: (\S+)', text) else 'N/A'
49
+ details['Due Date'] = re.search(r'Due Date: (\S+)', text).group(1) if re.search(r'Due Date: (\S+)', text) else 'N/A'
50
+ return details
app.py CHANGED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from dataclasses import dataclass
3
+ import pytesseract
4
+ from PIL import Image
5
+ import io
6
+ import re
7
+ import cv2
8
+ import numpy as np
9
+ import OCR
10
+
11
+ from OCR import *
12
+
13
+ # Initialize chat history
14
+ if "messages" not in st.session_state:
15
+ st.session_state.messages = [{"role": "Invoice Reader", "content": "Submit an invoice and I will read it."}]
16
+
17
+ # Display chat messages from history on app rerun
18
+ for message in st.session_state.messages:
19
+ with st.chat_message(message["role"]):
20
+ st.markdown(message["content"])
21
+
22
+ USER = "user"
23
+ ASSISTANT = "Invoice Reader"
24
+
25
+ # Accept file uploads
26
+ uploaded_file = st.file_uploader("Upload an invoice", type=["pdf", "png", "jpg", "jpeg"])
27
+ if uploaded_file is not None:
28
+ # Display uploaded file content
29
+ file_content = uploaded_file.getvalue()
30
+ st.session_state.messages.append({"role": USER, "content": f"Uploaded file: {uploaded_file.name}"})
31
+ with st.chat_message(USER):
32
+ st.markdown(f"Uploaded file: {uploaded_file.name}")
33
+
34
+ # Preprocess and extract text from image or PDF
35
+ try:
36
+ if uploaded_file.type == "application/pdf":
37
+ text = extract_text_from_pdf(file_content)
38
+ else:
39
+ text = extract_text_from_image(file_content)
40
+
41
+ # Extract specific details
42
+ details = extract_invoice_details(text)
43
+
44
+ # Create and display assistant's response to extracted text
45
+ assistant_response = (
46
+ f"Extracted text from the uploaded file:\n\n{text}\n\n"
47
+ f"**Extracted Details:**\n"
48
+ f"**Invoice Number:** {details['Invoice Number']}\n"
49
+
50
+ f"**Amount:** {details['Amount']}\n"
51
+ f"**Invoice Date:** {details['Invoice Date']}\n"
52
+ f"**Due Date:** {details['Due Date']}"
53
+ )
54
+ st.session_state.messages.append({"role": ASSISTANT, "content": assistant_response})
55
+ with st.chat_message(ASSISTANT):
56
+ st.markdown(assistant_response)
57
+ except Exception as e:
58
+ error_message = f"An error occurred while processing the file: {e}"
59
+ st.session_state.messages.append({"role": ASSISTANT, "content": error_message})
60
+ with st.chat_message(ASSISTANT):
61
+ st.markdown(error_message)
62
+
63
+
64
+ #streamlit run C:/Users/leahw/PycharmProjects/Int-to-Artificial-Intelligence-Final-Project/app.py