Spaces:
Build error
Build error
File size: 5,357 Bytes
435fe24 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
from flask import Flask, request, jsonify
import pymssql
import pandas as pd
import torch
import cv2
import pytesseract
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info
# Initialize Flask app
app = Flask(__name__)
# Initialize model and processor
model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct-AWQ", torch_dtype="auto")
if torch.cuda.is_available():
model.to("cuda")
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct-AWQ")
pytesseract.pytesseract_cmd = r'/usr/bin/tesseract'
# Function to preprocess the image for OCR
def preprocess_image(image_path):
image = cv2.imread(image_path)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, binary = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
return binary
# Function to extract text using OCR
def ocr_extract_text(image_path):
preprocessed_image = preprocess_image(image_path)
return pytesseract.image_to_string(preprocessed_image)
# Function to process image and extract details
def process_image(image_path):
try:
messages = [{
"role": "user",
"content": [
{"type": "image", "image": image_path},
{"type": "text", "text": (
"Extract the following details from the invoice:\n"
"- 'invoice_number'\n"
"- 'date'\n"
"- 'place'\n"
"- 'amount' (monetary value in the relevant currency)\n"
"- 'category' (based on the invoice type)"
)}
]
}]
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(text=[text], images=image_inputs, videos=video_inputs, padding=True, return_tensors="pt")
inputs = inputs.to(model.device)
generated_ids = model.generate(**inputs, max_new_tokens=128)
output_text = processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
return parse_details(output_text[0])
except Exception as e:
print(f"Model failed, falling back to OCR: {e}")
ocr_text = ocr_extract_text(image_path)
return parse_details(ocr_text)
# Function to parse details from text
def parse_details(details):
parsed_data = {
"Invoice Number": None,
"Date": None,
"Place": None,
"Amount": None,
"Category": None
}
lines = details.split("\n")
for line in lines:
lower_line = line.lower()
if "invoice" in lower_line:
parsed_data["Invoice Number"] = line.split(":")[-1].strip()
elif "date" in lower_line:
parsed_data["Date"] = line.split(":")[-1].strip()
elif "place" in lower_line:
parsed_data["Place"] = line.split(":")[-1].strip()
elif any(keyword in lower_line for keyword in ["total", "amount", "cost"]):
parsed_data["Amount"] = line.split(":")[-1].strip()
else:
parsed_data["Category"] = "General"
return parsed_data
# Function to store DataFrame to Azure SQL Database
def store_to_azure_sql(dataframe):
conn_str = (
"Driver={ODBC Driver 17 for SQL Server};"
"Server=35.227.148.156;" # Hardcoded IP address
"Database=dbo.Invoices;"
"UID=pio-admin;"
"PWD=Poctest123#;"
)
try:
with pymssql.connect(conn_str) as conn:
cursor = conn.cursor()
create_table_query = """
IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='Invoices' AND xtype='U')
CREATE TABLE Invoices (
InvoiceNumber NVARCHAR(255),
Date NVARCHAR(255),
Place NVARCHAR(255),
Amount NVARCHAR(255),
Category NVARCHAR(255)
)
"""
cursor.execute(create_table_query)
for _, row in dataframe.iterrows():
insert_query = """
INSERT INTO Invoices (InvoiceNumber, Date, Place, Amount, Category)
VALUES (%s, %s, %s, %s, %s)
"""
cursor.execute(insert_query, row['Invoice Number'], row['Date'], row['Place'], row['Amount'], row['Category'])
conn.commit()
print("Data successfully stored in Azure SQL Database.")
except Exception as e:
print(f"Error storing data to database: {e}")
@app.route('/process_invoice', methods=['POST'])
def process_invoice():
try:
# Get the image file from the request
image_file = request.files['file']
image_path = "temp_image.jpg"
image_file.save(image_path)
# Process the image
details = process_image(image_path)
# Convert details to a DataFrame
df = pd.DataFrame([details])
# Store in Azure SQL
store_to_azure_sql(df)
# Return the extracted details and status
return jsonify({"extracted_details": details, "status": "Data stored successfully"})
except Exception as e:
return jsonify({"error": str(e)}), 500
if __name__ == "__main__":
app.run(host="0.0.0.0", port=5000)
|