File size: 5,357 Bytes
435fe24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
from flask import Flask, request, jsonify
import pymssql
import pandas as pd
import torch
import cv2
import pytesseract
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info

# Initialize Flask app
app = Flask(__name__)

# Initialize model and processor
model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct-AWQ", torch_dtype="auto")
if torch.cuda.is_available():
    model.to("cuda")
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct-AWQ")

pytesseract.pytesseract_cmd = r'/usr/bin/tesseract'

# Function to preprocess the image for OCR
def preprocess_image(image_path):
    image = cv2.imread(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
    return binary

# Function to extract text using OCR
def ocr_extract_text(image_path):
    preprocessed_image = preprocess_image(image_path)
    return pytesseract.image_to_string(preprocessed_image)

# Function to process image and extract details
def process_image(image_path):
    try:
        messages = [{
            "role": "user",
            "content": [
                {"type": "image", "image": image_path},
                {"type": "text", "text": (
                    "Extract the following details from the invoice:\n"
                    "- 'invoice_number'\n"
                    "- 'date'\n"
                    "- 'place'\n"
                    "- 'amount' (monetary value in the relevant currency)\n"
                    "- 'category' (based on the invoice type)"
                )}
            ]
        }]

        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = processor(text=[text], images=image_inputs, videos=video_inputs, padding=True, return_tensors="pt")
        inputs = inputs.to(model.device)

        generated_ids = model.generate(**inputs, max_new_tokens=128)
        output_text = processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)

        return parse_details(output_text[0])

    except Exception as e:
        print(f"Model failed, falling back to OCR: {e}")
        ocr_text = ocr_extract_text(image_path)
        return parse_details(ocr_text)

# Function to parse details from text
def parse_details(details):
    parsed_data = {
        "Invoice Number": None,
        "Date": None,
        "Place": None,
        "Amount": None,
        "Category": None
    }

    lines = details.split("\n")
    for line in lines:
        lower_line = line.lower()
        if "invoice" in lower_line:
            parsed_data["Invoice Number"] = line.split(":")[-1].strip()
        elif "date" in lower_line:
            parsed_data["Date"] = line.split(":")[-1].strip()
        elif "place" in lower_line:
            parsed_data["Place"] = line.split(":")[-1].strip()
        elif any(keyword in lower_line for keyword in ["total", "amount", "cost"]):
            parsed_data["Amount"] = line.split(":")[-1].strip()
        else:
            parsed_data["Category"] = "General"

    return parsed_data

# Function to store DataFrame to Azure SQL Database
def store_to_azure_sql(dataframe):
    conn_str = (
        "Driver={ODBC Driver 17 for SQL Server};"
        "Server=35.227.148.156;"  # Hardcoded IP address
        "Database=dbo.Invoices;"
        "UID=pio-admin;"
        "PWD=Poctest123#;"
    )
    try:
        with pymssql.connect(conn_str) as conn:
            cursor = conn.cursor()
            create_table_query = """
            IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='Invoices' AND xtype='U')
            CREATE TABLE Invoices (
                InvoiceNumber NVARCHAR(255),
                Date NVARCHAR(255),
                Place NVARCHAR(255),
                Amount NVARCHAR(255),
                Category NVARCHAR(255)
            )
            """
            cursor.execute(create_table_query)

            for _, row in dataframe.iterrows():
                insert_query = """
                INSERT INTO Invoices (InvoiceNumber, Date, Place, Amount, Category)
                VALUES (%s, %s, %s, %s, %s)
                """
                cursor.execute(insert_query, row['Invoice Number'], row['Date'], row['Place'], row['Amount'], row['Category'])
            conn.commit()
            print("Data successfully stored in Azure SQL Database.")
    except Exception as e:
        print(f"Error storing data to database: {e}")

@app.route('/process_invoice', methods=['POST'])
def process_invoice():
    try:
        # Get the image file from the request
        image_file = request.files['file']
        image_path = "temp_image.jpg"
        image_file.save(image_path)

        # Process the image
        details = process_image(image_path)

        # Convert details to a DataFrame
        df = pd.DataFrame([details])

        # Store in Azure SQL
        store_to_azure_sql(df)

        # Return the extracted details and status
        return jsonify({"extracted_details": details, "status": "Data stored successfully"})

    except Exception as e:
        return jsonify({"error": str(e)}), 500

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)