general_invoice_parser / ocr_request.py
omdivyatej's picture
requirements
85c096c
# ocr_request.py
import requests
import openai
import json
import ai_json
def ocr_request(file_path, timeout=None):
    """Send an invoice file to the Nanonets OCR model and build JSON from the result.

    The file is uploaded to the Nanonets endpoint, the ``table`` cells and the
    ``invoice_number`` text are picked out of the first result's predictions,
    layout/bookkeeping keys are stripped from every cell, and the reduced
    payload is handed to ``ai_json.handle_creating_json``.

    Args:
        file_path: Path of the invoice file to upload (opened in binary mode).
        timeout: Optional timeout in seconds forwarded to ``requests.post``.
            ``None`` (the default) waits indefinitely, as before.

    Returns:
        Whatever ``ai_json.handle_creating_json`` returns for the payload
        ``{'invoice_number': ..., 'table_data': ...}``.

    Raises:
        OSError: If ``file_path`` cannot be opened.
        requests.HTTPError: If the OCR service answers with an error status.
        KeyError, IndexError: If the response body lacks the expected
            ``result[0].prediction`` structure.
    """
    url = 'https://app.nanonets.com/api/v2/OCR/Model/99a96f48-fa67-461d-a17d-8475af701b17/LabelFile/?async=false'
    # NOTE(review): the API key is hard-coded in source; consider moving it to
    # configuration or an environment variable and rotating this one.
    auth = requests.auth.HTTPBasicAuth('12ac2745-5e44-11ee-bb98-ea6b2bf28c31', '')

    # Context manager guarantees the upload handle is closed, even if the
    # request raises.
    with open(file_path, 'rb') as invoice_file:
        response = requests.post(
            url,
            auth=auth,
            files={'file': invoice_file},
            timeout=timeout,
        )

    # Fail loudly on HTTP errors instead of a confusing KeyError on "result".
    response.raise_for_status()
    response_data = response.json()["result"][0]["prediction"]

    # Defaults keep the function from raising UnboundLocalError when the OCR
    # model did not detect a table or an invoice number.
    invoice_number = None
    table_data = []
    for element in response_data:
        # If a label occurs more than once, the last occurrence wins.
        if element['label'] == 'table':
            table_data = element['cells']
        elif element['label'] == 'invoice_number':
            invoice_number = element['ocr_text']

    output_1 = {
        'invoice_number': invoice_number,
        'table_data': table_data,
    }

    # Keys dropped from each table cell before the payload is sent on.
    keys_to_remove = (
        'xmin', 'ymin', 'xmax', 'ymax', 'id', 'label_id', 'verification_status',
        'failed_validation', 'status', 'score', 'row_label', 'col_span',
        'row_span', 'row', 'col',
    )
    for item in output_1["table_data"]:
        for key in keys_to_remove:
            item.pop(key, None)

    print("Before sending to gpt", output_1)
    gpt_response = ai_json.handle_creating_json(output_1)
    return gpt_response