Spaces:

omdivyatej
/

general_invoice_parser

Runtime error

App Files Files Community

general_invoice_parser / app.py

omdivyatej

update app.py

c8ec340 almost 2 years ago

raw

history blame

4.33 kB

	# app.py
	import gradio as gr
	import pandas as pd # Import pandas
	from ocr_request import ocr_request
	import os
	from dotenv import load_dotenv
	import openai
	import json

	def _build_prompt(ocr_results):
	    """Return the instruction prompt with ``ocr_results`` embedded as JSON.

	    The wording is sent to the model verbatim, so it is runtime input rather
	    than documentation; it is intentionally left exactly as it was.
	    """
	    return f"""
	you are an excellent programmer and an anlyst. Given a json array or a json, you need to analyse it and convert into a json format which can be converted in dataframe of pandas easily. You have a singular task :
	Once you have thought through, produce a json, easily convertible to a dataframe in python, which would contain invoice number, product description, predicted material, confidence. Remember: You just have to share the o/p json, no thought process or anything else.

	Here is the json array/json : {json.dumps(ocr_results)}
	"""


	def _strip_code_fence(reply):
	    """Return ``reply`` without a surrounding Markdown code fence.

	    Chat models often wrap a JSON answer in a fence (three backticks,
	    optionally followed by a language tag such as ``json``) even when asked
	    for bare JSON, and ``json.loads`` rejects that wrapper. Text that is not
	    fenced is returned unchanged apart from surrounding whitespace.
	    """
	    text = reply.strip()
	    if not text.startswith("```"):
	        return text
	    text = text[3:]
	    if text.endswith("```"):
	        text = text[:-3]
	    # A purely alphabetic first line is the fence's language tag, not payload.
	    first_line, newline, remainder = text.partition("\n")
	    if newline and first_line.strip().isalpha():
	        text = remainder
	    return text.strip()


	def process_file(files):
	    """Run OCR on the uploaded files and tabulate the results with GPT-4.

	    Each upload is passed to ``ocr_request``. The collected responses are
	    embedded in a prompt asking the model for JSON that loads directly into a
	    pandas DataFrame. The resulting frame is also written to
	    ``categories.csv`` in the current working directory.

	    Args:
	        files: The uploads from the Gradio file input. Each item may be an
	            object exposing its path as ``.name`` or a plain path string.
	            ``None`` or an empty list means nothing was uploaded.

	    Returns:
	        A ``(dataframe, csv_filename)`` tuple: the table to display and the
	        path of the CSV file offered for download.

	    Raises:
	        json.JSONDecodeError: If the model's reply is not valid JSON once any
	            surrounding code fence has been removed.
	    """
	    csv_filename = "categories.csv"

	    if not files:
	        # Nothing to OCR, so skip the model call entirely and hand back an
	        # empty table. These column names mirror the fields the prompt asks
	        # for; for real data the model's reply decides the actual columns.
	        df = pd.DataFrame(
	            columns=[
	                "invoice_number",
	                "product_description",
	                "predicted_material",
	                "confidence",
	            ]
	        )
	        df.to_csv(csv_filename, index=False)
	        return df, csv_filename

	    # Send every uploaded file to the function from ocr_request.py.
	    response_arr = [
	        ocr_request(file if isinstance(file, str) else file.name)
	        for file in files
	    ]
	    print("Main file :", response_arr)

	    # The API key is read from the environment (optionally via a .env file).
	    load_dotenv()
	    openai.api_key = os.getenv("OPENAI_API_KEY")

	    completion = openai.ChatCompletion.create(
	        model="gpt-4",
	        max_tokens=5000,
	        temperature=0,
	        messages=[{"role": "user", "content": _build_prompt(response_arr)}],
	    )
	    result = completion.choices[0]["message"]["content"]
	    print("After in min gpt")

	    # Parse once and reuse the parsed value for both logging and the frame.
	    records = json.loads(_strip_code_fence(result))
	    print(records)

	    df = pd.DataFrame(records)
	    print("Df final : ", df)

	    # Let pandas write the file itself so encoding and line endings are
	    # handled by pandas rather than by the platform's text-mode defaults.
	    df.to_csv(csv_filename, index=False)

	    return df, csv_filename  # Gradio displays the frame as a table



	# Build the web UI: a multi-file upload feeding process_file, whose two return
	# values are shown as a table and offered as a downloadable CSV.
	# gr.File is used for both ends because the gr.inputs / gr.outputs namespaces
	# are deprecated in Gradio 3 and no longer exist in Gradio 4.
	interface = gr.Interface(
	    fn=process_file,
	    inputs=gr.File(label="Upload a File", file_count="multiple"),
	    outputs=["dataframe", gr.File(label="Download CSV")],  # "dataframe" renders a table
	)

	interface.launch(share=True)