File size: 4,326 Bytes
85c096c
 
 
 
c8ec340
 
 
 
85c096c
 
 
 
 
 
 
 
 
 
 
c8ec340
85c096c
c8ec340
 
85c096c
c8ec340
 
85c096c
c8ec340
 
 
 
 
85c096c
c8ec340
 
 
 
 
 
85c096c
c8ec340
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85c096c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8ec340
85c096c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# app.py
import gradio as gr
import pandas as pd  # Import pandas
from ocr_request import ocr_request
import os
from dotenv import load_dotenv
import openai
import json

def process_file(files):
    """Run OCR on each uploaded file, normalize the results via GPT-4,
    and return the predictions as a table plus a downloadable CSV.

    Args:
        files: Uploaded file objects from the Gradio ``File`` input
            (``file_count='multiple'``); each exposes a ``.name`` path.

    Returns:
        tuple: ``(df, csv_filename)`` where ``df`` is a pandas DataFrame with
        invoice number, product description, predicted material and
        confidence columns, and ``csv_filename`` is the path of the CSV
        written to disk for download.

    Raises:
        ValueError: if the model response cannot be parsed as JSON.
    """
    # Send each uploaded file through the OCR pipeline in ocr_request.py.
    response_arr = [ocr_request(file.name) for file in files]
    print("Main file :", response_arr)

    # Load the API key from .env (no-op if the environment is already set).
    load_dotenv()
    openai.api_key = os.getenv("OPENAI_API_KEY")

    prompt =f"""
    you are an excellent programmer and an anlyst. Given a json array or a json, you need to analyse it and convert into a json format which can be converted in dataframe of pandas easily. You have a singular task : 
    Once you have thought through, produce a json, easily convertible to a dataframe in python, which would contain invoice number, product description, predicted material, confidence. Remember: You just have to share the o/p json, no thought process or anything else.   

    Here is the json array/json : {json.dumps(response_arr)}
    """
    messages = [{"role": "user", "content": prompt}]
    # temperature=0 keeps the normalization deterministic.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        max_tokens=5000,
        temperature=0,
        messages=messages,
    )
    result = response.choices[0]["message"]["content"]
    print("After in min gpt")

    # Models often wrap JSON in markdown code fences; strip them so the
    # parse does not crash on an otherwise-valid reply.
    cleaned = result.strip()
    if cleaned.startswith("```"):
        first_newline = cleaned.find("\n")
        if first_newline != -1:
            cleaned = cleaned[first_newline + 1:]  # drop ```/```json line
        cleaned = cleaned.rstrip()
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3].rstrip()
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError as err:
        raise ValueError(f"Model did not return valid JSON: {result!r}") from err
    print(parsed)

    df = pd.DataFrame(parsed)
    print("Df final : ", df)

    # Persist the table so Gradio can offer it as a download.
    csv_filename = "categories.csv"
    df.to_csv(csv_filename, index=False)

    return df, csv_filename  # Gradio displays the DataFrame as a table



# Gradio UI: accept multiple uploaded files, display the normalized
# predictions as a table, and offer the generated CSV for download.
# NOTE: gr.inputs.File / gr.outputs.File are the deprecated Gradio 2.x
# namespaces (removed in Gradio 4); gr.File / gr.Dataframe are the
# supported equivalents.
interface = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="Upload a File", file_count="multiple"),
    outputs=[
        gr.Dataframe(label="Predictions"),
        gr.File(label="Download CSV"),
    ],
)

interface.launch(share=True)