File size: 3,027 Bytes
85c096c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6277da1
85c096c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# app.py
import gradio as gr
import pandas as pd  # Import pandas
from ocr_request import ocr_request
import io

def process_file(files):
    """Run OCR on every uploaded file and tabulate the predicted materials.

    Each upload is passed (by its ``.name`` path) to ``ocr_request``. Every
    response is read as a mapping that carries an ``'invoice_number'`` and a
    list of product dicts under ``'predictions'`` (falling back to
    ``'product_description'`` when ``'predictions'`` is missing or empty).
    NOTE(review): the exact response schema is defined by ``ocr_request``;
    confirm it against that module.

    Args:
        files: Iterable of uploaded file objects exposing a ``.name``
            attribute. ``None`` or an empty iterable yields an empty table.

    Returns:
        A ``(dataframe, csv_path)`` tuple: one row per product with the
        columns ``invoice_number``, ``product_description``,
        ``predicted_material`` and ``confidence``, plus the path of the CSV
        file the same table was written to.

    Raises:
        KeyError: If a response lacks ``'invoice_number'`` or a product lacks
            ``'predicted_material'`` or ``'confidence'``.
    """
    columns = [
        'invoice_number',
        'product_description',
        'predicted_material',
        'confidence',
    ]

    # Treat a missing upload (None) the same as an empty list instead of
    # failing with a TypeError when iterating.
    response_arr = [ocr_request(file.name) for file in files or []]

    print("Main file :", response_arr)

    flat_list = []
    for item in response_arr:
        invoice_number = item['invoice_number']
        products = item.get('predictions', []) or item.get('product_description', [])

        for product in products:
            flat_list.append({
                'invoice_number': invoice_number,
                # Products may label their text 'description'; normalise it
                # to 'product_description' so every row has the same keys.
                'product_description': product.get(
                    'product_description', product.get('description')
                ),
                'predicted_material': product['predicted_material'],
                'confidence': product['confidence'],
            })

    # Passing the columns explicitly keeps the header present (in the table
    # and in the CSV) even when no products were extracted.
    df = pd.DataFrame(flat_list, columns=columns)

    print("Df final : ", df)

    # Let pandas write the file directly: it handles newlines and encoding
    # itself, avoiding doubled line endings from a text-mode rewrite.
    csv_filename = "categories.csv"
    df.to_csv(csv_filename, index=False)

    return df, csv_filename  # Gradio shows the table and offers the CSV



# Accept several uploads at once; each is handed to process_file.
_upload_input = gr.inputs.File(label="Upload a File", file_count='multiple')

# Second output slot: the CSV file written by process_file.
_csv_output = gr.outputs.File(label="Download CSV")

# Wire the UI: results appear as a "dataframe" table plus the CSV download.
interface = gr.Interface(
    fn=process_file,
    inputs=_upload_input,
    outputs=["dataframe", _csv_output],
)

interface.launch()