File size: 5,872 Bytes
5e692ef
03bc446
f57bf28
03bc446
 
20905da
f57bf28
03bc446
 
 
 
 
 
 
 
 
 
 
 
 
 
5e692ef
 
 
 
03bc446
5e692ef
 
 
03bc446
5e692ef
 
 
 
03bc446
 
 
5e692ef
 
 
 
 
03bc446
5e692ef
 
 
 
 
03bc446
5e692ef
 
 
 
 
 
03bc446
5e692ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03bc446
 
 
 
 
5e692ef
 
 
 
03bc446
5e692ef
03bc446
5e692ef
 
03bc446
 
 
 
 
 
 
 
 
 
5e692ef
ed7fe7c
f57bf28
03bc446
 
 
f57bf28
 
03bc446
 
 
 
 
 
 
 
 
 
 
 
 
 
f57bf28
 
 
 
 
 
03bc446
 
 
 
 
 
f57bf28
 
03bc446
 
5e692ef
 
03bc446
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import gradio as gr
import pandas as pd
import numpy as np
from sepa import parser
import re


##################################################################
##################################################################
##################################################################

def full_function(xml_file):
    
    #for gradio: swap with xml_file for local testing
    full_name = xml_file.name
    #full_name = xml_file

    
    print("File name in gradio is ")
    print(full_name)
    
    def strip_namespace(xml):
        return re.sub(' xmlns="[^"]+"', '', xml, count=1)

    # Read file
    with open(full_name, 'r') as f:
        input_data = f.read()

    # Parse the bank statement XML to dictionary
    print("Parse full xml string")
    camt_dict = parser.parse_string(parser.bank_to_customer_statement, bytes(strip_namespace(input_data), 'utf8'))

    statements = pd.DataFrame.from_dict(camt_dict['statements'])
    all_entries = []
    dd_all = []
    
    print("Start loop all the transactions and add to df")
    for i,_ in statements.iterrows():
        if 'entries' in camt_dict['statements'][i]:

            #create empty df
            df = pd.DataFrame()
            dd = pd.DataFrame.from_records(camt_dict['statements'][i]['entries']) 

            df['reference'] = dd['reference']
            df['credit_debit_indicator'] = dd['credit_debit_indicator']
            df['status'] = dd['status']
            df['account_servicer_reference'] = dd['account_servicer_reference']

            iban = camt_dict['statements'][i]['account']['id']['iban']
            name = camt_dict['statements'][i]['account']['name']
            df['iban'] = iban
            df['name'] = name
            df['currency'] = dd['amount'].str['currency']
            df['amount'] = dd['amount'].str['_value']
            df['reference'] = dd['reference']

            df['value_date'] = dd['value_date'].str['date']
            df['value_date'] = pd.to_datetime(df['value_date']).dt.strftime('%Y-%m-%d')
            df['booking_date'] = dd['booking_date'].str['date']
            df['booking_date'] = pd.to_datetime(df['booking_date']).dt.strftime('%Y-%m-%d')

            #bank transaction code
            df['proprietary_code'] = dd['bank_transaction_code'].str['proprietary'].str['code']
            df['proprietary_issuer'] = dd['bank_transaction_code'].str['proprietary'].str['issuer']

            df['domain_code'] = dd['bank_transaction_code'].str['domain'].str['code']
            df['family_code'] = dd['bank_transaction_code'].str['domain'].str['family'].str['code']
            df['sub_family_code'] = dd['bank_transaction_code'].str['domain'].str['family'].str['sub_family_code']

            #transaction details
            df['debtor_name'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['debtor'].str['name']
            df['debtor_iban'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['debtor_account'].str['id'].str['iban']
            df['creditor_name'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['creditor'].str['name']
            df['creditor_iban'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['creditor_account'].str['id'].str['iban']

            df['bic'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_agents'].str['debtor_agent'].str['financial_institution'].str['bic']
            df['remittance_information'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['remittance_information'].str['unstructured'].str[0]

            df['account_servicer_reference'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['refs'].str['account_servicer_reference']
            df['end_to_end_id'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['refs'].str['end_to_end_id']

        all_entries.append(df)

    print("merge the list into df")
    df_entries = pd.concat(all_entries)

    #drop duplicates
    print("remove duplicate rows")
    df_entries = df_entries.drop_duplicates(subset=['reference'], keep='last')
    
    print("all done")
    
    df_entries_example = df_entries[['reference', 'credit_debit_indicator', 'iban', 'name', 'currency', 'amount', 'value_date', 'debtor_name', 'debtor_iban', 'creditor_name', 'creditor_iban', 'remittance_information']].head(20)
    #print(df_entries_example)
    
    return df_entries, df_entries_example



##################################################################
##################################################################
##################################################################


def export_csv(xml_file):
    
    df_entries, df_entries_example = full_function(xml_file)
    
    df_entries.to_csv("./output.csv")
    
    out = gr.File.update(value="output.csv", visible=True)
    
    return out, df_entries_example

##################################################################
##################################################################
##################################################################

desc = "Upload XML file, convert to .csv file, and analyze transactions"

with gr.Blocks() as demo: 

    xml_file = gr.File(label = "Upload XML file here")
     
    #output table. 
    df_entries_example = gr.DataFrame(label="Example output table, top 20 rows (not all columns)")
    
    with gr.Row():
        #export_button = gr.Button("Export")
        out = gr.File(label = "Output file", interactive=False, visible=False)


    #submit_btn = gr.Button("Run analysis on XML file")
    #export_button.click(export_csv, df_entries, csv)


gr.Interface(fn=export_csv, inputs=xml_file, outputs=[out, df_entries_example], title=desc).launch(share=True, debug =True)