Spaces:
Runtime error
Runtime error
File size: 7,530 Bytes
5e692ef 03bc446 f57bf28 03bc446 20905da 6990840 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 6990840 ed7fe7c 6990840 f57bf28 6990840 f57bf28 03bc446 6990840 03bc446 6990840 03bc446 f57bf28 03bc446 6990840 f57bf28 03bc446 5e692ef 6990840 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
import gradio as gr
import pandas as pd
import numpy as np
from sepa import parser
import re
#####################################################################################################################################
#####################################################################################################################################
#####################################################################################################################################
def full_function(xml_file):
#for gradio: swap with xml_file for local testing
full_name = xml_file.name
#full_name = xml_file
print("File name in gradio is ")
print(full_name)
def strip_namespace(xml):
return re.sub(' xmlns="[^"]+"', '', xml, count=1)
# Read file
with open(full_name, 'r') as f:
input_data = f.read()
# Parse the bank statement XML to dictionary
print("Parse full xml string")
camt_dict = parser.parse_string(parser.bank_to_customer_statement, bytes(strip_namespace(input_data), 'utf8'))
statements = pd.DataFrame.from_dict(camt_dict['statements'])
all_entries = []
dd_all = []
print("Start loop all the transactions and add to df")
for i,_ in statements.iterrows():
if 'entries' in camt_dict['statements'][i]:
#create empty df
df = pd.DataFrame()
dd = pd.DataFrame.from_records(camt_dict['statements'][i]['entries'])
df['reference'] = dd['reference']
df['credit_debit_indicator'] = dd['credit_debit_indicator']
df['status'] = dd['status']
df['account_servicer_reference'] = dd['account_servicer_reference']
iban = camt_dict['statements'][i]['account']['id']['iban']
name = camt_dict['statements'][i]['account']['name']
df['iban'] = iban
df['name'] = name
df['currency'] = dd['amount'].str['currency']
df['amount'] = dd['amount'].str['_value']
df['reference'] = dd['reference']
df['value_date'] = dd['value_date'].str['date']
df['value_date'] = pd.to_datetime(df['value_date']).dt.strftime('%Y-%m-%d')
df['booking_date'] = dd['booking_date'].str['date']
df['booking_date'] = pd.to_datetime(df['booking_date']).dt.strftime('%Y-%m-%d')
#bank transaction code
df['proprietary_code'] = dd['bank_transaction_code'].str['proprietary'].str['code']
df['proprietary_issuer'] = dd['bank_transaction_code'].str['proprietary'].str['issuer']
df['domain_code'] = dd['bank_transaction_code'].str['domain'].str['code']
df['family_code'] = dd['bank_transaction_code'].str['domain'].str['family'].str['code']
df['sub_family_code'] = dd['bank_transaction_code'].str['domain'].str['family'].str['sub_family_code']
#transaction details
df['debtor_name'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['debtor'].str['name']
df['debtor_iban'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['debtor_account'].str['id'].str['iban']
df['creditor_name'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['creditor'].str['name']
df['creditor_iban'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['creditor_account'].str['id'].str['iban']
df['bic'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_agents'].str['debtor_agent'].str['financial_institution'].str['bic']
df['remittance_information'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['remittance_information'].str['unstructured'].str[0]
df['account_servicer_reference'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['refs'].str['account_servicer_reference']
df['end_to_end_id'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['refs'].str['end_to_end_id']
all_entries.append(df)
print("merge the list into df")
df_entries = pd.concat(all_entries)
#drop duplicates
print("remove duplicate rows")
df_entries = df_entries.drop_duplicates(subset=['reference'], keep='last')
print("all done")
df_entries_example = df_entries[['reference', 'credit_debit_indicator', 'iban', 'name', 'currency', 'amount', 'value_date', 'debtor_name', 'debtor_iban', 'creditor_name', 'creditor_iban', 'remittance_information']].head(20)
#print(df_entries_example)
return df_entries, df_entries_example
#####################################################################################################################################
#####################################################################################################################################
#####################################################################################################################################
def function_code_count(df_entries):
#count number of values
df_proprietary_code_count = df_entries['proprietary_code'].value_counts()#.to_frame()
df_proprietary_code_count = pd.DataFrame(df_proprietary_code_count).reset_index(names="code")
df_proprietary_code_count.rename(columns={"proprietary_code": "count"}, inplace=True)
return df_proprietary_code_count
#####################################################################################################################################
#####################################################################################################################################
#####################################################################################################################################
def export_csv(xml_file):
df_entries, df_entries_example = full_function(xml_file)
df_entries.to_csv("./output.csv")
out = gr.File(value="output.csv", visible=True)
#count codes
df_proprietary_code_count = function_code_count(df_entries)
return out, df_entries_example, df_proprietary_code_count
#####################################################################################################################################
#####################################################################################################################################
#####################################################################################################################################
desc = "Upload XML file, convert to .csv file, and analyze transactions"
with gr.Blocks() as demo:
xml_file = gr.File(label = "Upload XML file here")
#output table.
df_entries_example = gr.DataFrame(label="Example output table, top 20 rows (not all columns)")
with gr.Row():
#export_button = gr.Button("Export")
out = gr.File(label = "Output file", interactive=False, visible=False)
with gr.Row():
df_proprietary_code_count = gr.DataFrame(label="Number of transactions per code")
#submit_btn = gr.Button("Run analysis on XML file")
#export_button.click(export_csv, df_entries, csv)
gr.Interface(fn=export_csv, inputs=xml_file, outputs=[out, df_entries_example, df_proprietary_code_count], title=desc).launch(share=True, debug =True)
|