Spaces:
Runtime error
Runtime error
File size: 5,872 Bytes
5e692ef 03bc446 f57bf28 03bc446 20905da f57bf28 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef 03bc446 5e692ef ed7fe7c f57bf28 03bc446 f57bf28 03bc446 f57bf28 03bc446 f57bf28 03bc446 5e692ef 03bc446 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import gradio as gr
import pandas as pd
import numpy as np
from sepa import parser
import re
##################################################################
##################################################################
##################################################################
def full_function(xml_file):
#for gradio: swap with xml_file for local testing
full_name = xml_file.name
#full_name = xml_file
print("File name in gradio is ")
print(full_name)
def strip_namespace(xml):
return re.sub(' xmlns="[^"]+"', '', xml, count=1)
# Read file
with open(full_name, 'r') as f:
input_data = f.read()
# Parse the bank statement XML to dictionary
print("Parse full xml string")
camt_dict = parser.parse_string(parser.bank_to_customer_statement, bytes(strip_namespace(input_data), 'utf8'))
statements = pd.DataFrame.from_dict(camt_dict['statements'])
all_entries = []
dd_all = []
print("Start loop all the transactions and add to df")
for i,_ in statements.iterrows():
if 'entries' in camt_dict['statements'][i]:
#create empty df
df = pd.DataFrame()
dd = pd.DataFrame.from_records(camt_dict['statements'][i]['entries'])
df['reference'] = dd['reference']
df['credit_debit_indicator'] = dd['credit_debit_indicator']
df['status'] = dd['status']
df['account_servicer_reference'] = dd['account_servicer_reference']
iban = camt_dict['statements'][i]['account']['id']['iban']
name = camt_dict['statements'][i]['account']['name']
df['iban'] = iban
df['name'] = name
df['currency'] = dd['amount'].str['currency']
df['amount'] = dd['amount'].str['_value']
df['reference'] = dd['reference']
df['value_date'] = dd['value_date'].str['date']
df['value_date'] = pd.to_datetime(df['value_date']).dt.strftime('%Y-%m-%d')
df['booking_date'] = dd['booking_date'].str['date']
df['booking_date'] = pd.to_datetime(df['booking_date']).dt.strftime('%Y-%m-%d')
#bank transaction code
df['proprietary_code'] = dd['bank_transaction_code'].str['proprietary'].str['code']
df['proprietary_issuer'] = dd['bank_transaction_code'].str['proprietary'].str['issuer']
df['domain_code'] = dd['bank_transaction_code'].str['domain'].str['code']
df['family_code'] = dd['bank_transaction_code'].str['domain'].str['family'].str['code']
df['sub_family_code'] = dd['bank_transaction_code'].str['domain'].str['family'].str['sub_family_code']
#transaction details
df['debtor_name'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['debtor'].str['name']
df['debtor_iban'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['debtor_account'].str['id'].str['iban']
df['creditor_name'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['creditor'].str['name']
df['creditor_iban'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['creditor_account'].str['id'].str['iban']
df['bic'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_agents'].str['debtor_agent'].str['financial_institution'].str['bic']
df['remittance_information'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['remittance_information'].str['unstructured'].str[0]
df['account_servicer_reference'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['refs'].str['account_servicer_reference']
df['end_to_end_id'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['refs'].str['end_to_end_id']
all_entries.append(df)
print("merge the list into df")
df_entries = pd.concat(all_entries)
#drop duplicates
print("remove duplicate rows")
df_entries = df_entries.drop_duplicates(subset=['reference'], keep='last')
print("all done")
df_entries_example = df_entries[['reference', 'credit_debit_indicator', 'iban', 'name', 'currency', 'amount', 'value_date', 'debtor_name', 'debtor_iban', 'creditor_name', 'creditor_iban', 'remittance_information']].head(20)
#print(df_entries_example)
return df_entries, df_entries_example
##################################################################
##################################################################
##################################################################
def export_csv(xml_file):
df_entries, df_entries_example = full_function(xml_file)
df_entries.to_csv("./output.csv")
out = gr.File.update(value="output.csv", visible=True)
return out, df_entries_example
##################################################################
##################################################################
##################################################################
desc = "Upload XML file, convert to .csv file, and analyze transactions"
with gr.Blocks() as demo:
xml_file = gr.File(label = "Upload XML file here")
#output table.
df_entries_example = gr.DataFrame(label="Example output table, top 20 rows (not all columns)")
with gr.Row():
#export_button = gr.Button("Export")
out = gr.File(label = "Output file", interactive=False, visible=False)
#submit_btn = gr.Button("Run analysis on XML file")
#export_button.click(export_csv, df_entries, csv)
gr.Interface(fn=export_csv, inputs=xml_file, outputs=[out, df_entries_example], title=desc).launch(share=True, debug =True) |