# File size: 4,794 Bytes
# ec6dd69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import streamlit as st
import pandas as pd
import sys
import logging

logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")

from country_by_country.utils.utils import (
    gather_tables,
    check_if_many,
    filled_table_extractors,
    gather_tables_with_merge,
)
from menu import display_pages_menu
from dotenv import load_dotenv


def merge_table(table_extractor: str) -> None:
    """Stack every table of one extractor into a single DataFrame.

    Looks through ``st.session_state["tables"]`` for entries whose key
    contains ``table_extractor``, relabels each match with the column names
    of the first match, concatenates them row-wise with a fresh index and
    stores the result in ``st.session_state["new_tables"]``.

    The tables held in the session state are not modified: relabelling is
    applied to copies, so a merge that is never saved leaves the source
    tables with their original headers.

    When no table matches (or ``table_extractor`` is ``None``), an empty
    DataFrame is stored instead of letting ``pd.concat`` fail on an empty
    list.

    Args:
        table_extractor: Name fragment used to select tables by key.

    Raises:
        ValueError: If a matching table does not have the same number of
            columns as the first matching table.
    """
    reference_columns = None
    aligned_tables = []

    # A None extractor (nothing selectable) cannot match any key.
    if table_extractor is not None:
        for key, table in st.session_state["tables"].items():
            if table_extractor not in key:
                continue
            if reference_columns is None:
                reference_columns = table.columns
            # Work on a copy so the stored table keeps its own headers.
            aligned = table.copy()
            aligned.columns = reference_columns
            aligned_tables.append(aligned)

    if aligned_tables:
        merged = pd.concat(aligned_tables, ignore_index=True, sort=False)
    else:
        # pd.concat raises ValueError on an empty list.
        merged = pd.DataFrame()

    st.session_state["new_tables"] = merged


def save_merge(table_extractor: str) -> None:
    """Replace the session tables with the result of the last merge.

    Passes ``st.session_state["assets"]``, the merged table stored in
    ``st.session_state["new_tables"]`` and ``table_extractor`` to
    ``gather_tables_with_merge``; the returned value becomes
    ``st.session_state["tables"]`` and ``table_extractor`` is recorded in
    ``st.session_state["algorithm_name"]``.

    If no merge has been performed yet (``"new_tables"`` is absent from the
    session state), nothing is saved and the session state is left as it
    was, instead of failing with a ``KeyError``.

    Args:
        table_extractor: Name of the extractor whose tables were merged.
    """
    if "new_tables" not in st.session_state:
        # Nothing was merged yet; keep the current tables untouched.
        logging.warning(
            "No merged table to save for %s; merge the tables first",
            table_extractor,
        )
        return

    st.session_state["tables"] = gather_tables_with_merge(
        st.session_state["assets"],
        st.session_state["new_tables"],
        table_extractor,
    )
    st.session_state["algorithm_name"] = table_extractor


def remove_table(key: str) -> None:
    """Drop one table from the session state.

    Deletes ``st.session_state["tables"][key]``. If the session state also
    holds an ``"algorithm_name"`` entry equal to ``key``, that entry is
    deleted as well.

    Args:
        key: Key of the table to delete in ``st.session_state["tables"]``.

    Raises:
        KeyError: If ``key`` is not present in ``st.session_state["tables"]``.
    """
    session = st.session_state
    del session["tables"][key]

    if "algorithm_name" not in session:
        return
    if session["algorithm_name"] == key:
        del session["algorithm_name"]


# ---------------------------------------------------------------------------
# Page body: lets the user pick one table extractor, review/remove its tables,
# merge them into one DataFrame and save the merge before moving on to the
# header-cleaning page.
# ---------------------------------------------------------------------------
st.set_page_config(layout="wide", page_title="Merge Tables")  # page_icon="📈"
# NOTE(review): title and subheader mention headers although the page title is
# "Merge Tables" - looks copy-pasted from another page; confirm wording.
st.title("Country by Country Tax Reporting analysis : Headers")
st.subheader(
    "This page will allow you to modify the headers and to remove columns",
)
display_pages_menu()
load_dotenv()


# Shown until the tables have been stored in the session state.
if "tables" not in st.session_state:
    st.markdown(
        "# !! Don't change the page while the algorithms are runing, else they will start again"
    )


if (
    st.session_state.get("validate_selected_pages", False)
    and "pdf_after_page_validation" in st.session_state
):
    # Run every table extractor once and cache the result in the session.
    if "tables" not in st.session_state:
        for table_extractor in st.session_state["proc"].table_extractors:
            new_asset = table_extractor(st.session_state["pdf_after_page_validation"])
            st.session_state["assets"]["table_extractors"].append(new_asset)
        tables_extracted_by_name = gather_tables(st.session_state["assets"])
        logging.info("Table extracted : %s", tables_extracted_by_name)

        st.session_state["tables"] = tables_extracted_by_name

    if not check_if_many(st.session_state["assets"]):
        st.markdown("# !! Nothing to merge")

        # Only redirect automatically the first time this page is reached.
        if "first_time_merge" not in st.session_state:
            st.session_state["first_time_merge"] = False
            st.switch_page("pages/4_Clean_Headers.py")

    col1, col2, col3 = st.columns([3, 1, 3])
    # Stays True while every listed table has the same number of columns.
    is_equal = True
    with col1:
        table_extractor = st.selectbox(
            "Choose an algorithm :",
            filled_table_extractors(st.session_state["assets"]),
            key="selectbox2",
        )

        number_column = None
        if table_extractor is not None:
            for key, table in st.session_state["tables"].items():
                if table_extractor not in key:
                    continue
                with st.container(border=True):
                    # Compare against None so a 0-column table is still
                    # used as the reference width.
                    if number_column is None:
                        number_column = table.shape[1]
                    elif number_column != table.shape[1]:
                        is_equal = False
                    st.markdown("Table shape :" + str(table.shape))
                    st.markdown("Table name : " + key)
                    st.dataframe(
                        table,
                    )
                    st.button(
                        "Remove this table",
                        type="primary",
                        on_click=remove_table,
                        args=(key,),
                        key=key,
                    )

    with col2:
        st.markdown(
            "You won't be able to merge if the number of columns is not the same for each tables !!"
        )
        merged = st.button(
            "Merge",
            type="primary",
            on_click=merge_table,
            args=(table_extractor,),
            # Merging needs a selected extractor and equally wide tables.
            disabled=not is_equal or table_extractor is None,
        )
        validated = st.button(
            "Sauver le merge",
            on_click=save_merge,
            args=(table_extractor,),
            # save_merge reads "new_tables", which only exists after a merge.
            disabled="new_tables" not in st.session_state,
        )
        if validated:
            st.switch_page("pages/4_Clean_Headers.py")

    with col3:
        # Preview the merged table on the run triggered by the Merge click.
        if merged is True:
            st.dataframe(
                st.session_state["new_tables"],
            )