Spaces:
Sleeping
Sleeping
File size: 4,794 Bytes
ec6dd69 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import streamlit as st
import pandas as pd
import sys
import logging
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
from country_by_country.utils.utils import (
gather_tables,
check_if_many,
filled_table_extractors,
gather_tables_with_merge,
)
from menu import display_pages_menu
from dotenv import load_dotenv
def merge_table(table_extractor: str) -> None:
    """Stack every table of one algorithm into a single table.

    The tables are taken from ``st.session_state["tables"]``: an entry is
    selected when ``table_extractor`` is contained in its key. The header of
    the first selected table is applied to all selected tables so that their
    rows line up in the same columns, and the concatenation is stored under
    ``st.session_state["new_tables"]``.

    The tables held in the session state are left untouched: the header is
    applied to copies. When no table is selected an empty DataFrame is
    stored, because ``pd.concat`` refuses an empty list.

    Args:
        table_extractor: Name of the algorithm whose tables are merged.

    Raises:
        ValueError: If a selected table does not have as many columns as the
            first selected one (the header cannot be applied to it).
    """
    selected = [
        table
        for key, table in st.session_state["tables"].items()
        if table_extractor in key
    ]
    if not selected:
        st.session_state["new_tables"] = pd.DataFrame()
        return

    header = selected[0].columns
    aligned = []
    for table in selected:
        # Rename a copy so the tables still displayed on the page keep their
        # own header until the merge is actually saved.
        renamed = table.copy()
        renamed.columns = header
        aligned.append(renamed)

    st.session_state["new_tables"] = pd.concat(
        aligned, ignore_index=True, sort=False
    )
def save_merge(table_extractor: str) -> None:
    """Store the merged table as the session's tables for ``table_extractor``.

    The session assets, the merged table found under
    ``st.session_state["new_tables"]`` and the algorithm name are handed to
    ``gather_tables_with_merge``. Its result replaces
    ``st.session_state["tables"]`` and the algorithm name is recorded under
    ``st.session_state["algorithm_name"]``.

    Args:
        table_extractor: Name of the algorithm whose merge is being saved.
    """
    state = st.session_state
    assets = state["assets"]
    merged_table = state["new_tables"]
    state["tables"] = gather_tables_with_merge(assets, merged_table, table_extractor)
    state["algorithm_name"] = table_extractor
def remove_table(key: str) -> None:
    """Delete the table stored under ``key`` from the session state.

    If ``st.session_state["algorithm_name"]`` exists and is equal to ``key``,
    that entry is deleted as well.

    Args:
        key: Key of the table inside ``st.session_state["tables"]``.
    """
    state = st.session_state
    del state["tables"][key]

    if "algorithm_name" not in state:
        return
    if state["algorithm_name"] == key:
        del state["algorithm_name"]
# Page chrome. set_page_config is called before any other Streamlit command.
st.set_page_config(layout="wide", page_title="Merge Tables")  # page_icon="📈"
# The title and subheader describe this page (merging tables), in line with
# the "Merge Tables" page title set above.
st.title("Country by Country Tax Reporting analysis : Merge Tables")
st.subheader(
    "This page will allow you to merge the tables extracted by the same algorithm",
)
display_pages_menu()
load_dotenv()

# Shown only while no extracted table is stored in the session yet.
if "tables" not in st.session_state:
    st.markdown(
        "# !! Don't change the page while the algorithms are running, else they will start again"
    )
# The rest of the page is only rendered once pages have been validated and
# the resulting PDF is available in the session.
if (
    st.session_state.get("validate_selected_pages", False)
    and "pdf_after_page_validation" in st.session_state
):
    # Streamlit re-executes this script on every interaction, so the
    # extractors only run while no result is stored in the session.
    if "tables" not in st.session_state:
        for run_extractor in st.session_state["proc"].table_extractors:
            new_asset = run_extractor(st.session_state["pdf_after_page_validation"])
            st.session_state["assets"]["table_extractors"].append(new_asset)
        tables_extracted_by_name = gather_tables(st.session_state["assets"])
        logging.info("Table extracted : %s", tables_extracted_by_name)
        st.session_state["tables"] = tables_extracted_by_name

    if not check_if_many(st.session_state["assets"]):
        st.markdown("# !! Nothing to merge")
        # Skip to the next page automatically, but only the first time, so
        # this page can still be opened again afterwards.
        if "first_time_merge" not in st.session_state:
            st.session_state["first_time_merge"] = False
            st.switch_page("pages/4_Clean_Headers.py")

    col1, col2, col3 = st.columns([3, 1, 3])

    # Left column: pick an algorithm and list its tables.
    is_equal = True  # becomes False when the listed tables differ in width
    matching_tables = 0  # number of tables listed for the chosen algorithm
    with col1:
        table_extractor = st.selectbox(
            "Choose an algorithm :",
            filled_table_extractors(st.session_state["assets"]),
            key="selectbox2",
        )
        number_column = None
        if table_extractor is not None:
            for key, table in st.session_state["tables"].items():
                if table_extractor not in key:
                    continue
                matching_tables += 1
                with st.container(border=True):
                    # Compare against None explicitly: a width of 0 is a
                    # valid reference value, not "no reference yet".
                    if number_column is None:
                        number_column = table.shape[1]
                    elif number_column != table.shape[1]:
                        is_equal = False
                    st.markdown("Table shape :" + str(table.shape))
                    st.markdown("Table name : " + key)
                    st.dataframe(
                        table,
                    )
                    st.button(
                        "Remove this table",
                        type="primary",
                        on_click=remove_table,
                        args=(key,),
                        key=key,
                    )

    # Middle column: merge and save actions.
    with col2:
        st.markdown(
            "You won't be able to merge if the number of columns is not the same for each tables !!"
        )
        merged = st.button(
            "Merge",
            type="primary",
            on_click=merge_table,
            args=(table_extractor,),
            # Merging needs at least one table, all with the same width.
            disabled=not is_equal or matching_tables == 0,
        )
        validated = st.button(
            "Sauver le merge",
            on_click=save_merge,
            args=(table_extractor,),
            # save_merge reads st.session_state["new_tables"], which only
            # exists once a merge has been performed.
            disabled="new_tables" not in st.session_state,
        )
        if validated:
            st.switch_page("pages/4_Clean_Headers.py")

    # Right column: preview of the merge, shown on the run triggered by the
    # "Merge" click.
    with col3:
        if merged and "new_tables" in st.session_state:
            st.dataframe(
                st.session_state["new_tables"],
            )
|