File size: 5,621 Bytes
ec6dd69
 
 
 
 
 
 
 
 
dd6a24d
ec6dd69
 
 
 
 
 
 
 
 
 
dd6a24d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec6dd69
 
 
 
 
 
 
 
 
dd6a24d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec6dd69
 
 
 
 
 
 
dd6a24d
 
ec6dd69
 
 
 
 
 
 
 
 
 
 
dd6a24d
 
ec6dd69
dd6a24d
ec6dd69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd6a24d
ec6dd69
 
 
dd6a24d
 
 
 
 
 
 
 
 
 
 
 
ec6dd69
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import logging
import sys
import tempfile

import streamlit as st
import yaml
import copy
from menu import display_pages_menu, display_config
from pypdf import PdfReader
from utils import get_pdf_iframe, set_state, generate_assets

from country_by_country.processor import ReportProcessor

logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")


def set_page_filter(value: dict):
    set_state(["config", "pagefilter"], value)


def initiate_configuration() -> None:
    st.session_state["config"] = copy.deepcopy(st.session_state["initial_config"])
    if isinstance(st.session_state["config"]["pagefilter"], list):
        st.session_state["config"]["pagefilter"] = st.session_state["initial_config"][
            "pagefilter"
        ][0]
    st.session_state["selected_page_filter_name"] = st.session_state["config"][
        "pagefilter"
    ]["type"]


def on_pdf_file_upload() -> None:
    # Change states related to the pdf file upload
    mytmpfile.write(st.session_state.original_pdf.read())
    st.session_state["working_file_pdf"] = mytmpfile
    st.session_state["original_pdf_name"] = st.session_state.original_pdf.name

    # Generate assets
    generate_assets()

    st.session_state["page_redirection"] = "pages/1_Selected_Pages.py"


def on_config_file_upload() -> None:
    st.session_state["initial_config"] = st.session_state["initial_uploaded_config"]
    initiate_configuration()


def on_change_page_filter(name_to_filter_dict: dict) -> None:
    st.session_state["selected_page_filter_name"] = st.session_state[
        "radio_button_filter_selection"
    ]  # this 'buffer' is needed because selectors wipe their key on reload
    set_page_filter(name_to_filter_dict[st.session_state["selected_page_filter_name"]])


# Check if a redirection was requested
# Workaround because st.switch_page is not allowed in a callback function
if st.session_state.get("page_redirection", False):
    page_to_redirect_to = st.session_state["page_redirection"]
    st.session_state["page_redirection"] = False
    st.switch_page(page_to_redirect_to)

st.set_page_config(layout="wide", page_title="Accueil - upload de PDF")
st.title("Country by Country Tax Reporting analysis")
st.subheader(
    "This app will help you extract a table containing financial information from a pdf",
)
display_pages_menu()

mytmpfile = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)

# State initialization
if "first_time" not in st.session_state:
    logging.info("State initialization...")
    st.session_state["first_time"] = False

    logging.info("... loading default extract config")
    with open("extract_config.yaml", "r") as f:
        st.session_state["initial_config"] = yaml.safe_load(f.read())
    initiate_configuration()

    logging.info("... initializing processor and assets")
    st.session_state["proc"] = ReportProcessor(st.session_state["config"])
    st.session_state["assets"] = {
        "pagefilter": {},
        "table_extractors": [],
    }

with st.sidebar:

    st.markdown("# PDF Upload")

    st.markdown("## PDF Report to process")
    original_pdf = st.file_uploader(
        "Upload a pdf document containing financial table : ",
        key="original_pdf",
        on_change=on_pdf_file_upload,
    )

    if "original_pdf_name" in st.session_state:
        st.markdown(
            "Already loaded file : " + st.session_state["original_pdf_name"],
        )

    st.markdown("# Configuration:\n")
    # Upload personalized config if required
    loaded_config = st.file_uploader(
        "Upload a config if the default config doesn't suit you :",
        key="initial_uploaded_config",
        on_change=initiate_configuration,
    )

    if loaded_config is not None:
        if not loaded_config.name.endswith(".yaml"):
            st.error("Please upload a yaml file")
            loaded_config = None

        try:
            loaded_config_dict = yaml.safe_load(loaded_config)
            if not (
                loaded_config_dict.get("pagefilter", False)
                and loaded_config_dict.get("table_extraction", False)
            ):
                st.error("Please upload a valid config file")
                loaded_config = None
        except yaml.YAMLError as e:
            st.error("Unable to load yaml file config")
            loaded_config = None

    # Extract config

    if bool(loaded_config):
        st.session_state["initial_config"] = loaded_config_dict
        st.session_state["config"] = copy.deepcopy(st.session_state["initial_config"])

    # Set page filter
    page_filter_name_to_config_mapping = {
        pagefilter["type"]: pagefilter
        for pagefilter in st.session_state["initial_config"]["pagefilter"]
    }
    page_filter_list = list(page_filter_name_to_config_mapping.keys())
    current_selected_page_filter_index = page_filter_list.index(
        st.session_state["selected_page_filter_name"]
    )
    selected_page_filter_name = st.radio(
        "Page filter",
        page_filter_list,
        index=current_selected_page_filter_index,
        on_change=on_change_page_filter,
        key="radio_button_filter_selection",
        args=(page_filter_name_to_config_mapping,),
    )

    display_config()


if "working_file_pdf" in st.session_state:
    # Once a pdf has been uploaded, it will be stored as
    # the "original_pdf" key in the session state.
    # Hence, the following code will only be executed if a pdf has been uploaded.

    # Display the uploaded pdf
    st.markdown(
        get_pdf_iframe(st.session_state["working_file_pdf"].name),
        unsafe_allow_html=True,
    )