|
import logging |
|
import sys |
|
import tempfile |
|
|
|
import streamlit as st |
|
import yaml |
|
import copy |
|
from menu import display_pages_menu, display_config |
|
from pypdf import PdfReader |
|
from utils import get_pdf_iframe, set_state, generate_assets |
|
|
|
from country_by_country.processor import ReportProcessor |
|
|
|
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s") |
|
|
|
|
|
def set_page_filter(value: dict):
    """Record ``value`` as the active page-filter configuration.

    The value is handed to ``set_state`` together with the key path
    ``["config", "pagefilter"]``.
    """
    # presumably set_state walks this path inside st.session_state -- confirm in utils
    state_path = ["config", "pagefilter"]
    set_state(state_path, value)
|
|
|
|
|
def initiate_configuration() -> None:
    """Reset the working configuration from the initial configuration.

    A deep copy of ``st.session_state["initial_config"]`` is stored under
    ``"config"``. When the copied ``"pagefilter"`` entry is a list, its first
    element becomes the active page filter. The active filter's ``"type"`` is
    then recorded under ``"selected_page_filter_name"``.

    Raises:
        KeyError: if the initial config has no ``"pagefilter"`` entry or the
            active filter has no ``"type"`` key.
        IndexError: if ``"pagefilter"`` is an empty list.
    """
    working_config = copy.deepcopy(st.session_state["initial_config"])
    st.session_state["config"] = working_config

    if isinstance(working_config["pagefilter"], list):
        # Pick the first filter from the *copy*: taking it from
        # "initial_config" would make the working filter share the same dict
        # object, so edits to the working config would leak into the initial
        # one and defeat the deep copy above.
        working_config["pagefilter"] = working_config["pagefilter"][0]

    st.session_state["selected_page_filter_name"] = working_config["pagefilter"][
        "type"
    ]
|
|
|
|
|
def on_pdf_file_upload() -> None:
    """Handle a change of the PDF uploader widget (key ``"original_pdf"``).

    The uploaded bytes are copied into the module-level temporary file
    ``mytmpfile``, which is then stored under ``"working_file_pdf"`` together
    with the upload's name under ``"original_pdf_name"``. Assets are generated
    and a redirection to the selected-pages page is requested through
    ``"page_redirection"``.
    """
    uploaded_pdf = st.session_state.original_pdf
    if uploaded_pdf is None:
        # The widget value is None when no file is present (e.g. the user
        # removed the upload); there is nothing to copy or process then.
        return

    mytmpfile.write(uploaded_pdf.read())
    # The temp file stays open and is later consumed through its name, so push
    # buffered bytes to disk before anything else opens it.
    mytmpfile.flush()
    st.session_state["working_file_pdf"] = mytmpfile
    st.session_state["original_pdf_name"] = uploaded_pdf.name

    generate_assets()

    # Picked up at the top of the script on the next run.
    st.session_state["page_redirection"] = "pages/1_Selected_Pages.py"
|
|
|
|
|
def on_config_file_upload() -> None:
    """Promote the uploaded config to the initial config and re-initialise.

    NOTE(review): the value stored under ``"initial_uploaded_config"`` is
    copied as-is, while ``initiate_configuration`` indexes the initial config
    with ``["pagefilter"]``. Confirm that the uploader value is already a
    parsed mapping before wiring this callback to a widget.
    """
    uploaded_config = st.session_state["initial_uploaded_config"]
    st.session_state["initial_config"] = uploaded_config
    initiate_configuration()
|
|
|
|
|
def on_change_page_filter(name_to_filter_dict: dict) -> None:
    """React to a new choice in the page-filter radio button.

    Args:
        name_to_filter_dict: maps each selectable filter name to the filter
            configuration that should become active when it is chosen.
    """
    # The radio widget publishes its current value under its own key.
    chosen_name = st.session_state["radio_button_filter_selection"]
    st.session_state["selected_page_filter_name"] = chosen_name

    chosen_filter = name_to_filter_dict[chosen_name]
    set_page_filter(chosen_filter)
|
|
|
|
|
|
|
|
|
# Honour a redirection requested by a callback during the previous run, and
# clear the request first so it is only followed once.
pending_redirect = st.session_state.get("page_redirection", False)
if pending_redirect:
    st.session_state["page_redirection"] = False
    st.switch_page(pending_redirect)
|
|
|
# Page chrome: layout, title, tagline and the shared navigation menu.
st.set_page_config(page_title="Accueil - upload de PDF", layout="wide")
st.title("Country by Country Tax Reporting analysis")
app_tagline = (
    "This app will help you extract a table containing financial information from a pdf"
)
st.subheader(app_tagline)
display_pages_menu()

# Scratch file that on_pdf_file_upload() fills with the uploaded PDF.
# NOTE(review): this statement runs on every execution of the script and the
# file is created with delete=False, so files presumably accumulate in the
# temp directory -- confirm whether cleanup happens elsewhere.
mytmpfile = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
|
|
|
|
|
# One-time session bootstrap: load the default extraction config, derive the
# working config, and create the processor and empty asset containers.
if "first_time" not in st.session_state:
    logging.info("State initialization...")

    logging.info("... loading default extract config")
    # Explicit encoding so parsing does not depend on the host locale.
    with open("extract_config.yaml", "r", encoding="utf-8") as f:
        st.session_state["initial_config"] = yaml.safe_load(f)
    initiate_configuration()

    logging.info("... initializing processor and assets")
    st.session_state["proc"] = ReportProcessor(st.session_state["config"])
    st.session_state["assets"] = {
        "pagefilter": {},
        "table_extractors": [],
    }

    # Set the marker only once everything above succeeded; otherwise a failed
    # bootstrap would be skipped on the next run and the rest of the script
    # would fail on the missing "initial_config" / "config" keys.
    st.session_state["first_time"] = False
|
|
|
# Sidebar: PDF upload, optional custom config upload, page-filter selection
# and a view of the current configuration.
with st.sidebar:
    st.markdown("# PDF Upload")

    st.markdown("## PDF Report to process")
    original_pdf = st.file_uploader(
        "Upload a pdf document containing financial table : ",
        key="original_pdf",
        on_change=on_pdf_file_upload,
    )

    if "original_pdf_name" in st.session_state:
        st.markdown(
            "Already loaded file : " + st.session_state["original_pdf_name"],
        )

    st.markdown("# Configuration:\n")

    loaded_config = st.file_uploader(
        "Upload a config if the default config doesn't suit you :",
        key="initial_uploaded_config",
        on_change=initiate_configuration,
    )

    # Validate the uploaded config. loaded_config is reset to None whenever the
    # upload is rejected, so the code below only sees accepted configs.
    loaded_config_dict = None
    if loaded_config is not None:
        if not loaded_config.name.endswith(".yaml"):
            st.error("Please upload a yaml file")
            loaded_config = None
        else:
            # Only parse files that passed the extension check; parsing a
            # rejected (None) upload would raise instead of showing the error.
            try:
                # Rewind in case the upload buffer was already read earlier.
                loaded_config.seek(0)
                loaded_config_dict = yaml.safe_load(loaded_config)
            except yaml.YAMLError:
                st.error("Unable to load yaml file config")
                loaded_config = None
            else:
                # A YAML document may be a scalar, a list or empty; only a
                # mapping with both sections is a usable config.
                if not (
                    isinstance(loaded_config_dict, dict)
                    and loaded_config_dict.get("pagefilter", False)
                    and loaded_config_dict.get("table_extraction", False)
                ):
                    st.error("Please upload a valid config file")
                    loaded_config = None

    # Apply an accepted upload once, when it differs from the config currently
    # in use. Going through initiate_configuration() keeps "pagefilter" a
    # single filter and "selected_page_filter_name" in sync with the new
    # config; re-applying on every run would discard the user's selection.
    if bool(loaded_config) and st.session_state["initial_config"] != loaded_config_dict:
        st.session_state["initial_config"] = loaded_config_dict
        initiate_configuration()

    # Offer every page filter declared in the initial config, keyed by type.
    page_filter_name_to_config_mapping = {
        pagefilter["type"]: pagefilter
        for pagefilter in st.session_state["initial_config"]["pagefilter"]
    }
    page_filter_list = list(page_filter_name_to_config_mapping.keys())
    current_selected_page_filter_index = page_filter_list.index(
        st.session_state["selected_page_filter_name"]
    )
    selected_page_filter_name = st.radio(
        "Page filter",
        page_filter_list,
        index=current_selected_page_filter_index,
        on_change=on_change_page_filter,
        key="radio_button_filter_selection",
        args=(page_filter_name_to_config_mapping,),
    )

    display_config()
|
|
|
|
|
# Preview the working PDF once one has been stored in the session.
if "working_file_pdf" in st.session_state:
    working_pdf_path = st.session_state["working_file_pdf"].name
    pdf_iframe_html = get_pdf_iframe(working_pdf_path)
    # The helper's output is rendered as raw HTML, hence unsafe_allow_html.
    st.markdown(pdf_iframe_html, unsafe_allow_html=True)
|
|