Spaces:
Sleeping
Sleeping
File size: 5,621 Bytes
ec6dd69 dd6a24d ec6dd69 dd6a24d ec6dd69 dd6a24d ec6dd69 dd6a24d ec6dd69 dd6a24d ec6dd69 dd6a24d ec6dd69 dd6a24d ec6dd69 dd6a24d ec6dd69 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
import logging
import sys
import tempfile
import streamlit as st
import yaml
import copy
from menu import display_pages_menu, display_config
from pypdf import PdfReader
from utils import get_pdf_iframe, set_state, generate_assets
from country_by_country.processor import ReportProcessor
# Route log records to stdout, INFO and above, printing only the message text.
logging.basicConfig(
    format="%(message)s",
    level=logging.INFO,
    stream=sys.stdout,
)
def set_page_filter(value: dict):
    """Store ``value`` as the page filter of the working configuration.

    Delegates to ``set_state`` using the key path ``["config", "pagefilter"]``.

    Args:
        value: Page filter configuration to store.
    """
    pagefilter_key_path = ["config", "pagefilter"]
    set_state(pagefilter_key_path, value)
def initiate_configuration() -> None:
    """Reset the working configuration from the initial configuration.

    Deep-copies ``st.session_state["initial_config"]`` and stores the copy
    under ``st.session_state["config"]``. When the ``pagefilter`` entry of the
    configuration is a list, its first element becomes the active page
    filter. The ``type`` of the active page filter is then recorded under
    ``st.session_state["selected_page_filter_name"]``.
    """
    working_config = copy.deepcopy(st.session_state["initial_config"])

    active_filter = working_config["pagefilter"]
    if isinstance(active_filter, list):
        # Pick the first entry from the *copied* list. Taking it from
        # "initial_config" instead would make the working config share the
        # same dict object as the initial one, so later edits to the working
        # page filter would silently modify the initial configuration.
        active_filter = active_filter[0]
        working_config["pagefilter"] = active_filter

    st.session_state["config"] = working_config
    st.session_state["selected_page_filter_name"] = active_filter["type"]
def on_pdf_file_upload() -> None:
    """Callback of the PDF uploader: persist the upload and request a redirect.

    Copies the uploaded PDF into the module-level temporary file
    ``mytmpfile``, records that file and the original file name in the
    session state, calls ``generate_assets()`` and finally stores the page to
    redirect to under ``st.session_state["page_redirection"]``.

    Does nothing when the uploader currently holds no file.
    """
    uploaded_pdf = st.session_state.get("original_pdf")
    if uploaded_pdf is None:
        # The uploader value is None when no file is selected (for instance
        # after the user removed the file); there is nothing to copy then.
        return

    # Change states related to the pdf file upload
    mytmpfile.write(uploaded_pdf.read())
    # The temporary file is re-opened through its name further down the
    # line, so make sure the buffered bytes are actually written out first.
    mytmpfile.flush()
    st.session_state["working_file_pdf"] = mytmpfile
    st.session_state["original_pdf_name"] = uploaded_pdf.name

    # Generate assets
    generate_assets()

    # st.switch_page cannot be called from a callback, so only record the
    # target page here; the main script performs the actual switch.
    st.session_state["page_redirection"] = "pages/1_Selected_Pages.py"
def on_config_file_upload() -> None:
    """Use the uploaded configuration as initial configuration and re-initialise.

    Copies the session-state value stored under ``"initial_uploaded_config"``
    to ``"initial_config"`` and then calls ``initiate_configuration()``.

    NOTE(review): the value is copied as-is, without any parsing, and
    ``"initial_uploaded_config"`` is the key of the config file uploader
    widget. Confirm that this callback is still in use and that the value has
    the structure ``initiate_configuration()`` expects.
    """
    uploaded_config = st.session_state["initial_uploaded_config"]
    st.session_state["initial_config"] = uploaded_config
    initiate_configuration()
def on_change_page_filter(name_to_filter_dict: dict) -> None:
    """Callback of the page filter radio button: apply the chosen filter.

    Args:
        name_to_filter_dict: Mapping from a page filter name to the
            corresponding page filter configuration.
    """
    # Keep the selection under a dedicated key: this 'buffer' is needed
    # because selectors wipe their key on reload.
    chosen_filter_name = st.session_state["radio_button_filter_selection"]
    st.session_state["selected_page_filter_name"] = chosen_filter_name

    chosen_filter_config = name_to_filter_dict[chosen_filter_name]
    set_page_filter(chosen_filter_config)
# Perform a redirection if one was requested.
# Callbacks are not allowed to call st.switch_page, so they only record the
# target page in the session state and the switch itself happens here.
pending_redirection = st.session_state.get("page_redirection", False)
if pending_redirection:
    # Clear the request before switching so that it is handled only once.
    st.session_state["page_redirection"] = False
    st.switch_page(pending_redirection)
# Page layout, headings and navigation menu.
st.set_page_config(page_title="Accueil - upload de PDF", layout="wide")
st.title("Country by Country Tax Reporting analysis")
app_tagline = (
    "This app will help you extract a table containing financial information from a pdf"
)
st.subheader(app_tagline)
display_pages_menu()

# Temporary file that receives the uploaded PDF (see on_pdf_file_upload).
# NOTE(review): a new file is created every time this statement executes and,
# because of delete=False, it is not removed when closed; nothing in the
# visible code deletes it afterwards.
mytmpfile = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
# State initialization: executed only while the "first_time" marker is absent
# from the session state.
if "first_time" not in st.session_state:
    logging.info("State initialization...")

    logging.info("... loading default extract config")
    # Read the YAML file as UTF-8 explicitly instead of relying on the
    # platform default encoding.
    with open("extract_config.yaml", "r", encoding="utf-8") as f:
        st.session_state["initial_config"] = yaml.safe_load(f)
    initiate_configuration()

    logging.info("... initializing processor and assets")
    st.session_state["proc"] = ReportProcessor(st.session_state["config"])
    st.session_state["assets"] = {
        "pagefilter": {},
        "table_extractors": [],
    }

    # Set the marker last: if any step above raises, the initialization is
    # attempted again on the next run instead of being skipped with a
    # partially populated session state.
    st.session_state["first_time"] = False
# Sidebar: PDF upload, optional custom configuration and page filter choice.
with st.sidebar:
    st.markdown("# PDF Upload")

    st.markdown("## PDF Report to process")
    original_pdf = st.file_uploader(
        "Upload a pdf document containing financial table : ",
        key="original_pdf",
        on_change=on_pdf_file_upload,
    )

    if "original_pdf_name" in st.session_state:
        st.markdown(
            "Already loaded file : " + st.session_state["original_pdf_name"],
        )

    st.markdown("# Configuration:\n")
    # Upload personalized config if required
    loaded_config = st.file_uploader(
        "Upload a config if the default config doesn't suit you :",
        key="initial_uploaded_config",
        on_change=initiate_configuration,
    )

    # Parsed content of the uploaded config; stays None unless the upload is
    # a YAML mapping providing both required sections.
    loaded_config_dict = None
    if loaded_config is not None:
        if not loaded_config.name.endswith(".yaml"):
            st.error("Please upload a yaml file")
            loaded_config = None
        else:
            try:
                # Rewind before parsing: harmless on a fresh buffer, needed
                # if the same buffer object is handed back on a later rerun
                # (assumption - confirm for the Streamlit version in use).
                loaded_config.seek(0)
                parsed_config = yaml.safe_load(loaded_config)
            except yaml.YAMLError:
                st.error("Unable to load yaml file config")
                loaded_config = None
            else:
                # safe_load may return any YAML value (None for an empty
                # document, a list, a scalar...), so check the type before
                # looking up keys.
                if (
                    isinstance(parsed_config, dict)
                    and parsed_config.get("pagefilter", False)
                    and parsed_config.get("table_extraction", False)
                ):
                    loaded_config_dict = parsed_config
                else:
                    st.error("Please upload a valid config file")
                    loaded_config = None

    # Extract config
    # The uploader keeps returning the file on every rerun. Apply an upload
    # only when its content differs from the one applied last, otherwise the
    # working config (and the page filter chosen below) would be reset on
    # each rerun.
    if (
        loaded_config_dict is not None
        and st.session_state.get("applied_uploaded_config") != loaded_config_dict
    ):
        st.session_state["applied_uploaded_config"] = copy.deepcopy(loaded_config_dict)
        st.session_state["initial_config"] = loaded_config_dict
        # Rebuild the working config the same way as at start-up, so that
        # "pagefilter" holds a single filter and the selected filter name
        # matches the new configuration.
        initiate_configuration()

    # Set page filter
    available_page_filters = st.session_state["initial_config"]["pagefilter"]
    if isinstance(available_page_filters, dict):
        # A single filter given as a mapping is treated as a one-item list.
        available_page_filters = [available_page_filters]
    page_filter_name_to_config_mapping = {
        pagefilter["type"]: pagefilter for pagefilter in available_page_filters
    }
    page_filter_list = list(page_filter_name_to_config_mapping)

    # Fall back to the first filter when the remembered name is not offered
    # by the current configuration.
    remembered_filter_name = st.session_state.get("selected_page_filter_name")
    if remembered_filter_name in page_filter_list:
        current_selected_page_filter_index = page_filter_list.index(
            remembered_filter_name
        )
    else:
        current_selected_page_filter_index = 0

    selected_page_filter_name = st.radio(
        "Page filter",
        page_filter_list,
        index=current_selected_page_filter_index,
        on_change=on_change_page_filter,
        key="radio_button_filter_selection",
        args=(page_filter_name_to_config_mapping,),
    )

    display_config()
# Once a pdf has been uploaded, the temporary file holding it is stored under
# the "working_file_pdf" key of the session state. Hence, the preview below is
# only rendered if a pdf has been uploaded.
if "working_file_pdf" in st.session_state:
    working_pdf_path = st.session_state["working_file_pdf"].name
    # Display the uploaded pdf
    pdf_iframe_html = get_pdf_iframe(working_pdf_path)
    st.markdown(pdf_iframe_html, unsafe_allow_html=True)
|