diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..9e3d04c4950127cf2914f79d07bee7421416508b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +venv* diff --git a/.streamlit/config.toml b/.streamlit/config.toml new file mode 100644 index 0000000000000000000000000000000000000000..7c595d0a09218a35bb78fb6bb9a8f572ec6f9706 --- /dev/null +++ b/.streamlit/config.toml @@ -0,0 +1,2 @@ +[client] +showSidebarNavigation = false diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..24119757d7353a729c786a51c37a327f9ccd30d1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2015-2024 Data4Good + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/README.md b/README.md index 99173308fd296f213b2060136e91d40d6a952121..88791393d491329e18dfe4aebbdde7f483feefb8 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,9 @@ --- -title: Taxobservatory Demo -emoji: 📉 -colorFrom: yellow -colorTo: blue +title: TaxObservatory Demo +colorFrom: red +colorTo: green sdk: streamlit sdk_version: 1.32.2 app_file: app.py pinned: false --- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference diff --git a/__pycache__/menu.cpython-310.pyc b/__pycache__/menu.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a2a17dc09f0741b6e908b3a3998b2fc9872e009 Binary files /dev/null and b/__pycache__/menu.cpython-310.pyc differ diff --git a/__pycache__/utils.cpython-310.pyc b/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..50771c62ef32d4c4bf17462282c029f53ef8d615 Binary files /dev/null and b/__pycache__/utils.cpython-310.pyc differ diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..e9d2076fde9308c899c63a7501e834bd9038e37d --- /dev/null +++ b/app.py @@ -0,0 +1,3 @@ +import streamlit as st + +st.switch_page("pages/0_Import_File.py") diff --git a/configs/test_full_workflow.yaml b/configs/test_full_workflow.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6fedf38ac511d8a03c68c1bcf8e702d9cfb78866 --- /dev/null +++ b/configs/test_full_workflow.yaml @@ -0,0 +1,16 @@ +# Full workflow +# Requires OpenAI API key and only works with table_extraction:Unstructured and r + +pagefilter: + type: FromFilename + +table_extraction: + - type: Unstructured + params: + pdf_image_dpi: 300 + hi_res_model_name: "yolox" + +table_cleaning: + - type: LLM + params: + openai_model: "gpt-4-turbo-preview" diff --git a/configs/v0.yaml b/configs/v0.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..615dd58dd4f00cd2657fbd4db6e64da14237c78b --- /dev/null +++ b/configs/v0.yaml @@ -0,0 +1,20 @@ +pagefilter: + type: RFClassifier + params: + modelfile: random_forest_model_low_false_positive.joblib + +table_extraction: + - type: Camelot + params: + flavor: stream + - type: Camelot + params: + flavor: lattice + - type: Unstructured + params: + hi_res_model_name: "yolox" + pdf_image_dpi: 300 +# - type: LLamaParse +# - type: UnstructuredAPI + +# table_cleaning: diff --git a/country_by_country/.empty b/country_by_country/.empty new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/country_by_country/__init__.py b/country_by_country/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..77952d1932c4b1393f6d1ed9a512c27f6499d684 --- /dev/null +++ b/country_by_country/__init__.py @@ -0,0 +1,21 @@ +# MIT License +# +# Copyright (c) 2024 dataforgood +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. diff --git a/country_by_country/__main__.py b/country_by_country/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..71387630ca1f952723cf58b1ab8d0fad4401f7db --- /dev/null +++ b/country_by_country/__main__.py @@ -0,0 +1,67 @@ +# MIT License +# +# Copyright (c) 2024 dataforgood +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
NUM_CLI_ARGS = 3


def process_report(config: dict, pdf_filepath: str) -> dict:
    """Run the report-processing pipeline on a single PDF.

    Args:
        config: Parsed pipeline configuration; it is handed unchanged to
            ``processor.ReportProcessor``.
        pdf_filepath: Path of the PDF report to process.

    Returns:
        Whatever ``ReportProcessor.process`` returns for this PDF (the
        collected assets).
    """
    # Loading API keys from .env file
    load_dotenv()

    proc = processor.ReportProcessor(config)
    return proc.process(pdf_filepath)


if __name__ == "__main__":

    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")

    # Expected command line: <module> config.yaml report.pdf
    if len(sys.argv) != NUM_CLI_ARGS:
        logging.error("Usage : python -m country_by_country config.yaml report.pdf")
        sys.exit(-1)

    logging.info("\nLoading %s", sys.argv[1])
    with Path(sys.argv[1]).open() as fh:
        config = yaml.safe_load(fh)

    assets = process_report(config, sys.argv[2])

    # Save all the assets to disk
    with Path("assets.pkl").open("wb") as fh:
        pickle.dump(assets, fh)
    logging.info(
        "Assets dumped in assets.pkl. You can read them using : \n"
        "pickle.load(open('assets.pkl', 'rb'))",
    )
b/country_by_country/__pycache__/dash_demo.cpython-310.pyc differ diff --git a/country_by_country/__pycache__/dash_process_methods.cpython-310.pyc b/country_by_country/__pycache__/dash_process_methods.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e395602bedf665cebbd2ce104ccb86d1aedb253 Binary files /dev/null and b/country_by_country/__pycache__/dash_process_methods.cpython-310.pyc differ diff --git a/country_by_country/__pycache__/processor.cpython-310.pyc b/country_by_country/__pycache__/processor.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aa39275d70a30ede65a663d93196b5932ef1cbee Binary files /dev/null and b/country_by_country/__pycache__/processor.cpython-310.pyc differ diff --git a/country_by_country/img_table_extraction/__pycache__/__init__.cpython-310.pyc b/country_by_country/img_table_extraction/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e40234f42102f7fe5dc59872bd6d0ee1049e758 Binary files /dev/null and b/country_by_country/img_table_extraction/__pycache__/__init__.cpython-310.pyc differ diff --git a/country_by_country/img_table_extraction/__pycache__/camelot_extractor.cpython-310.pyc b/country_by_country/img_table_extraction/__pycache__/camelot_extractor.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..771a984db1ea12c9d6967474744494af092c15bf Binary files /dev/null and b/country_by_country/img_table_extraction/__pycache__/camelot_extractor.cpython-310.pyc differ diff --git a/country_by_country/img_table_extraction/__pycache__/llama_parse_extractor.cpython-310.pyc b/country_by_country/img_table_extraction/__pycache__/llama_parse_extractor.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c3c6676b22562463a3453b59b1dc74951fe11aae Binary files /dev/null and b/country_by_country/img_table_extraction/__pycache__/llama_parse_extractor.cpython-310.pyc 
differ diff --git a/country_by_country/img_table_extraction/__pycache__/unstructured.cpython-310.pyc b/country_by_country/img_table_extraction/__pycache__/unstructured.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dbed716d35ef794624938ddfa8429c013a735d47 Binary files /dev/null and b/country_by_country/img_table_extraction/__pycache__/unstructured.cpython-310.pyc differ diff --git a/country_by_country/models/decision_tree_model.joblib b/country_by_country/models/decision_tree_model.joblib new file mode 100644 index 0000000000000000000000000000000000000000..f894953306811bbe27719c303c816116d83f5641 Binary files /dev/null and b/country_by_country/models/decision_tree_model.joblib differ diff --git a/country_by_country/models/random_forest_country_names.pkl b/country_by_country/models/random_forest_country_names.pkl new file mode 100644 index 0000000000000000000000000000000000000000..918955ae773f5560b39b908a1c7e87b4321a0c38 Binary files /dev/null and b/country_by_country/models/random_forest_country_names.pkl differ diff --git a/country_by_country/models/random_forest_keywords.pkl b/country_by_country/models/random_forest_keywords.pkl new file mode 100644 index 0000000000000000000000000000000000000000..726a8d0d2571521e585acbc9b8168f483c256d4b Binary files /dev/null and b/country_by_country/models/random_forest_keywords.pkl differ diff --git a/country_by_country/models/random_forest_model_high_false_positive.joblib b/country_by_country/models/random_forest_model_high_false_positive.joblib new file mode 100644 index 0000000000000000000000000000000000000000..9e9da75614956e7a251e438e2333103f0be801f0 Binary files /dev/null and b/country_by_country/models/random_forest_model_high_false_positive.joblib differ diff --git a/country_by_country/models/random_forest_model_low_false_positive.joblib b/country_by_country/models/random_forest_model_low_false_positive.joblib new file mode 100644 index 
0000000000000000000000000000000000000000..3048e46e9bc367768114c14be7607f057a824fdf Binary files /dev/null and b/country_by_country/models/random_forest_model_low_false_positive.joblib differ diff --git a/country_by_country/pagefilter/__init__.py b/country_by_country/pagefilter/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..13a5d677bfbd3452eec0430675f83fcfec266b69 --- /dev/null +++ b/country_by_country/pagefilter/__init__.py @@ -0,0 +1,41 @@ +# MIT License +# +# Copyright (c) 2024 dataforgood +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
def from_config(config: dict) -> "CopyAsIs | FromFilename | RFClassifier":
    """Build the page filter described by a configuration mapping.

    Args:
        config: Mapping with a mandatory ``"type"`` entry (``"CopyAsIs"``,
            ``"FromFilename"`` or ``"RFClassifier"``) and an optional
            ``"params"`` mapping, forwarded as keyword arguments to
            ``RFClassifier``.

    Returns:
        An instance of the requested page filter.

    Raises:
        KeyError: If ``config`` has no ``"type"`` entry.
        ValueError: If ``config["type"]`` is not a known page filter.
    """
    filter_type = config["type"]
    # "params" is optional; a missing or empty entry must not leave the
    # name unbound when the selected filter takes keyword arguments.
    params = config.get("params") or {}

    if filter_type == "CopyAsIs":
        return CopyAsIs()
    if filter_type == "FromFilename":
        return FromFilename()
    if filter_type == "RFClassifier":
        return RFClassifier(**params)

    # Fail loudly instead of silently returning None for a typo in the config
    raise ValueError(
        f"Unknown page filter type {filter_type!r}; "
        "expected one of 'CopyAsIs', 'FromFilename', 'RFClassifier'",
    )
a/country_by_country/pagefilter/__pycache__/rf_classifier.cpython-310.pyc b/country_by_country/pagefilter/__pycache__/rf_classifier.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..23f6388552400c409c51037b99aaaa50769b59ea Binary files /dev/null and b/country_by_country/pagefilter/__pycache__/rf_classifier.cpython-310.pyc differ diff --git a/country_by_country/pagefilter/copy_as_is.py b/country_by_country/pagefilter/copy_as_is.py new file mode 100644 index 0000000000000000000000000000000000000000..cc607e5d6d3716319a39e9c0104d446551c7c651 --- /dev/null +++ b/country_by_country/pagefilter/copy_as_is.py @@ -0,0 +1,51 @@ +# MIT License +# +# Copyright (c) 2024 dataforgood +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
class CopyAsIs:
    """
    Pass-through page filter: every page of the source pdf is selected
    """

    def __call__(self, pdf_filepath: str, assets: dict) -> None:
        """
        Selects all the pages of the original document

        Writes assets (only when assets is not None):
            src_pdf: the original pdf filepath
            selected_pages : list of the indices of all the pages
        """

        # The pdf is opened even when there are no assets to fill
        page_count = len(pypdf.PdfReader(pdf_filepath).pages)

        if assets is None:
            return

        every_page = list(range(page_count))
        assets["pagefilter"] = {
            "src_pdf": pdf_filepath,
            "selected_pages": every_page,
        }
NUM_PAGE_FIELDS = 2


class FromFilename:
    """
    Filtering from filename. This filter expects the filename
    of the pdf to contain either the page or the page range of interest,
    explicitly given in the filename as :

        /dir/containing/the/filename_of_the_report_#1.pdf
        /dir/containing/the/filename_of_the_report_#1-#2.pdf

    where #1 is a single page
          #1-#2 is a page range
    """

    def __init__(self) -> None:
        pass

    def __call__(self, pdf_filepath: str, assets: dict) -> None:
        """
        Selects the pages named in the filename of the pdf.
        Only the filename is parsed, the pdf itself is not opened.
        Page numbers in the filename are 1-based, the selected pages are
        0-based indices. When no page or page range can be parsed from
        the filename, no page is selected.

        Writes assets (only when assets is not None):
            src_pdf: the original pdf filepath
            selected_pages : list of selected pages
        """

        # We remove the extension (whatever its length), split on "_"
        # and keep the last field
        pagefield = Path(pdf_filepath).stem.split("_")[-1]
        selected_pages = []

        # isdecimal() rather than isnumeric(): the latter also accepts
        # characters such as superscripts or fractions that int() rejects
        if pagefield.isdecimal():
            selected_pages = [int(pagefield) - 1]
        else:
            pagefields = pagefield.split("-")
            if len(pagefields) == NUM_PAGE_FIELDS and all(
                field.isdecimal() for field in pagefields
            ):
                first_page, last_page = (int(field) for field in pagefields)
                # The range given in the filename is inclusive
                selected_pages = list(range(first_page - 1, last_page))

        if assets is not None:
            assets["pagefilter"] = {
                "src_pdf": pdf_filepath,
                "selected_pages": selected_pages,
            }
+++ b/country_by_country/pagefilter/rf_classifier.py @@ -0,0 +1,153 @@ +# MIT License +# +# Copyright (c) 2024 dataforgood +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
class FeatureExtractor:
    """
    A class to extract the features of a page as required by the random forest
    classifier
    """

    def __init__(self, keywords: list[str], all_country_names: list[str]) -> None:
        """
        Arguments:
            keywords: the keywords to count from the page text content
            all_country_names: the country names/flags to count in the page content
        """
        self.all_country_names = all_country_names
        self.keywords = keywords

    def number_country_names(self, text: str) -> int:
        """
        Computes and returns the total number of occurrences of any of the
        country names
        """
        return sum(text.count(country) for country in self.all_country_names)

    def keyword(self, text: str, keyword: str) -> int:
        """
        Computes and returns the number of occurrences of the specific keyword
        """
        return text.count(keyword)

    def __call__(self, text: str) -> list[int]:
        """
        Extracts the feature vector from the text
        The features we extract are, in this order:
            - nb_country: the total number of country names in the page
            - keywords: one count per keyword, i.e. how many times each string
              of the list of keywords is contained in the page

        A typical list of keywords is :
        ["tax","countr","country by country","country-by-country","report","cbc",\
        "revenu","transparen","ethic","incom","employ","benefi","asset","contrib",\
        "profit","accrued","jurisdiction","sales","ebt","paid","stated","accu","tangible",\
        "fte", "expense", "related","headcount","capital","turnover","retained","current",\
        "plant","work","intragroup","remuneration","debt","contribution","per country"]
        """
        features = [self.number_country_names(text)]
        features.extend(self.keyword(text, keyword_i) for keyword_i in self.keywords)
        return features


class RFClassifier:
    """
    RandomForest classifier of whether a page contains a CbCR table or not
    This randomforest decides from the text content of the page and is unable
    to detect a page where a CbCR table would be included as an image
    """

    def __init__(self, modelfile: str) -> None:
        """
        Arguments:
            modelfile: filename of the model to load from the models/
                       directory bundled in the country_by_country package

        Raises:
            FileNotFoundError: if pkgutil cannot provide the model data
        """
        # Local import: only needed to hand the in-memory model to joblib
        import io

        # Access the model bundled in the package
        data = pkgutil.get_data(
            "country_by_country",
            f"models/{modelfile}",
        )
        if data is None:
            raise FileNotFoundError(
                f"Cannot load models/{modelfile} from the country_by_country package",
            )

        # NOTE: pickle is only applied to files shipped with the package,
        # never point these loads to data coming from an untrusted source
        keywords = pickle.loads(
            pkgutil.get_data("country_by_country", "models/random_forest_keywords.pkl"),
        ).split(",")

        all_country_names = pickle.loads(
            pkgutil.get_data(
                "country_by_country",
                "models/random_forest_country_names.pkl",
            ),
        )
        self.feature_extractor = FeatureExtractor(keywords, all_country_names)

        # joblib can load from a file object: reading from memory avoids
        # leaving a temporary copy of the model on disk for every instance
        self.clf = joblib.load(io.BytesIO(data))

    @staticmethod
    def _add_neighbour_features(page_features: np.ndarray) -> np.ndarray:
        """
        Concatenates to the features of every page the features of the
        previous page and of the next page. The result has, for page i, the
        layout [features_page_{i-1}, features_page_{i}, features_page_{i+1}]
        where the missing neighbours of the first and last pages are zeros.

        Arguments:
            page_features: array of shape num_pages x num_features_per_page
        """
        n_pages, n_features_per_page = page_features.shape
        current = slice(n_features_per_page, 2 * n_features_per_page)

        features = np.zeros((n_pages, 3 * n_features_per_page))
        features[1:, :n_features_per_page] = page_features[:-1]
        features[:, current] = page_features
        features[:-1, 2 * n_features_per_page :] = page_features[1:]
        return features

    def __call__(self, pdf_filepath: str, assets: dict) -> None:
        """
        Reads a pdf from its filepath and selects the pages for which the
        classifier prediction is truthy. A pdf without any page leads to an
        empty selection.

        Writes assets (only when assets is not None):
            src_pdf: the original pdf filepath
            selected_pages : List of int
        """

        reader = pypdf.PdfReader(pdf_filepath)

        # Extract the features from all the pages (on the lowercased text)
        page_features = [
            self.feature_extractor(p.extract_text().lower()) for p in reader.pages
        ]

        selected_pages = []
        if page_features:
            # The random forest expects the features of the neighbouring
            # pages next to the features of the page itself
            features = self._add_neighbour_features(np.array(page_features))

            # Performs the prediction
            predictions = self.clf.predict(features)

            # And now we keep only the pages that have been selected
            selected_pages = [ip for ip, keep_p in enumerate(predictions) if keep_p]

        if assets is not None:
            assets["pagefilter"] = {
                "src_pdf": pdf_filepath,
                "selected_pages": selected_pages,
            }
class ReportProcessor:
    """Run the page filter, then every configured table extractor, on a PDF."""

    def __init__(self, config: dict) -> None:
        """Instantiate the pipeline stages described by ``config``.

        Args:
            config: Must hold a ``"pagefilter"`` entry; may hold a
                ``"table_extraction"`` list with one configuration per
                extractor.
        """
        # Report filter
        self.page_filter = pagefilter.from_config(config["pagefilter"])

        # Tables extraction: one extractor per configuration entry, none
        # when the section is missing.
        extractor_configs = config.get("table_extraction", [])
        self.table_extractors = [
            table_extraction.from_config(entry) for entry in extractor_configs
        ]

        # Table cleaning & reformatting is currently disabled: no cleaner is
        # ever built, whatever the configuration says. It can only run after
        # table extraction, since it consumes the extractors' assets.
        self.table_cleaners = []

    def process(self, pdf_filepath: str) -> dict:
        """Process one report and return the assets collected on the way.

        Args:
            pdf_filepath: Path of the PDF report.

        Returns:
            A dict with the keys ``"pagefilter"`` (filled by the page
            filter), ``"table_extractors"`` (one asset per extractor, in
            configuration order) and ``"table_cleaners"`` (always empty
            while the cleaning stage is disabled).
        """
        logging.info(f"Processing {pdf_filepath}")

        assets = {"pagefilter": {}, "table_extractors": [], "table_cleaners": []}

        # Identifying the pages to extract
        self.page_filter(pdf_filepath, assets)

        # Keep only the identified pages.
        # Note, in a GUI, we could ask the user to change the content of
        # assets["pagefilter"]["selected_pages"] before selecting the pages
        selected_pages = assets["pagefilter"]["selected_pages"]
        pdf_to_process = keep_pages(pdf_filepath, selected_pages)

        # Detect the tables of the selected pages and extract their contents
        extracted = assets["table_extractors"]
        for extract in self.table_extractors:
            extracted.append(extract(pdf_to_process))

        return assets
file mode 100644 index 0000000000000000000000000000000000000000..833d9183fc71957eb22137568a988ba363e83084 --- /dev/null +++ b/country_by_country/table_cleaning/__init__.py @@ -0,0 +1,34 @@ +# MIT License +# +# Copyright (c) 2024 dataforgood +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
def from_config(config: dict) -> "LLMCleaner | None":
    """Build the table cleaner described by ``config``.

    Args:
        config: Mapping with a ``"type"`` key naming the cleaner and an
            optional ``"params"`` mapping that is forwarded to the cleaner's
            constructor as keyword arguments.

    Returns:
        An ``LLMCleaner`` when ``config["type"]`` is ``"LLM"``; ``None`` for
        any other type (the unknown type is logged).
    """
    # Local import: this module has no top-level ``logging`` import.
    import logging

    cleaner_type = config["type"]
    # ``params:`` left empty in a YAML file is loaded as None; treat it like
    # a missing section.
    cleaner_params = config.get("params") or {}

    if cleaner_type == "LLM":
        return LLMCleaner(**cleaner_params)

    logging.info(f"There are no cleaners of the type : {cleaner_type}")
    return None
class LLMCleaner:
    """Turn extracted tables into one tidy per-country table using an LLM."""

    def __init__(self, **kwargs: dict) -> None:
        """
        Builds a table cleaner, by extracting clean data from tables
        extracted during table extraction stage.

        The kwargs are stored as-is and echoed in the produced asset; the
        mandatory ``openai_model`` entry names the chat model to query.

        Raises:
            KeyError: if ``openai_model`` is not given.
        """
        self.kwargs = kwargs
        self.type = "llm_cleaner"
        self.openai_model = self.kwargs["openai_model"]

    def __call__(self, asset: dict) -> dict:
        """Clean the tables of an extraction asset.

        Args:
            asset: Asset produced by a table extractor. ``"type"`` and
                ``"tables"`` (a list of dataframes) are required;
                ``"params"`` is only used for logging and may be absent.

        Returns:
            A new asset with the keys ``"id"``, ``"type"``, ``"params"`` and
            ``"table"``, the latter being a single dataframe with one row
            per extracted country (empty when the asset has no tables).
        """
        logging.info("\nKicking off cleaning stage...")
        logging.info(f"Cleaning type: {self.type}, with params: {self.kwargs}")
        # Not every extractor asset is guaranteed to carry "params".
        extraction_params = asset.get("params", {})
        logging.info(
            f"Input extraction type: {asset['type']}, with params: {extraction_params}",
        )

        # Extract tables from previous stage
        tables = asset["tables"]

        logging.info(f"Pulling {len(tables)} tables from extraction stage")

        if not tables:
            # Nothing to send to the LLM; pd.concat would also reject an
            # empty list, so return an empty table straight away.
            return {
                "id": uuid.uuid4(),
                "type": self.type,
                "params": self.kwargs,
                "table": pd.DataFrame(),
            }

        # Convert tables to html to add to LLM prompt
        html_tables = [table.to_html() for table in tables]

        # Define our LLM model
        model = ChatOpenAI(temperature=0, model=self.openai_model)

        # ---------- CHAIN 1/2 - Pull countries from each table ----------
        logging.info("Starting chain 1/2: extracting country names from tables")

        # Output should have this model (a list of country names)
        class CountryNames(BaseModel):
            country_names: list[str] = Field(
                description="Exhaustive list of countries with financial data in the table",
                enum=constants.COUNTRIES,
            )

        # Output should be a JSON with above schema
        parser1 = JsonOutputParser(pydantic_object=CountryNames)

        # Prompt includes one extracted table and some JSON output formatting instructions
        prompt1 = PromptTemplate(
            template="Extract an exhaustive list of countries from the following table "
            + "in html format:\n{table}\n{format_instructions}",
            input_variables=["table"],
            partial_variables={
                "format_instructions": parser1.get_format_instructions(),
            },
        )

        # Chain
        chain1 = {"table": lambda x: x} | prompt1 | model | parser1

        # Run it, one request per table
        responses1 = chain1.batch(html_tables, {"max_concurrency": 4})

        # Extract country lists from responses
        country_lists = [resp["country_names"] for resp in responses1]

        # ---------- CHAIN 2/2 - Pull financial data for each country ----------
        logging.info("Starting chain 2/2: extracting financial data from tables")

        # Define country data model
        class Country(BaseModel):
            """Financial data about a country"""

            jur_name: str = Field(..., description="Name of the country")
            total_revenues: float | None = Field(None, description="Total revenues")
            profit_before_tax: float | None = Field(
                None,
                description="Amount of profit (or loss) before tax",
            )
            tax_paid: float | None = Field(None, description="Income tax paid")
            tax_accrued: float | None = Field(None, description="Accrued tax")
            employees: float | None = Field(None, description="Number of employees")
            stated_capital: float | None = Field(None, description="Stated capital")
            accumulated_earnings: float | None = Field(
                None,
                description="Accumulated earnings",
            )
            tangible_assets: float | None = Field(
                None,
                description="Tangible assets other than cash and cash equivalent",
            )

        # Output should have this model (a list of country objects)
        class Countries(BaseModel):
            """Extracting financial data for each country"""

            countries: list[Country]

        # Output should be a JSON with above schema
        parser2 = PydanticOutputParser(pydantic_object=Countries)

        # Prompt includes one extracted table and some JSON output formatting instructions
        template = (
            """You are an assistant tasked with extracting financial """
            + """data about {country_list} from the following table in html format:\n
            {table}\n
            {format_instructions}
            """
        )

        # Set up prompt
        prompt = PromptTemplate.from_template(
            template,
            partial_variables={
                "format_instructions": parser2.get_format_instructions(),
            },
        )

        # Chain: each input is a (html_table, country_list) pair
        chain2 = (
            {"table": lambda x: x[0], "country_list": lambda x: x[1]}
            | prompt
            | model.with_structured_output(Countries)
        )

        # Run it
        responses2 = chain2.batch(
            list(zip(html_tables, country_lists, strict=True)),
            {"max_concurrency": 4},
        )

        # Merge the tables into one dataframe
        df = pd.concat(
            [pd.json_normalize(resp.dict()["countries"]) for resp in responses2],
        ).reset_index(drop=True)

        # Display
        display(df)

        # Create asset
        new_asset = {
            "id": uuid.uuid4(),
            "type": self.type,
            "params": self.kwargs,
            "table": df,
        }

        return new_asset
# Send every log record of level INFO and above to stdout, message only.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")


def from_config(config: dict) -> "object | None":
    """Build the table extractor described by ``config``.

    Args:
        config: Mapping with a ``"type"`` key naming the extractor and an
            optional ``"params"`` mapping that is forwarded to the
            extractor's constructor as keyword arguments.

    Returns:
        An instance of ``Camelot``, ``FromCSV``, ``Unstructured``,
        ``UnstructuredAPI``, ``LlamaParseExtractor`` or ``ExtractTableAPI``
        depending on ``config["type"]``; ``None`` when the type is unknown
        (the unknown type is logged).
    """
    extractor_type = config["type"]
    # ``params:`` left empty in a YAML file is loaded as None; treat it like
    # a missing section.
    extractor_params = config.get("params") or {}

    if extractor_type == "Camelot":
        return Camelot(**extractor_params)
    if extractor_type == "FromCSV":
        return FromCSV(**extractor_params)
    if extractor_type == "Unstructured":
        return Unstructured(**extractor_params)
    if extractor_type == "UnstructuredAPI":
        return UnstructuredAPI(**extractor_params)
    if extractor_type == "LlamaParse":
        return LlamaParseExtractor(**extractor_params)
    if extractor_type == "ExtractTableAPI":
        # This is for legacy support
        # In order to be able to use ExtractTable
        # for benchmarking
        # Note: ExtractTable-py is not maintained anymore
        # This is the reason why this case is handled in a specific way:
        # the module is imported only when this extractor is requested.
        from .extract_table_api import ExtractTableAPI

        return ExtractTableAPI(**extractor_params)

    logging.info(f"There are no extractors of the type : {extractor_type}")
    return None
0000000000000000000000000000000000000000..abffb59f58977d1c7b992a70bc7e4ef43d179ae0 Binary files /dev/null and b/country_by_country/table_extraction/__pycache__/camelot_extractor.cpython-310.pyc differ diff --git a/country_by_country/table_extraction/__pycache__/extract_table_api.cpython-310.pyc b/country_by_country/table_extraction/__pycache__/extract_table_api.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bd5334d6653b6a5958f4517f005fe1eeac46f9f2 Binary files /dev/null and b/country_by_country/table_extraction/__pycache__/extract_table_api.cpython-310.pyc differ diff --git a/country_by_country/table_extraction/__pycache__/from_csv.cpython-310.pyc b/country_by_country/table_extraction/__pycache__/from_csv.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0a9bce9a5a1d85d907935c76fc225e939a9ece2a Binary files /dev/null and b/country_by_country/table_extraction/__pycache__/from_csv.cpython-310.pyc differ diff --git a/country_by_country/table_extraction/__pycache__/llama_parse_extractor.cpython-310.pyc b/country_by_country/table_extraction/__pycache__/llama_parse_extractor.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aca0a199f6139bf7583f1485939028499e92e9ca Binary files /dev/null and b/country_by_country/table_extraction/__pycache__/llama_parse_extractor.cpython-310.pyc differ diff --git a/country_by_country/table_extraction/__pycache__/unstructured.cpython-310.pyc b/country_by_country/table_extraction/__pycache__/unstructured.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c98c916f734647684fc67aa8582026c63ca351f9 Binary files /dev/null and b/country_by_country/table_extraction/__pycache__/unstructured.cpython-310.pyc differ diff --git a/country_by_country/table_extraction/__pycache__/unstructured_api.cpython-310.pyc b/country_by_country/table_extraction/__pycache__/unstructured_api.cpython-310.pyc new file mode 100644 
index 0000000000000000000000000000000000000000..ae98490ecc2ab3d3501516f70a5d792656a9fc92 Binary files /dev/null and b/country_by_country/table_extraction/__pycache__/unstructured_api.cpython-310.pyc differ diff --git a/country_by_country/table_extraction/camelot_extractor.py b/country_by_country/table_extraction/camelot_extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..40753d5a9a75b3fdd734cf3a79ac5aa5e3c5cb9c --- /dev/null +++ b/country_by_country/table_extraction/camelot_extractor.py @@ -0,0 +1,57 @@ +# MIT License +# +# Copyright (c) 2024 dataforgood +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
class Camelot:
    """Table extractor that delegates the parsing to ``camelot.read_pdf``."""

    def __init__(self, flavor: str) -> None:
        """Remember the camelot ``flavor`` to use when reading a PDF."""
        self.type = "camelot"
        self.flavor = flavor

    def __call__(self, pdf_filepath: str) -> dict:
        """Extract the tables of a PDF.

        Args:
            pdf_filepath: Path of the PDF handed to ``camelot.read_pdf``.

        Returns:
            An asset with the keys ``"id"``, ``"type"``, ``"params"`` and
            ``"tables"``, the latter being the list of the ``df`` attribute
            of each parsed table.
        """
        logging.info("\nKicking off extraction stage...")
        logging.info(f"Extraction type: {self.type}, with params: {self.flavor}")

        parsed = camelot.read_pdf(pdf_filepath, flavor=self.flavor)

        # Keep only the dataframe of each parsed table
        frames = []
        for parsed_table in parsed:
            frames.append(parsed_table.df)

        return {
            "id": uuid.uuid4(),
            "type": "camelot",
            "params": {"flavor": self.flavor},
            "tables": frames,
        }
class ExtractTableAPI:
    """Legacy table extractor backed by the ExtractTable web service."""

    def __init__(self) -> None:
        """Create the ExtractTable client.

        The API key is read from the ``EXTRACT_TABLE_API_KEY`` environment
        variable.
        """
        api_key = os.getenv("EXTRACT_TABLE_API_KEY")
        self.extract_table = ExtractTable(api_key)

    def __call__(self, pdf_filepath: str) -> dict:
        """Extract the tables of all the pages of a PDF.

        Args:
            pdf_filepath: Path of the PDF sent to the service.

        Returns:
            An asset with the keys ``"id"``, ``"type"``, ``"params"``
            (always empty, this extractor takes no parameter) and
            ``"tables"`` (the value returned by the service when asked for
            the ``"df"`` output format).
        """
        tables_list = self.extract_table.process_file(
            filepath=pdf_filepath,
            pages="all",
            output_format="df",
        )

        # Create asset. "params" is present, even if empty, so that the
        # asset has the same keys as the ones of the other extractors.
        new_asset = {
            "id": uuid.uuid4(),
            "type": "ExtractTableAPI",
            "params": {},
            "tables": tables_list,
        }

        return new_asset
class FromCSV:
    """Pseudo extractor that loads already extracted tables from CSV files."""

    def __init__(self, csv_directory: str) -> None:
        """Remember the directory in which the CSV files are searched."""
        self.csv_directory = csv_directory

    def __call__(self, pdf_filepath: str) -> dict:
        """Load the CSV tables matching a report.

        Given a report ``/path/to/{company_name}_{year}*.pdf``, the tables
        are read from ``{csv_directory}/{company_name}_{year}*.csv``.

        Args:
            pdf_filepath: Path of the report; only its file name is used.

        Returns:
            An asset with the keys ``"id"``, ``"type"``, ``"params"`` and
            ``"tables"``, the latter being one dataframe per matching CSV
            file, ordered by file path.
        """
        # The first two "_"-separated fields of the file name identify the report
        report_basename = "_".join(Path(pdf_filepath).stem.split("_")[0:2])

        # Escape glob metacharacters so that the directory and the report
        # name are matched literally; only the trailing "*" is a wildcard.
        pattern = (
            f"{glob.escape(str(self.csv_directory))}/"
            f"{glob.escape(report_basename)}*.csv"
        )

        # glob returns the files in arbitrary order: sort them so that the
        # order of the tables is reproducible.
        tables_files = sorted(glob.glob(pattern))
        tables_list = [pd.read_csv(f) for f in tables_files]

        # Create asset
        new_asset = {
            "id": uuid.uuid4(),
            "type": "from_csv",
            "params": {"csv_directory": self.csv_directory},
            "tables": tables_list,
        }

        return new_asset
class LlamaParseExtractor:
    """Table extractor backed by the llama_parse library."""

    # Cell value used to complete the rows shorter than the widest one
    _FILLER = "No Extract "

    def __init__(self, **kwargs: dict) -> None:
        """
        Builds a pdf page parser, looking for tables using
        the llama_parse library.
        The kwargs given to the constructor are directly propagated
        to the LlamaParse constructor.
        You are free to define any parameter LlamaParse recognizes
        """
        self.kwargs = kwargs
        self.type = "llama_parse"

        # llama-parse is async-first
        nest_asyncio.apply()

    @staticmethod
    def _rows_to_dataframe(rows: list) -> pd.DataFrame:
        """Build a dataframe from table rows, the first one being the header.

        Every row, header included, is completed with the filler value up to
        the width of the widest row. The given rows are left untouched.
        """
        width = max(len(row) for row in rows)
        filler = LlamaParseExtractor._FILLER
        padded = [list(row) + [filler] * (width - len(row)) for row in rows]
        return pd.DataFrame(padded[1:], columns=padded[0])

    def __call__(self, pdf_filepath: str) -> dict:
        """Extract the tables of a PDF.

        Args:
            pdf_filepath: Path of the PDF given to ``LlamaParse``.

        Returns:
            An asset with the keys ``"id"``, ``"type"``, ``"params"`` and
            ``"tables"``, the latter holding one dataframe per item of type
            ``"table"`` found in the pages of the first result.
        """
        logging.info("\nKicking off extraction stage...")
        logging.info(f"Extraction type: {self.type}, with params: {self.kwargs}")

        json_objs = LlamaParse(**self.kwargs).get_json_result(pdf_filepath)

        # Only the first result is read; an empty result yields no table.
        pages = json_objs[0]["pages"] if json_objs else []

        tables_list = []
        for page in pages:
            for item in page["items"]:
                if item["type"] != "table":
                    continue
                if not item["rows"]:
                    # A table without any row has no header to build from
                    continue
                tables_list.append(self._rows_to_dataframe(item["rows"]))

        # Create asset
        new_asset = {
            "id": uuid.uuid4(),
            "type": self.type,
            "params": self.kwargs,
            "tables": tables_list,
        }

        return new_asset
class Unstructured:
    """Table extractor backed by the unstructured library, run locally."""

    def __init__(self, **kwargs: dict) -> None:
        """
        Builds a pdf page parser, looking for tables using
        the unstructured library.
        The kwargs given to the constructor are directly propagated
        to the partition_pdf function.
        You are free to define any parameter partition_pdf recognizes
        """
        self.kwargs = kwargs
        self.type = "unstructured"

    def __call__(self, pdf_filepath: str) -> dict:
        """Extract the tables of a PDF.

        The PDF is partitioned with ``infer_table_structure=True`` and
        ``strategy="hi_res"`` (plus the constructor kwargs); the html of
        every element of category ``"Table"`` is parsed with pandas.

        Args:
            pdf_filepath: Path of the PDF given to ``partition_pdf``.

        Returns:
            An asset with the keys ``"id"``, ``"type"``, ``"params"`` and
            ``"tables"``, the latter holding one dataframe per table element.
        """
        logging.info("\nKicking off extraction stage...")
        logging.info(f"Extraction type: {self.type}, with params: {self.kwargs}")

        elements = partition_pdf(
            pdf_filepath,
            infer_table_structure=True,
            strategy="hi_res",
            **self.kwargs,
        )

        # read_html returns a list of dataframes; the html of one element
        # describes one table, hence the [0].
        table_elements = [el for el in elements if el.category == "Table"]
        tables_list = [
            pd.read_html(StringIO(t.metadata.text_as_html))[0] for t in table_elements
        ]

        # Create asset
        new_asset = {
            "id": uuid.uuid4(),
            "type": self.type,
            "params": self.kwargs,
            "tables": tables_list,
        }

        return new_asset
class UnstructuredAPI:
    """Table extractor backed by the hosted unstructured.io API."""

    def __init__(self, **kwargs: dict) -> None:
        """
        Builds a pdf page parser, looking for tables using
        the unstructured.io api.
        The kwargs given to the constructor are directly propagated
        to the partition parameters of the request.
        """
        self.kwargs = kwargs
        self.type = "unstructured_api"

    def __call__(self, pdf_filepath: str) -> dict:
        """Extract the tables of a PDF through the unstructured.io API.

        The API key is read from the ``UNSTRUCTURED_API_KEY`` environment
        variable. A failing request is logged and results in an asset
        without tables; tables whose html pandas cannot read are logged and
        discarded.

        Args:
            pdf_filepath: Path of the PDF to upload.

        Returns:
            An asset with the keys ``"id"``, ``"type"``, ``"params"`` and
            ``"tables"`` (a list of dataframes, possibly empty).
        """
        logging.info("\nKicking off extraction stage...")
        logging.info(f"Extraction type: {self.type}, with params: {self.kwargs}")

        s = UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"))

        with Path(pdf_filepath).open("rb") as f:
            # Note that this currently only supports a single file
            files = shared.Files(
                content=f.read(),
                file_name=pdf_filepath,
            )

        req = shared.PartitionParameters(
            files=files,
            strategy="hi_res",
            pdf_infer_table_structure="True",
            **self.kwargs,
        )

        # Bound before the request so that a failing call still produces a
        # well-formed asset instead of an UnboundLocalError below.
        tables_list = []

        try:
            resp = s.general.partition(req)
        except Exception as e:
            # Best effort: report the failure and carry on without tables.
            logging.exception(f"Unstructured API request failed: {e}")
        else:
            for el in resp.elements:
                if el["type"] != "Table":
                    continue
                # Enclose in try block to ignore case when pandas can't read the table
                # Happens when the html is incorrectly formatted
                try:
                    table = pd.read_html(StringIO(el["metadata"]["text_as_html"]))[0]
                except Exception:
                    logging.info(
                        "Html table discarded. Pandas couldn't read the table.",
                    )
                else:
                    tables_list.append(table)

        # Create asset
        new_asset = {
            "id": uuid.uuid4(),
            "type": "unstructured_api",
            "params": self.kwargs,
            "tables": tables_list,
        }

        return new_asset
"Democratic Republic of the Congo", + "Denmark", + "Djibouti", + "Dominica", + "Dominican Republic", + "Ecuador", + "Egypt", + "El Salvador", + "Equatorial Guinea", + "Eritrea", + "Estonia", + "Eswatini (fmr. 'Swaziland')", + "Ethiopia", + "Fiji", + "Finland", + "France", + "Gabon", + "Gambia", + "Georgia", + "Germany", + "Ghana", + "Greece", + "Grenada", + "Greenland", + "Guatemala", + "Guinea", + "Guinea-Bissau", + "Guyana", + "Haiti", + "Honduras", + "Hungary", + "Iceland", + "India", + "Indonesia", + "Iran", + "Iraq", + "Ireland", + "Israel", + "Italy", + "Jamaica", + "Japan", + "Jersey", + "Jordan", + "Kazakhstan", + "Kenya", + "Kiribati", + "Kuwait", + "Kyrgyzstan", + "Laos", + "Latvia", + "Lebanon", + "Lesotho", + "Liberia", + "Libya", + "Liechtenstein", + "Lithuania", + "Luxembourg", + "Madagascar", + "Malawi", + "Malaysia", + "Maldives", + "Mali", + "Malta", + "Marshall Islands", + "Mauritania", + "Mauritius", + "Mexico", + "Micronesia", + "Moldova", + "Monaco", + "Mongolia", + "Montenegro", + "Morocco", + "Mozambique", + "Myanmar (formerly Burma)", + "Namibia", + "Nauru", + "Nepal", + "Netherlands", + "New Zealand", + "Nicaragua", + "Niger", + "Nigeria", + "North Korea", + "North Macedonia (formerly Macedonia)", + "Norway", + "Oman", + "Pakistan", + "Palau", + "Panama", + "Papua New Guinea", + "Paraguay", + "Peru", + "Philippines", + "Poland", + "Portugal", + "Qatar", + "Romania", + "Russia", + "Rwanda", + "Saint Kitts and Nevis", + "Saint Lucia", + "Saint Vincent and the Grenadines", + "Samoa", + "San Marino", + "Sao Tome and Principe", + "Saudi Arabia", + "Senegal", + "Serbia", + "Seychelles", + "Sierra Leone", + "Singapore", + "Slovakia", + "Slovenia", + "Solomon Islands", + "Somalia", + "South Africa", + "South Korea", + "South Sudan", + "Spain", + "Sri Lanka", + "Sudan", + "Suriname", + "Sweden", + "Switzerland", + "Syria", + "Taiwan", + "Tajikistan", + "Tanzania", + "Thailand", + "Timor-Leste", + "Togo", + "Tonga", + "Trinidad and Tobago", + 
"Tunisia", + "Turkey", + "Turkmenistan", + "Tuvalu", + "Uganda", + "Ukraine", + "United Arab Emirates", + "United Kingdom", + "United States", + "Uruguay", + "Uzbekistan", + "Vanuatu", + "Venezuela", + "Vietnam", + "Yemen", + "Zambia", + "Zambia", + "Zimbabwe", +] + +JURIDICTIONS = { + "Afghanistan": { + "Alpha-2 code": "AF", + "Alpha-3 code": "AFG", + "Numeric code": "4", + "Latitude (average)": "33", + "Longitude (average)": "65", + }, + "Albania": { + "Alpha-2 code": "AL", + "Alpha-3 code": "ALB", + "Numeric code": "8", + "Latitude (average)": "41", + "Longitude (average)": "20", + }, + "Algeria": { + "Alpha-2 code": "DZ", + "Alpha-3 code": "DZA", + "Numeric code": "12", + "Latitude (average)": "28", + "Longitude (average)": "3", + }, + "American Samoa": { + "Alpha-2 code": "AS", + "Alpha-3 code": "ASM", + "Numeric code": "16", + "Latitude (average)": "-14.3333", + "Longitude (average)": "-170", + }, + "Andorra": { + "Alpha-2 code": "AD", + "Alpha-3 code": "AND", + "Numeric code": "20", + "Latitude (average)": "42.5", + "Longitude (average)": "1.6", + }, + "Angola": { + "Alpha-2 code": "AO", + "Alpha-3 code": "AGO", + "Numeric code": "24", + "Latitude (average)": "-12.5", + "Longitude (average)": "18.5", + }, + "Anguilla": { + "Alpha-2 code": "AI", + "Alpha-3 code": "AIA", + "Numeric code": "660", + "Latitude (average)": "18.25", + "Longitude (average)": "-63.1667", + }, + "Antarctica": { + "Alpha-2 code": "AQ", + "Alpha-3 code": "ATA", + "Numeric code": "10", + "Latitude (average)": "-90", + "Longitude (average)": "0", + }, + "Antigua and Barbuda": { + "Alpha-2 code": "AG", + "Alpha-3 code": "ATG", + "Numeric code": "28", + "Latitude (average)": "17.05", + "Longitude (average)": "-61.8", + }, + "Argentina": { + "Alpha-2 code": "AR", + "Alpha-3 code": "ARG", + "Numeric code": "32", + "Latitude (average)": "-34", + "Longitude (average)": "-64", + }, + "Armenia": { + "Alpha-2 code": "AM", + "Alpha-3 code": "ARM", + "Numeric code": "51", + "Latitude (average)": 
"40", + "Longitude (average)": "45", + }, + "Aruba": { + "Alpha-2 code": "AW", + "Alpha-3 code": "ABW", + "Numeric code": "533", + "Latitude (average)": "12.5", + "Longitude (average)": "-69.9667", + }, + "Australia": { + "Alpha-2 code": "AU", + "Alpha-3 code": "AUS", + "Numeric code": "36", + "Latitude (average)": "-27", + "Longitude (average)": "133", + }, + "Austria": { + "Alpha-2 code": "AT", + "Alpha-3 code": "AUT", + "Numeric code": "40", + "Latitude (average)": "47.3333", + "Longitude (average)": "13.3333", + }, + "Azerbaijan": { + "Alpha-2 code": "AZ", + "Alpha-3 code": "AZE", + "Numeric code": "31", + "Latitude (average)": "40.5", + "Longitude (average)": "47.5", + }, + "Bahamas": { + "Alpha-2 code": "BS", + "Alpha-3 code": "BHS", + "Numeric code": "44", + "Latitude (average)": "24.25", + "Longitude (average)": "-76", + }, + "Bahrain": { + "Alpha-2 code": "BH", + "Alpha-3 code": "BHR", + "Numeric code": "48", + "Latitude (average)": "26", + "Longitude (average)": "50.55", + }, + "Bangladesh": { + "Alpha-2 code": "BD", + "Alpha-3 code": "BGD", + "Numeric code": "50", + "Latitude (average)": "24", + "Longitude (average)": "90", + }, + "Barbados": { + "Alpha-2 code": "BB", + "Alpha-3 code": "BRB", + "Numeric code": "52", + "Latitude (average)": "13.1667", + "Longitude (average)": "-59.5333", + }, + "Belarus": { + "Alpha-2 code": "BY", + "Alpha-3 code": "BLR", + "Numeric code": "112", + "Latitude (average)": "53", + "Longitude (average)": "28", + }, + "Belgium": { + "Alpha-2 code": "BE", + "Alpha-3 code": "BEL", + "Numeric code": "56", + "Latitude (average)": "50.8333", + "Longitude (average)": "4", + }, + "Belize": { + "Alpha-2 code": "BZ", + "Alpha-3 code": "BLZ", + "Numeric code": "84", + "Latitude (average)": "17.25", + "Longitude (average)": "-88.75", + }, + "Benin": { + "Alpha-2 code": "BJ", + "Alpha-3 code": "BEN", + "Numeric code": "204", + "Latitude (average)": "9.5", + "Longitude (average)": "2.25", + }, + "Bermuda": { + "Alpha-2 code": "BM", + 
"Alpha-3 code": "BMU", + "Numeric code": "60", + "Latitude (average)": "32.3333", + "Longitude (average)": "-64.75", + }, + "Bhutan": { + "Alpha-2 code": "BT", + "Alpha-3 code": "BTN", + "Numeric code": "64", + "Latitude (average)": "27.5", + "Longitude (average)": "90.5", + }, + "Bolivia, Plurinational State of": { + "Alpha-2 code": "BO", + "Alpha-3 code": "BOL", + "Numeric code": "68", + "Latitude (average)": "-17", + "Longitude (average)": "-65", + }, + "Bolivia": { + "Alpha-2 code": "BO", + "Alpha-3 code": "BOL", + "Numeric code": "68", + "Latitude (average)": "-17", + "Longitude (average)": "-65", + }, + "Bosnia and Herzegovina": { + "Alpha-2 code": "BA", + "Alpha-3 code": "BIH", + "Numeric code": "70", + "Latitude (average)": "44", + "Longitude (average)": "18", + }, + "Botswana": { + "Alpha-2 code": "BW", + "Alpha-3 code": "BWA", + "Numeric code": "72", + "Latitude (average)": "-22", + "Longitude (average)": "24", + }, + "Bouvet Island": { + "Alpha-2 code": "BV", + "Alpha-3 code": "BVT", + "Numeric code": "74", + "Latitude (average)": "-54.4333", + "Longitude (average)": "3.4", + }, + "Brazil": { + "Alpha-2 code": "BR", + "Alpha-3 code": "BRA", + "Numeric code": "76", + "Latitude (average)": "-10", + "Longitude (average)": "-55", + }, + "British Indian Ocean Territory": { + "Alpha-2 code": "IO", + "Alpha-3 code": "IOT", + "Numeric code": "86", + "Latitude (average)": "-6", + "Longitude (average)": "71.5", + }, + "Brunei Darussalam": { + "Alpha-2 code": "BN", + "Alpha-3 code": "BRN", + "Numeric code": "96", + "Latitude (average)": "4.5", + "Longitude (average)": "114.6667", + }, + "Brunei": { + "Alpha-2 code": "BN", + "Alpha-3 code": "BRN", + "Numeric code": "96", + "Latitude (average)": "4.5", + "Longitude (average)": "114.6667", + }, + "Bulgaria": { + "Alpha-2 code": "BG", + "Alpha-3 code": "BGR", + "Numeric code": "100", + "Latitude (average)": "43", + "Longitude (average)": "25", + }, + "Burkina Faso": { + "Alpha-2 code": "BF", + "Alpha-3 code": "BFA", + 
"Numeric code": "854", + "Latitude (average)": "13", + "Longitude (average)": "-2", + }, + "Burundi": { + "Alpha-2 code": "BI", + "Alpha-3 code": "BDI", + "Numeric code": "108", + "Latitude (average)": "-3.5", + "Longitude (average)": "30", + }, + "Cambodia": { + "Alpha-2 code": "KH", + "Alpha-3 code": "KHM", + "Numeric code": "116", + "Latitude (average)": "13", + "Longitude (average)": "105", + }, + "Cameroon": { + "Alpha-2 code": "CM", + "Alpha-3 code": "CMR", + "Numeric code": "120", + "Latitude (average)": "6", + "Longitude (average)": "12", + }, + "Canada": { + "Alpha-2 code": "CA", + "Alpha-3 code": "CAN", + "Numeric code": "124", + "Latitude (average)": "60", + "Longitude (average)": "-95", + }, + "Cape Verde": { + "Alpha-2 code": "CV", + "Alpha-3 code": "CPV", + "Numeric code": "132", + "Latitude (average)": "16", + "Longitude (average)": "-24", + }, + "Cayman Islands": { + "Alpha-2 code": "KY", + "Alpha-3 code": "CYM", + "Numeric code": "136", + "Latitude (average)": "19.5", + "Longitude (average)": "-80.5", + }, + "Central African Republic": { + "Alpha-2 code": "CF", + "Alpha-3 code": "CAF", + "Numeric code": "140", + "Latitude (average)": "7", + "Longitude (average)": "21", + }, + "Chad": { + "Alpha-2 code": "TD", + "Alpha-3 code": "TCD", + "Numeric code": "148", + "Latitude (average)": "15", + "Longitude (average)": "19", + }, + "Chile": { + "Alpha-2 code": "CL", + "Alpha-3 code": "CHL", + "Numeric code": "152", + "Latitude (average)": "-30", + "Longitude (average)": "-71", + }, + "China": { + "Alpha-2 code": "CN", + "Alpha-3 code": "CHN", + "Numeric code": "156", + "Latitude (average)": "35", + "Longitude (average)": "105", + }, + "Christmas Island": { + "Alpha-2 code": "CX", + "Alpha-3 code": "CXR", + "Numeric code": "162", + "Latitude (average)": "-10.5", + "Longitude (average)": "105.6667", + }, + "Cocos (Keeling) Islands": { + "Alpha-2 code": "CC", + "Alpha-3 code": "CCK", + "Numeric code": "166", + "Latitude (average)": "-12.5", + "Longitude 
(average)": "96.8333", + }, + "Colombia": { + "Alpha-2 code": "CO", + "Alpha-3 code": "COL", + "Numeric code": "170", + "Latitude (average)": "4", + "Longitude (average)": "-72", + }, + "Comoros": { + "Alpha-2 code": "KM", + "Alpha-3 code": "COM", + "Numeric code": "174", + "Latitude (average)": "-12.1667", + "Longitude (average)": "44.25", + }, + "Congo": { + "Alpha-2 code": "CG", + "Alpha-3 code": "COG", + "Numeric code": "178", + "Latitude (average)": "-1", + "Longitude (average)": "15", + }, + "Congo, the Democratic Republic of the": { + "Alpha-2 code": "CD", + "Alpha-3 code": "COD", + "Numeric code": "180", + "Latitude (average)": "0", + "Longitude (average)": "25", + }, + "Cook Islands": { + "Alpha-2 code": "CK", + "Alpha-3 code": "COK", + "Numeric code": "184", + "Latitude (average)": "-21.2333", + "Longitude (average)": "-159.7667", + }, + "Costa Rica": { + "Alpha-2 code": "CR", + "Alpha-3 code": "CRI", + "Numeric code": "188", + "Latitude (average)": "10", + "Longitude (average)": "-84", + }, + "Côte d'Ivoire": { + "Alpha-2 code": "CI", + "Alpha-3 code": "CIV", + "Numeric code": "384", + "Latitude (average)": "8", + "Longitude (average)": "-5", + }, + "Ivory Coast": { + "Alpha-2 code": "CI", + "Alpha-3 code": "CIV", + "Numeric code": "384", + "Latitude (average)": "8", + "Longitude (average)": "-5", + }, + "Croatia": { + "Alpha-2 code": "HR", + "Alpha-3 code": "HRV", + "Numeric code": "191", + "Latitude (average)": "45.1667", + "Longitude (average)": "15.5", + }, + "Cuba": { + "Alpha-2 code": "CU", + "Alpha-3 code": "CUB", + "Numeric code": "192", + "Latitude (average)": "21.5", + "Longitude (average)": "-80", + }, + "Cyprus": { + "Alpha-2 code": "CY", + "Alpha-3 code": "CYP", + "Numeric code": "196", + "Latitude (average)": "35", + "Longitude (average)": "33", + }, + "Czech Republic": { + "Alpha-2 code": "CZ", + "Alpha-3 code": "CZE", + "Numeric code": "203", + "Latitude (average)": "49.75", + "Longitude (average)": "15.5", + }, + "Denmark": { + "Alpha-2 
code": "DK", + "Alpha-3 code": "DNK", + "Numeric code": "208", + "Latitude (average)": "56", + "Longitude (average)": "10", + }, + "Djibouti": { + "Alpha-2 code": "DJ", + "Alpha-3 code": "DJI", + "Numeric code": "262", + "Latitude (average)": "11.5", + "Longitude (average)": "43", + }, + "Dominica": { + "Alpha-2 code": "DM", + "Alpha-3 code": "DMA", + "Numeric code": "212", + "Latitude (average)": "15.4167", + "Longitude (average)": "-61.3333", + }, + "Dominican Republic": { + "Alpha-2 code": "DO", + "Alpha-3 code": "DOM", + "Numeric code": "214", + "Latitude (average)": "19", + "Longitude (average)": "-70.6667", + }, + "Ecuador": { + "Alpha-2 code": "EC", + "Alpha-3 code": "ECU", + "Numeric code": "218", + "Latitude (average)": "-2", + "Longitude (average)": "-77.5", + }, + "Egypt": { + "Alpha-2 code": "EG", + "Alpha-3 code": "EGY", + "Numeric code": "818", + "Latitude (average)": "27", + "Longitude (average)": "30", + }, + "El Salvador": { + "Alpha-2 code": "SV", + "Alpha-3 code": "SLV", + "Numeric code": "222", + "Latitude (average)": "13.8333", + "Longitude (average)": "-88.9167", + }, + "Equatorial Guinea": { + "Alpha-2 code": "GQ", + "Alpha-3 code": "GNQ", + "Numeric code": "226", + "Latitude (average)": "2", + "Longitude (average)": "10", + }, + "Eritrea": { + "Alpha-2 code": "ER", + "Alpha-3 code": "ERI", + "Numeric code": "232", + "Latitude (average)": "15", + "Longitude (average)": "39", + }, + "Estonia": { + "Alpha-2 code": "EE", + "Alpha-3 code": "EST", + "Numeric code": "233", + "Latitude (average)": "59", + "Longitude (average)": "26", + }, + "Ethiopia": { + "Alpha-2 code": "ET", + "Alpha-3 code": "ETH", + "Numeric code": "231", + "Latitude (average)": "8", + "Longitude (average)": "38", + }, + "Falkland Islands (Malvinas)": { + "Alpha-2 code": "FK", + "Alpha-3 code": "FLK", + "Numeric code": "238", + "Latitude (average)": "-51.75", + "Longitude (average)": "-59", + }, + "Faroe Islands": { + "Alpha-2 code": "FO", + "Alpha-3 code": "FRO", + "Numeric 
code": "234", + "Latitude (average)": "62", + "Longitude (average)": "-7", + }, + "Fiji": { + "Alpha-2 code": "FJ", + "Alpha-3 code": "FJI", + "Numeric code": "242", + "Latitude (average)": "-18", + "Longitude (average)": "175", + }, + "Finland": { + "Alpha-2 code": "FI", + "Alpha-3 code": "FIN", + "Numeric code": "246", + "Latitude (average)": "64", + "Longitude (average)": "26", + }, + "France": { + "Alpha-2 code": "FR", + "Alpha-3 code": "FRA", + "Numeric code": "250", + "Latitude (average)": "46", + "Longitude (average)": "2", + }, + "French Guiana": { + "Alpha-2 code": "GF", + "Alpha-3 code": "GUF", + "Numeric code": "254", + "Latitude (average)": "4", + "Longitude (average)": "-53", + }, + "French Polynesia": { + "Alpha-2 code": "PF", + "Alpha-3 code": "PYF", + "Numeric code": "258", + "Latitude (average)": "-15", + "Longitude (average)": "-140", + }, + "French Southern Territories": { + "Alpha-2 code": "TF", + "Alpha-3 code": "ATF", + "Numeric code": "260", + "Latitude (average)": "-43", + "Longitude (average)": "67", + }, + "Gabon": { + "Alpha-2 code": "GA", + "Alpha-3 code": "GAB", + "Numeric code": "266", + "Latitude (average)": "-1", + "Longitude (average)": "11.75", + }, + "Gambia": { + "Alpha-2 code": "GM", + "Alpha-3 code": "GMB", + "Numeric code": "270", + "Latitude (average)": "13.4667", + "Longitude (average)": "-16.5667", + }, + "Georgia": { + "Alpha-2 code": "GE", + "Alpha-3 code": "GEO", + "Numeric code": "268", + "Latitude (average)": "42", + "Longitude (average)": "43.5", + }, + "Germany": { + "Alpha-2 code": "DE", + "Alpha-3 code": "DEU", + "Numeric code": "276", + "Latitude (average)": "51", + "Longitude (average)": "9", + }, + "Ghana": { + "Alpha-2 code": "GH", + "Alpha-3 code": "GHA", + "Numeric code": "288", + "Latitude (average)": "8", + "Longitude (average)": "-2", + }, + "Gibraltar": { + "Alpha-2 code": "GI", + "Alpha-3 code": "GIB", + "Numeric code": "292", + "Latitude (average)": "36.1833", + "Longitude (average)": "-5.3667", + }, + 
"Greece": { + "Alpha-2 code": "GR", + "Alpha-3 code": "GRC", + "Numeric code": "300", + "Latitude (average)": "39", + "Longitude (average)": "22", + }, + "Greenland": { + "Alpha-2 code": "GL", + "Alpha-3 code": "GRL", + "Numeric code": "304", + "Latitude (average)": "72", + "Longitude (average)": "-40", + }, + "Grenada": { + "Alpha-2 code": "GD", + "Alpha-3 code": "GRD", + "Numeric code": "308", + "Latitude (average)": "12.1167", + "Longitude (average)": "-61.6667", + }, + "Guadeloupe": { + "Alpha-2 code": "GP", + "Alpha-3 code": "GLP", + "Numeric code": "312", + "Latitude (average)": "16.25", + "Longitude (average)": "-61.5833", + }, + "Guam": { + "Alpha-2 code": "GU", + "Alpha-3 code": "GUM", + "Numeric code": "316", + "Latitude (average)": "13.4667", + "Longitude (average)": "144.7833", + }, + "Guatemala": { + "Alpha-2 code": "GT", + "Alpha-3 code": "GTM", + "Numeric code": "320", + "Latitude (average)": "15.5", + "Longitude (average)": "-90.25", + }, + "Guernsey": { + "Alpha-2 code": "GG", + "Alpha-3 code": "GGY", + "Numeric code": "831", + "Latitude (average)": "49.5", + "Longitude (average)": "-2.56", + }, + "Guinea": { + "Alpha-2 code": "GN", + "Alpha-3 code": "GIN", + "Numeric code": "324", + "Latitude (average)": "11", + "Longitude (average)": "-10", + }, + "Guinea-Bissau": { + "Alpha-2 code": "GW", + "Alpha-3 code": "GNB", + "Numeric code": "624", + "Latitude (average)": "12", + "Longitude (average)": "-15", + }, + "Guyana": { + "Alpha-2 code": "GY", + "Alpha-3 code": "GUY", + "Numeric code": "328", + "Latitude (average)": "5", + "Longitude (average)": "-59", + }, + "Haiti": { + "Alpha-2 code": "HT", + "Alpha-3 code": "HTI", + "Numeric code": "332", + "Latitude (average)": "19", + "Longitude (average)": "-72.4167", + }, + "Heard Island and McDonald Islands": { + "Alpha-2 code": "HM", + "Alpha-3 code": "HMD", + "Numeric code": "334", + "Latitude (average)": "-53.1", + "Longitude (average)": "72.5167", + }, + "Holy See (Vatican City State)": { + "Alpha-2 
code": "VA", + "Alpha-3 code": "VAT", + "Numeric code": "336", + "Latitude (average)": "41.9", + "Longitude (average)": "12.45", + }, + "Honduras": { + "Alpha-2 code": "HN", + "Alpha-3 code": "HND", + "Numeric code": "340", + "Latitude (average)": "15", + "Longitude (average)": "-86.5", + }, + "Hong Kong": { + "Alpha-2 code": "HK", + "Alpha-3 code": "HKG", + "Numeric code": "344", + "Latitude (average)": "22.25", + "Longitude (average)": "114.1667", + }, + "Hungary": { + "Alpha-2 code": "HU", + "Alpha-3 code": "HUN", + "Numeric code": "348", + "Latitude (average)": "47", + "Longitude (average)": "20", + }, + "Iceland": { + "Alpha-2 code": "IS", + "Alpha-3 code": "ISL", + "Numeric code": "352", + "Latitude (average)": "65", + "Longitude (average)": "-18", + }, + "India": { + "Alpha-2 code": "IN", + "Alpha-3 code": "IND", + "Numeric code": "356", + "Latitude (average)": "20", + "Longitude (average)": "77", + }, + "Indonesia": { + "Alpha-2 code": "ID", + "Alpha-3 code": "IDN", + "Numeric code": "360", + "Latitude (average)": "-5", + "Longitude (average)": "120", + }, + "Iran, Islamic Republic of": { + "Alpha-2 code": "IR", + "Alpha-3 code": "IRN", + "Numeric code": "364", + "Latitude (average)": "32", + "Longitude (average)": "53", + }, + "Iraq": { + "Alpha-2 code": "IQ", + "Alpha-3 code": "IRQ", + "Numeric code": "368", + "Latitude (average)": "33", + "Longitude (average)": "44", + }, + "Ireland": { + "Alpha-2 code": "IE", + "Alpha-3 code": "IRL", + "Numeric code": "372", + "Latitude (average)": "53", + "Longitude (average)": "-8", + }, + "Isle of Man": { + "Alpha-2 code": "IM", + "Alpha-3 code": "IMN", + "Numeric code": "833", + "Latitude (average)": "54.23", + "Longitude (average)": "-4.55", + }, + "Israel": { + "Alpha-2 code": "IL", + "Alpha-3 code": "ISR", + "Numeric code": "376", + "Latitude (average)": "31.5", + "Longitude (average)": "34.75", + }, + "Italy": { + "Alpha-2 code": "IT", + "Alpha-3 code": "ITA", + "Numeric code": "380", + "Latitude (average)": 
"42.8333", + "Longitude (average)": "12.8333", + }, + "Jamaica": { + "Alpha-2 code": "JM", + "Alpha-3 code": "JAM", + "Numeric code": "388", + "Latitude (average)": "18.25", + "Longitude (average)": "-77.5", + }, + "Japan": { + "Alpha-2 code": "JP", + "Alpha-3 code": "JPN", + "Numeric code": "392", + "Latitude (average)": "36", + "Longitude (average)": "138", + }, + "Jersey": { + "Alpha-2 code": "JE", + "Alpha-3 code": "JEY", + "Numeric code": "832", + "Latitude (average)": "49.21", + "Longitude (average)": "-2.13", + }, + "Jordan": { + "Alpha-2 code": "JO", + "Alpha-3 code": "JOR", + "Numeric code": "400", + "Latitude (average)": "31", + "Longitude (average)": "36", + }, + "Kazakhstan": { + "Alpha-2 code": "KZ", + "Alpha-3 code": "KAZ", + "Numeric code": "398", + "Latitude (average)": "48", + "Longitude (average)": "68", + }, + "Kenya": { + "Alpha-2 code": "KE", + "Alpha-3 code": "KEN", + "Numeric code": "404", + "Latitude (average)": "1", + "Longitude (average)": "38", + }, + "Kiribati": { + "Alpha-2 code": "KI", + "Alpha-3 code": "KIR", + "Numeric code": "296", + "Latitude (average)": "1.4167", + "Longitude (average)": "173", + }, + "Korea, Democratic People's Republic of": { + "Alpha-2 code": "KP", + "Alpha-3 code": "PRK", + "Numeric code": "408", + "Latitude (average)": "40", + "Longitude (average)": "127", + }, + "Korea, Republic of": { + "Alpha-2 code": "KR", + "Alpha-3 code": "KOR", + "Numeric code": "410", + "Latitude (average)": "37", + "Longitude (average)": "127.5", + }, + "South Korea": { + "Alpha-2 code": "KR", + "Alpha-3 code": "KOR", + "Numeric code": "410", + "Latitude (average)": "37", + "Longitude (average)": "127.5", + }, + "Kuwait": { + "Alpha-2 code": "KW", + "Alpha-3 code": "KWT", + "Numeric code": "414", + "Latitude (average)": "29.3375", + "Longitude (average)": "47.6581", + }, + "Kyrgyzstan": { + "Alpha-2 code": "KG", + "Alpha-3 code": "KGZ", + "Numeric code": "417", + "Latitude (average)": "41", + "Longitude (average)": "75", + }, + "Lao 
People's Democratic Republic": { + "Alpha-2 code": "LA", + "Alpha-3 code": "LAO", + "Numeric code": "418", + "Latitude (average)": "18", + "Longitude (average)": "105", + }, + "Latvia": { + "Alpha-2 code": "LV", + "Alpha-3 code": "LVA", + "Numeric code": "428", + "Latitude (average)": "57", + "Longitude (average)": "25", + }, + "Lebanon": { + "Alpha-2 code": "LB", + "Alpha-3 code": "LBN", + "Numeric code": "422", + "Latitude (average)": "33.8333", + "Longitude (average)": "35.8333", + }, + "Lesotho": { + "Alpha-2 code": "LS", + "Alpha-3 code": "LSO", + "Numeric code": "426", + "Latitude (average)": "-29.5", + "Longitude (average)": "28.5", + }, + "Liberia": { + "Alpha-2 code": "LR", + "Alpha-3 code": "LBR", + "Numeric code": "430", + "Latitude (average)": "6.5", + "Longitude (average)": "-9.5", + }, + "Libya": { + "Alpha-2 code": "LY", + "Alpha-3 code": "LBY", + "Numeric code": "434", + "Latitude (average)": "25", + "Longitude (average)": "17", + }, + "Liechtenstein": { + "Alpha-2 code": "LI", + "Alpha-3 code": "LIE", + "Numeric code": "438", + "Latitude (average)": "47.1667", + "Longitude (average)": "9.5333", + }, + "Lithuania": { + "Alpha-2 code": "LT", + "Alpha-3 code": "LTU", + "Numeric code": "440", + "Latitude (average)": "56", + "Longitude (average)": "24", + }, + "Luxembourg": { + "Alpha-2 code": "LU", + "Alpha-3 code": "LUX", + "Numeric code": "442", + "Latitude (average)": "49.75", + "Longitude (average)": "6.1667", + }, + "Macao": { + "Alpha-2 code": "MO", + "Alpha-3 code": "MAC", + "Numeric code": "446", + "Latitude (average)": "22.1667", + "Longitude (average)": "113.55", + }, + "Macedonia, the former Yugoslav Republic of": { + "Alpha-2 code": "MK", + "Alpha-3 code": "MKD", + "Numeric code": "807", + "Latitude (average)": "41.8333", + "Longitude (average)": "22", + }, + "Madagascar": { + "Alpha-2 code": "MG", + "Alpha-3 code": "MDG", + "Numeric code": "450", + "Latitude (average)": "-20", + "Longitude (average)": "47", + }, + "Malawi": { + "Alpha-2 
code": "MW", + "Alpha-3 code": "MWI", + "Numeric code": "454", + "Latitude (average)": "-13.5", + "Longitude (average)": "34", + }, + "Malaysia": { + "Alpha-2 code": "MY", + "Alpha-3 code": "MYS", + "Numeric code": "458", + "Latitude (average)": "2.5", + "Longitude (average)": "112.5", + }, + "Maldives": { + "Alpha-2 code": "MV", + "Alpha-3 code": "MDV", + "Numeric code": "462", + "Latitude (average)": "3.25", + "Longitude (average)": "73", + }, + "Mali": { + "Alpha-2 code": "ML", + "Alpha-3 code": "MLI", + "Numeric code": "466", + "Latitude (average)": "17", + "Longitude (average)": "-4", + }, + "Malta": { + "Alpha-2 code": "MT", + "Alpha-3 code": "MLT", + "Numeric code": "470", + "Latitude (average)": "35.8333", + "Longitude (average)": "14.5833", + }, + "Marshall Islands": { + "Alpha-2 code": "MH", + "Alpha-3 code": "MHL", + "Numeric code": "584", + "Latitude (average)": "9", + "Longitude (average)": "168", + }, + "Martinique": { + "Alpha-2 code": "MQ", + "Alpha-3 code": "MTQ", + "Numeric code": "474", + "Latitude (average)": "14.6667", + "Longitude (average)": "-61", + }, + "Mauritania": { + "Alpha-2 code": "MR", + "Alpha-3 code": "MRT", + "Numeric code": "478", + "Latitude (average)": "20", + "Longitude (average)": "-12", + }, + "Mauritius": { + "Alpha-2 code": "MU", + "Alpha-3 code": "MUS", + "Numeric code": "480", + "Latitude (average)": "-20.2833", + "Longitude (average)": "57.55", + }, + "Mayotte": { + "Alpha-2 code": "YT", + "Alpha-3 code": "MYT", + "Numeric code": "175", + "Latitude (average)": "-12.8333", + "Longitude (average)": "45.1667", + }, + "Mexico": { + "Alpha-2 code": "MX", + "Alpha-3 code": "MEX", + "Numeric code": "484", + "Latitude (average)": "23", + "Longitude (average)": "-102", + }, + "Micronesia, Federated States of": { + "Alpha-2 code": "FM", + "Alpha-3 code": "FSM", + "Numeric code": "583", + "Latitude (average)": "6.9167", + "Longitude (average)": "158.25", + }, + "Moldova, Republic of": { + "Alpha-2 code": "MD", + "Alpha-3 code": 
"MDA", + "Numeric code": "498", + "Latitude (average)": "47", + "Longitude (average)": "29", + }, + "Monaco": { + "Alpha-2 code": "MC", + "Alpha-3 code": "MCO", + "Numeric code": "492", + "Latitude (average)": "43.7333", + "Longitude (average)": "7.4", + }, + "Mongolia": { + "Alpha-2 code": "MN", + "Alpha-3 code": "MNG", + "Numeric code": "496", + "Latitude (average)": "46", + "Longitude (average)": "105", + }, + "Montenegro": { + "Alpha-2 code": "ME", + "Alpha-3 code": "MNE", + "Numeric code": "499", + "Latitude (average)": "42", + "Longitude (average)": "19", + }, + "Montserrat": { + "Alpha-2 code": "MS", + "Alpha-3 code": "MSR", + "Numeric code": "500", + "Latitude (average)": "16.75", + "Longitude (average)": "-62.2", + }, + "Morocco": { + "Alpha-2 code": "MA", + "Alpha-3 code": "MAR", + "Numeric code": "504", + "Latitude (average)": "32", + "Longitude (average)": "-5", + }, + "Mozambique": { + "Alpha-2 code": "MZ", + "Alpha-3 code": "MOZ", + "Numeric code": "508", + "Latitude (average)": "-18.25", + "Longitude (average)": "35", + }, + "Myanmar": { + "Alpha-2 code": "MM", + "Alpha-3 code": "MMR", + "Numeric code": "104", + "Latitude (average)": "22", + "Longitude (average)": "98", + }, + "Burma": { + "Alpha-2 code": "MM", + "Alpha-3 code": "MMR", + "Numeric code": "104", + "Latitude (average)": "22", + "Longitude (average)": "98", + }, + "Namibia": { + "Alpha-2 code": "NA", + "Alpha-3 code": "NAM", + "Numeric code": "516", + "Latitude (average)": "-22", + "Longitude (average)": "17", + }, + "Nauru": { + "Alpha-2 code": "NR", + "Alpha-3 code": "NRU", + "Numeric code": "520", + "Latitude (average)": "-0.5333", + "Longitude (average)": "166.9167", + }, + "Nepal": { + "Alpha-2 code": "NP", + "Alpha-3 code": "NPL", + "Numeric code": "524", + "Latitude (average)": "28", + "Longitude (average)": "84", + }, + "Netherlands": { + "Alpha-2 code": "NL", + "Alpha-3 code": "NLD", + "Numeric code": "528", + "Latitude (average)": "52.5", + "Longitude (average)": "5.75", + }, + 
"Netherlands Antilles": { + "Alpha-2 code": "AN", + "Alpha-3 code": "ANT", + "Numeric code": "530", + "Latitude (average)": "12.25", + "Longitude (average)": "-68.75", + }, + "New Caledonia": { + "Alpha-2 code": "NC", + "Alpha-3 code": "NCL", + "Numeric code": "540", + "Latitude (average)": "-21.5", + "Longitude (average)": "165.5", + }, + "New Zealand": { + "Alpha-2 code": "NZ", + "Alpha-3 code": "NZL", + "Numeric code": "554", + "Latitude (average)": "-41", + "Longitude (average)": "174", + }, + "Nicaragua": { + "Alpha-2 code": "NI", + "Alpha-3 code": "NIC", + "Numeric code": "558", + "Latitude (average)": "13", + "Longitude (average)": "-85", + }, + "Niger": { + "Alpha-2 code": "NE", + "Alpha-3 code": "NER", + "Numeric code": "562", + "Latitude (average)": "16", + "Longitude (average)": "8", + }, + "Nigeria": { + "Alpha-2 code": "NG", + "Alpha-3 code": "NGA", + "Numeric code": "566", + "Latitude (average)": "10", + "Longitude (average)": "8", + }, + "Niue": { + "Alpha-2 code": "NU", + "Alpha-3 code": "NIU", + "Numeric code": "570", + "Latitude (average)": "-19.0333", + "Longitude (average)": "-169.8667", + }, + "Norfolk Island": { + "Alpha-2 code": "NF", + "Alpha-3 code": "NFK", + "Numeric code": "574", + "Latitude (average)": "-29.0333", + "Longitude (average)": "167.95", + }, + "Northern Mariana Islands": { + "Alpha-2 code": "MP", + "Alpha-3 code": "MNP", + "Numeric code": "580", + "Latitude (average)": "15.2", + "Longitude (average)": "145.75", + }, + "Norway": { + "Alpha-2 code": "NO", + "Alpha-3 code": "NOR", + "Numeric code": "578", + "Latitude (average)": "62", + "Longitude (average)": "10", + }, + "Oman": { + "Alpha-2 code": "OM", + "Alpha-3 code": "OMN", + "Numeric code": "512", + "Latitude (average)": "21", + "Longitude (average)": "57", + }, + "Pakistan": { + "Alpha-2 code": "PK", + "Alpha-3 code": "PAK", + "Numeric code": "586", + "Latitude (average)": "30", + "Longitude (average)": "70", + }, + "Palau": { + "Alpha-2 code": "PW", + "Alpha-3 code": 
"PLW", + "Numeric code": "585", + "Latitude (average)": "7.5", + "Longitude (average)": "134.5", + }, + "Palestinian Territory, Occupied": { + "Alpha-2 code": "PS", + "Alpha-3 code": "PSE", + "Numeric code": "275", + "Latitude (average)": "32", + "Longitude (average)": "35.25", + }, + "Panama": { + "Alpha-2 code": "PA", + "Alpha-3 code": "PAN", + "Numeric code": "591", + "Latitude (average)": "9", + "Longitude (average)": "-80", + }, + "Papua New Guinea": { + "Alpha-2 code": "PG", + "Alpha-3 code": "PNG", + "Numeric code": "598", + "Latitude (average)": "-6", + "Longitude (average)": "147", + }, + "Paraguay": { + "Alpha-2 code": "PY", + "Alpha-3 code": "PRY", + "Numeric code": "600", + "Latitude (average)": "-23", + "Longitude (average)": "-58", + }, + "Peru": { + "Alpha-2 code": "PE", + "Alpha-3 code": "PER", + "Numeric code": "604", + "Latitude (average)": "-10", + "Longitude (average)": "-76", + }, + "Philippines": { + "Alpha-2 code": "PH", + "Alpha-3 code": "PHL", + "Numeric code": "608", + "Latitude (average)": "13", + "Longitude (average)": "122", + }, + "Pitcairn": { + "Alpha-2 code": "PN", + "Alpha-3 code": "PCN", + "Numeric code": "612", + "Latitude (average)": "-24.7", + "Longitude (average)": "-127.4", + }, + "Poland": { + "Alpha-2 code": "PL", + "Alpha-3 code": "POL", + "Numeric code": "616", + "Latitude (average)": "52", + "Longitude (average)": "20", + }, + "Portugal": { + "Alpha-2 code": "PT", + "Alpha-3 code": "PRT", + "Numeric code": "620", + "Latitude (average)": "39.5", + "Longitude (average)": "-8", + }, + "Puerto Rico": { + "Alpha-2 code": "PR", + "Alpha-3 code": "PRI", + "Numeric code": "630", + "Latitude (average)": "18.25", + "Longitude (average)": "-66.5", + }, + "Qatar": { + "Alpha-2 code": "QA", + "Alpha-3 code": "QAT", + "Numeric code": "634", + "Latitude (average)": "25.5", + "Longitude (average)": "51.25", + }, + "Réunion": { + "Alpha-2 code": "RE", + "Alpha-3 code": "REU", + "Numeric code": "638", + "Latitude (average)": "-21.1", + 
"Longitude (average)": "55.6", + }, + "Romania": { + "Alpha-2 code": "RO", + "Alpha-3 code": "ROU", + "Numeric code": "642", + "Latitude (average)": "46", + "Longitude (average)": "25", + }, + "Russian Federation": { + "Alpha-2 code": "RU", + "Alpha-3 code": "RUS", + "Numeric code": "643", + "Latitude (average)": "60", + "Longitude (average)": "100", + }, + "Russia": { + "Alpha-2 code": "RU", + "Alpha-3 code": "RUS", + "Numeric code": "643", + "Latitude (average)": "60", + "Longitude (average)": "100", + }, + "Rwanda": { + "Alpha-2 code": "RW", + "Alpha-3 code": "RWA", + "Numeric code": "646", + "Latitude (average)": "-2", + "Longitude (average)": "30", + }, + "Saint Helena, Ascension and Tristan da Cunha": { + "Alpha-2 code": "SH", + "Alpha-3 code": "SHN", + "Numeric code": "654", + "Latitude (average)": "-15.9333", + "Longitude (average)": "-5.7", + }, + "Saint Kitts and Nevis": { + "Alpha-2 code": "KN", + "Alpha-3 code": "KNA", + "Numeric code": "659", + "Latitude (average)": "17.3333", + "Longitude (average)": "-62.75", + }, + "Saint Lucia": { + "Alpha-2 code": "LC", + "Alpha-3 code": "LCA", + "Numeric code": "662", + "Latitude (average)": "13.8833", + "Longitude (average)": "-61.1333", + }, + "Saint Pierre and Miquelon": { + "Alpha-2 code": "PM", + "Alpha-3 code": "SPM", + "Numeric code": "666", + "Latitude (average)": "46.8333", + "Longitude (average)": "-56.3333", + }, + "Saint Vincent and the Grenadines": { + "Alpha-2 code": "VC", + "Alpha-3 code": "VCT", + "Numeric code": "670", + "Latitude (average)": "13.25", + "Longitude (average)": "-61.2", + }, + "Samoa": { + "Alpha-2 code": "WS", + "Alpha-3 code": "WSM", + "Numeric code": "882", + "Latitude (average)": "-13.5833", + "Longitude (average)": "-172.3333", + }, + "San Marino": { + "Alpha-2 code": "SM", + "Alpha-3 code": "SMR", + "Numeric code": "674", + "Latitude (average)": "43.7667", + "Longitude (average)": "12.4167", + }, + "Sao Tome and Principe": { + "Alpha-2 code": "ST", + "Alpha-3 code": "STP", + 
"Numeric code": "678", + "Latitude (average)": "1", + "Longitude (average)": "7", + }, + "Saudi Arabia": { + "Alpha-2 code": "SA", + "Alpha-3 code": "SAU", + "Numeric code": "682", + "Latitude (average)": "25", + "Longitude (average)": "45", + }, + "Senegal": { + "Alpha-2 code": "SN", + "Alpha-3 code": "SEN", + "Numeric code": "686", + "Latitude (average)": "14", + "Longitude (average)": "-14", + }, + "Serbia": { + "Alpha-2 code": "RS", + "Alpha-3 code": "SRB", + "Numeric code": "688", + "Latitude (average)": "44", + "Longitude (average)": "21", + }, + "Seychelles": { + "Alpha-2 code": "SC", + "Alpha-3 code": "SYC", + "Numeric code": "690", + "Latitude (average)": "-4.5833", + "Longitude (average)": "55.6667", + }, + "Sierra Leone": { + "Alpha-2 code": "SL", + "Alpha-3 code": "SLE", + "Numeric code": "694", + "Latitude (average)": "8.5", + "Longitude (average)": "-11.5", + }, + "Singapore": { + "Alpha-2 code": "SG", + "Alpha-3 code": "SGP", + "Numeric code": "702", + "Latitude (average)": "1.3667", + "Longitude (average)": "103.8", + }, + "Slovakia": { + "Alpha-2 code": "SK", + "Alpha-3 code": "SVK", + "Numeric code": "703", + "Latitude (average)": "48.6667", + "Longitude (average)": "19.5", + }, + "Slovenia": { + "Alpha-2 code": "SI", + "Alpha-3 code": "SVN", + "Numeric code": "705", + "Latitude (average)": "46", + "Longitude (average)": "15", + }, + "Solomon Islands": { + "Alpha-2 code": "SB", + "Alpha-3 code": "SLB", + "Numeric code": "90", + "Latitude (average)": "-8", + "Longitude (average)": "159", + }, + "Somalia": { + "Alpha-2 code": "SO", + "Alpha-3 code": "SOM", + "Numeric code": "706", + "Latitude (average)": "10", + "Longitude (average)": "49", + }, + "South Africa": { + "Alpha-2 code": "ZA", + "Alpha-3 code": "ZAF", + "Numeric code": "710", + "Latitude (average)": "-29", + "Longitude (average)": "24", + }, + "South Georgia and the South Sandwich Islands": { + "Alpha-2 code": "GS", + "Alpha-3 code": "SGS", + "Numeric code": "239", + "Latitude 
(average)": "-54.5", + "Longitude (average)": "-37", + }, + "South Sudan": { + "Alpha-2 code": "SS", + "Alpha-3 code": "SSD", + "Numeric code": "728", + "Latitude (average)": "8", + "Longitude (average)": "30", + }, + "Spain": { + "Alpha-2 code": "ES", + "Alpha-3 code": "ESP", + "Numeric code": "724", + "Latitude (average)": "40", + "Longitude (average)": "-4", + }, + "Sri Lanka": { + "Alpha-2 code": "LK", + "Alpha-3 code": "LKA", + "Numeric code": "144", + "Latitude (average)": "7", + "Longitude (average)": "81", + }, + "Sudan": { + "Alpha-2 code": "SD", + "Alpha-3 code": "SDN", + "Numeric code": "736", + "Latitude (average)": "15", + "Longitude (average)": "30", + }, + "Suriname": { + "Alpha-2 code": "SR", + "Alpha-3 code": "SUR", + "Numeric code": "740", + "Latitude (average)": "4", + "Longitude (average)": "-56", + }, + "Svalbard and Jan Mayen": { + "Alpha-2 code": "SJ", + "Alpha-3 code": "SJM", + "Numeric code": "744", + "Latitude (average)": "78", + "Longitude (average)": "20", + }, + "Swaziland": { + "Alpha-2 code": "SZ", + "Alpha-3 code": "SWZ", + "Numeric code": "748", + "Latitude (average)": "-26.5", + "Longitude (average)": "31.5", + }, + "Sweden": { + "Alpha-2 code": "SE", + "Alpha-3 code": "SWE", + "Numeric code": "752", + "Latitude (average)": "62", + "Longitude (average)": "15", + }, + "Switzerland": { + "Alpha-2 code": "CH", + "Alpha-3 code": "CHE", + "Numeric code": "756", + "Latitude (average)": "47", + "Longitude (average)": "8", + }, + "Syrian Arab Republic": { + "Alpha-2 code": "SY", + "Alpha-3 code": "SYR", + "Numeric code": "760", + "Latitude (average)": "35", + "Longitude (average)": "38", + }, + "Taiwan, Province of China": { + "Alpha-2 code": "TW", + "Alpha-3 code": "TWN", + "Numeric code": "158", + "Latitude (average)": "23.5", + "Longitude (average)": "121", + }, + "Taiwan": { + "Alpha-2 code": "TW", + "Alpha-3 code": "TWN", + "Numeric code": "158", + "Latitude (average)": "23.5", + "Longitude (average)": "121", + }, + "Tajikistan": { + 
"Alpha-2 code": "TJ", + "Alpha-3 code": "TJK", + "Numeric code": "762", + "Latitude (average)": "39", + "Longitude (average)": "71", + }, + "Tanzania, United Republic of": { + "Alpha-2 code": "TZ", + "Alpha-3 code": "TZA", + "Numeric code": "834", + "Latitude (average)": "-6", + "Longitude (average)": "35", + }, + "Thailand": { + "Alpha-2 code": "TH", + "Alpha-3 code": "THA", + "Numeric code": "764", + "Latitude (average)": "15", + "Longitude (average)": "100", + }, + "Timor-Leste": { + "Alpha-2 code": "TL", + "Alpha-3 code": "TLS", + "Numeric code": "626", + "Latitude (average)": "-8.55", + "Longitude (average)": "125.5167", + }, + "Togo": { + "Alpha-2 code": "TG", + "Alpha-3 code": "TGO", + "Numeric code": "768", + "Latitude (average)": "8", + "Longitude (average)": "1.1667", + }, + "Tokelau": { + "Alpha-2 code": "TK", + "Alpha-3 code": "TKL", + "Numeric code": "772", + "Latitude (average)": "-9", + "Longitude (average)": "-172", + }, + "Tonga": { + "Alpha-2 code": "TO", + "Alpha-3 code": "TON", + "Numeric code": "776", + "Latitude (average)": "-20", + "Longitude (average)": "-175", + }, + "Trinidad and Tobago": { + "Alpha-2 code": "TT", + "Alpha-3 code": "TTO", + "Numeric code": "780", + "Latitude (average)": "11", + "Longitude (average)": "-61", + }, + "Tunisia": { + "Alpha-2 code": "TN", + "Alpha-3 code": "TUN", + "Numeric code": "788", + "Latitude (average)": "34", + "Longitude (average)": "9", + }, + "Turkey": { + "Alpha-2 code": "TR", + "Alpha-3 code": "TUR", + "Numeric code": "792", + "Latitude (average)": "39", + "Longitude (average)": "35", + }, + "Turkmenistan": { + "Alpha-2 code": "TM", + "Alpha-3 code": "TKM", + "Numeric code": "795", + "Latitude (average)": "40", + "Longitude (average)": "60", + }, + "Turks and Caicos Islands": { + "Alpha-2 code": "TC", + "Alpha-3 code": "TCA", + "Numeric code": "796", + "Latitude (average)": "21.75", + "Longitude (average)": "-71.5833", + }, + "Tuvalu": { + "Alpha-2 code": "TV", + "Alpha-3 code": "TUV", + "Numeric 
code": "798", + "Latitude (average)": "-8", + "Longitude (average)": "178", + }, + "Uganda": { + "Alpha-2 code": "UG", + "Alpha-3 code": "UGA", + "Numeric code": "800", + "Latitude (average)": "1", + "Longitude (average)": "32", + }, + "Ukraine": { + "Alpha-2 code": "UA", + "Alpha-3 code": "UKR", + "Numeric code": "804", + "Latitude (average)": "49", + "Longitude (average)": "32", + }, + "United Arab Emirates": { + "Alpha-2 code": "AE", + "Alpha-3 code": "ARE", + "Numeric code": "784", + "Latitude (average)": "24", + "Longitude (average)": "54", + }, + "United Kingdom": { + "Alpha-2 code": "GB", + "Alpha-3 code": "GBR", + "Numeric code": "826", + "Latitude (average)": "54", + "Longitude (average)": "-2", + }, + "United States": { + "Alpha-2 code": "US", + "Alpha-3 code": "USA", + "Numeric code": "840", + "Latitude (average)": "38", + "Longitude (average)": "-97", + }, + "United States Minor Outlying Islands": { + "Alpha-2 code": "UM", + "Alpha-3 code": "UMI", + "Numeric code": "581", + "Latitude (average)": "19.2833", + "Longitude (average)": "166.6", + }, + "Uruguay": { + "Alpha-2 code": "UY", + "Alpha-3 code": "URY", + "Numeric code": "858", + "Latitude (average)": "-33", + "Longitude (average)": "-56", + }, + "Uzbekistan": { + "Alpha-2 code": "UZ", + "Alpha-3 code": "UZB", + "Numeric code": "860", + "Latitude (average)": "41", + "Longitude (average)": "64", + }, + "Vanuatu": { + "Alpha-2 code": "VU", + "Alpha-3 code": "VUT", + "Numeric code": "548", + "Latitude (average)": "-16", + "Longitude (average)": "167", + }, + "Venezuela": { + "Alpha-2 code": "VE", + "Alpha-3 code": "VEN", + "Numeric code": "862", + "Latitude (average)": "8", + "Longitude (average)": "-66", + }, + "Viet Nam": { + "Alpha-2 code": "VN", + "Alpha-3 code": "VNM", + "Numeric code": "704", + "Latitude (average)": "16", + "Longitude (average)": "106", + }, + "Vietnam": { + "Alpha-2 code": "VN", + "Alpha-3 code": "VNM", + "Numeric code": "704", + "Latitude (average)": "16", + "Longitude 
(average)": "106", + }, + "Virgin Islands, British": { + "Alpha-2 code": "VG", + "Alpha-3 code": "VGB", + "Numeric code": "92", + "Latitude (average)": "18.5", + "Longitude (average)": "-64.5", + }, + "Virgin Islands, U.S.": { + "Alpha-2 code": "VI", + "Alpha-3 code": "VIR", + "Numeric code": "850", + "Latitude (average)": "18.3333", + "Longitude (average)": "-64.8333", + }, + "Wallis and Futuna": { + "Alpha-2 code": "WF", + "Alpha-3 code": "WLF", + "Numeric code": "876", + "Latitude (average)": "-13.3", + "Longitude (average)": "-176.2", + }, + "Western Sahara": { + "Alpha-2 code": "EH", + "Alpha-3 code": "ESH", + "Numeric code": "732", + "Latitude (average)": "24.5", + "Longitude (average)": "-13", + }, + "Yemen": { + "Alpha-2 code": "YE", + "Alpha-3 code": "YEM", + "Numeric code": "887", + "Latitude (average)": "15", + "Longitude (average)": "48", + }, + "Zambia": { + "Alpha-2 code": "ZM", + "Alpha-3 code": "ZMB", + "Numeric code": "894", + "Latitude (average)": "-15", + "Longitude (average)": "30", + }, + "Zimbabwe": { + "Alpha-2 code": "ZW", + "Alpha-3 code": "ZWE", + "Numeric code": "716", + "Latitude (average)": "-20", + "Longitude (average)": "30", + }, +} + + +CURRENCIES = { + "AFGHANISTAN": { + "Currency": "Afghani", + "AlphabeticCode": "AFA", + "NumericCode": "004", + "MinorUnit": "", + "WidthdrawalDate": "2003-01", + }, + "ÅLAND ISLANDS": { + "Currency": "Markka", + "AlphabeticCode": "FIM", + "NumericCode": "246", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "ALBANIA": { + "Currency": "Old Lek", + "AlphabeticCode": "ALK", + "NumericCode": "008", + "MinorUnit": "", + "WidthdrawalDate": "1989-12", + }, + "ALGERIA": { + "Currency": "Algerian Dinar", + "AlphabeticCode": "DZD", + "NumericCode": "012", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "AMERICAN SAMOA": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "ANDORRA": { + "Currency": "French Franc", + 
"AlphabeticCode": "FRF", + "NumericCode": "250", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "ANGOLA": { + "Currency": "Kwanza Reajustado", + "AlphabeticCode": "AOR", + "NumericCode": "982", + "MinorUnit": "", + "WidthdrawalDate": "2000-02", + }, + "ANGUILLA": { + "Currency": "East Caribbean Dollar", + "AlphabeticCode": "XCD", + "NumericCode": "951", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "ANTARCTICA": { + "Currency": "No universal currency", + "AlphabeticCode": "", + "NumericCode": "", + "MinorUnit": "", + "WidthdrawalDate": "", + }, + "ANTIGUA AND BARBUDA": { + "Currency": "East Caribbean Dollar", + "AlphabeticCode": "XCD", + "NumericCode": "951", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "ARGENTINA": { + "Currency": "Peso", + "AlphabeticCode": "ARY", + "NumericCode": "032", + "MinorUnit": "", + "WidthdrawalDate": "1989 to 1990", + }, + "ARMENIA": { + "Currency": "Russian Ruble", + "AlphabeticCode": "RUR", + "NumericCode": "810", + "MinorUnit": "", + "WidthdrawalDate": "1994-08", + }, + "ARUBA": { + "Currency": "Aruban Florin", + "AlphabeticCode": "AWG", + "NumericCode": "533", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "AUSTRALIA": { + "Currency": "Australian Dollar", + "AlphabeticCode": "AUD", + "NumericCode": "036", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "AUSTRIA": { + "Currency": "Schilling", + "AlphabeticCode": "ATS", + "NumericCode": "040", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "AZERBAIJAN": { + "Currency": "Russian Ruble", + "AlphabeticCode": "RUR", + "NumericCode": "810", + "MinorUnit": "", + "WidthdrawalDate": "1994-08", + }, + "BAHAMAS (THE)": { + "Currency": "Bahamian Dollar", + "AlphabeticCode": "BSD", + "NumericCode": "044", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "BAHRAIN": { + "Currency": "Bahraini Dinar", + "AlphabeticCode": "BHD", + "NumericCode": "048", + "MinorUnit": "3", + "WidthdrawalDate": "", + }, + "BANGLADESH": { + "Currency": "Taka", + 
"AlphabeticCode": "BDT", + "NumericCode": "050", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "BARBADOS": { + "Currency": "Barbados Dollar", + "AlphabeticCode": "BBD", + "NumericCode": "052", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "BELARUS": { + "Currency": "Russian Ruble", + "AlphabeticCode": "RUR", + "NumericCode": "810", + "MinorUnit": "", + "WidthdrawalDate": "1994-06", + }, + "BELGIUM": { + "Currency": "Financial Franc", + "AlphabeticCode": "BEL", + "NumericCode": "992", + "MinorUnit": "", + "WidthdrawalDate": "1990-03", + }, + "BELIZE": { + "Currency": "Belize Dollar", + "AlphabeticCode": "BZD", + "NumericCode": "084", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "BENIN": { + "Currency": "CFA Franc BCEAO", + "AlphabeticCode": "XOF", + "NumericCode": "952", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "BERMUDA": { + "Currency": "Bermudian Dollar", + "AlphabeticCode": "BMD", + "NumericCode": "060", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "BHUTAN": { + "Currency": "Ngultrum", + "AlphabeticCode": "BTN", + "NumericCode": "064", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "BOLIVIA (PLURINATIONAL STATE OF)": { + "Currency": "Mvdol", + "AlphabeticCode": "BOV", + "NumericCode": "984", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "BONAIRE, SINT EUSTATIUS AND SABA": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "BOSNIA AND HERZEGOVINA": { + "Currency": "Dinar", + "AlphabeticCode": "BAD", + "NumericCode": "070", + "MinorUnit": "", + "WidthdrawalDate": "1998-07", + }, + "BOTSWANA": { + "Currency": "Pula", + "AlphabeticCode": "BWP", + "NumericCode": "072", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "BOUVET ISLAND": { + "Currency": "Norwegian Krone", + "AlphabeticCode": "NOK", + "NumericCode": "578", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "BRAZIL": { + "Currency": "Cruzeiro Real", + "AlphabeticCode": "BRR", 
+ "NumericCode": "987", + "MinorUnit": "", + "WidthdrawalDate": "1994-07", + }, + "BRITISH INDIAN OCEAN TERRITORY (THE)": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "BRUNEI DARUSSALAM": { + "Currency": "Brunei Dollar", + "AlphabeticCode": "BND", + "NumericCode": "096", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "BULGARIA": { + "Currency": "Lev", + "AlphabeticCode": "BGL", + "NumericCode": "100", + "MinorUnit": "", + "WidthdrawalDate": "2003-11", + }, + "BURKINA FASO": { + "Currency": "CFA Franc BCEAO", + "AlphabeticCode": "XOF", + "NumericCode": "952", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "BURUNDI": { + "Currency": "Burundi Franc", + "AlphabeticCode": "BIF", + "NumericCode": "108", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "CABO VERDE": { + "Currency": "Cabo Verde Escudo", + "AlphabeticCode": "CVE", + "NumericCode": "132", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "CAMBODIA": { + "Currency": "Riel", + "AlphabeticCode": "KHR", + "NumericCode": "116", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "CAMEROON": { + "Currency": "CFA Franc BEAC", + "AlphabeticCode": "XAF", + "NumericCode": "950", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "CANADA": { + "Currency": "Canadian Dollar", + "AlphabeticCode": "CAD", + "NumericCode": "124", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "CAYMAN ISLANDS (THE)": { + "Currency": "Cayman Islands Dollar", + "AlphabeticCode": "KYD", + "NumericCode": "136", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "CENTRAL AFRICAN REPUBLIC (THE)": { + "Currency": "CFA Franc BEAC", + "AlphabeticCode": "XAF", + "NumericCode": "950", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "CHAD": { + "Currency": "CFA Franc BEAC", + "AlphabeticCode": "XAF", + "NumericCode": "950", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "CHILE": { + "Currency": "Unidad de Fomento", + "AlphabeticCode": 
"CLF", + "NumericCode": "990", + "MinorUnit": "4", + "WidthdrawalDate": "", + }, + "CHINA": { + "Currency": "Yuan Renminbi", + "AlphabeticCode": "CNY", + "NumericCode": "156", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "CHRISTMAS ISLAND": { + "Currency": "Australian Dollar", + "AlphabeticCode": "AUD", + "NumericCode": "036", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "COCOS (KEELING) ISLANDS (THE)": { + "Currency": "Australian Dollar", + "AlphabeticCode": "AUD", + "NumericCode": "036", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "COLOMBIA": { + "Currency": "Unidad de Valor Real", + "AlphabeticCode": "COU", + "NumericCode": "970", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "COMOROS (THE)": { + "Currency": "Comorian Franc ", + "AlphabeticCode": "KMF", + "NumericCode": "174", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "CONGO (THE DEMOCRATIC REPUBLIC OF THE)": { + "Currency": "Congolese Franc", + "AlphabeticCode": "CDF", + "NumericCode": "976", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "CONGO (THE)": { + "Currency": "CFA Franc BEAC", + "AlphabeticCode": "XAF", + "NumericCode": "950", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "COOK ISLANDS (THE)": { + "Currency": "New Zealand Dollar", + "AlphabeticCode": "NZD", + "NumericCode": "554", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "COSTA RICA": { + "Currency": "Costa Rican Colon", + "AlphabeticCode": "CRC", + "NumericCode": "188", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "CÔTE D'IVOIRE": { + "Currency": "CFA Franc BCEAO", + "AlphabeticCode": "XOF", + "NumericCode": "952", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "CROATIA": { + "Currency": "Croatian Kuna", + "AlphabeticCode": "HRK", + "NumericCode": "191", + "MinorUnit": "", + "WidthdrawalDate": "2015-06", + }, + "CUBA": { + "Currency": "Peso Convertible", + "AlphabeticCode": "CUC", + "NumericCode": "931", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "CURAÇAO": { + 
"Currency": "Netherlands Antillean Guilder", + "AlphabeticCode": "ANG", + "NumericCode": "532", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "CYPRUS": { + "Currency": "Cyprus Pound", + "AlphabeticCode": "CYP", + "NumericCode": "196", + "MinorUnit": "", + "WidthdrawalDate": "2008-01", + }, + "CZECHIA": { + "Currency": "Czech Koruna", + "AlphabeticCode": "CZK", + "NumericCode": "203", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "DENMARK": { + "Currency": "Danish Krone", + "AlphabeticCode": "DKK", + "NumericCode": "208", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "DJIBOUTI": { + "Currency": "Djibouti Franc", + "AlphabeticCode": "DJF", + "NumericCode": "262", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "DOMINICA": { + "Currency": "East Caribbean Dollar", + "AlphabeticCode": "XCD", + "NumericCode": "951", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "DOMINICAN REPUBLIC (THE)": { + "Currency": "Dominican Peso", + "AlphabeticCode": "DOP", + "NumericCode": "214", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "ECUADOR": { + "Currency": "Unidad de Valor Constante (UVC)", + "AlphabeticCode": "ECV", + "NumericCode": "983", + "MinorUnit": "", + "WidthdrawalDate": "2000-09", + }, + "EGYPT": { + "Currency": "Egyptian Pound", + "AlphabeticCode": "EGP", + "NumericCode": "818", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "EL SALVADOR": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "EQUATORIAL GUINEA": { + "Currency": "Ekwele", + "AlphabeticCode": "GQE", + "NumericCode": "226", + "MinorUnit": "", + "WidthdrawalDate": "1986-06", + }, + "ERITREA": { + "Currency": "Nakfa", + "AlphabeticCode": "ERN", + "NumericCode": "232", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "ESTONIA": { + "Currency": "Kroon", + "AlphabeticCode": "EEK", + "NumericCode": "233", + "MinorUnit": "", + "WidthdrawalDate": "2011-01", + }, + "ESWATINI": { + "Currency": 
"Lilangeni", + "AlphabeticCode": "SZL", + "NumericCode": "748", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "ETHIOPIA": { + "Currency": "Ethiopian Birr", + "AlphabeticCode": "ETB", + "NumericCode": "230", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "EUROPEAN UNION": { + "Currency": "Euro", + "AlphabeticCode": "EUR", + "NumericCode": "978", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "FALKLAND ISLANDS (THE) [MALVINAS]": { + "Currency": "Falkland Islands Pound", + "AlphabeticCode": "FKP", + "NumericCode": "238", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "FAROE ISLANDS (THE)": { + "Currency": "Danish Krone", + "AlphabeticCode": "DKK", + "NumericCode": "208", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "FIJI": { + "Currency": "Fiji Dollar", + "AlphabeticCode": "FJD", + "NumericCode": "242", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "FINLAND": { + "Currency": "Markka", + "AlphabeticCode": "FIM", + "NumericCode": "246", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "FRANCE": { + "Currency": "French Franc", + "AlphabeticCode": "FRF", + "NumericCode": "250", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "FRENCH GUIANA": { + "Currency": "French Franc", + "AlphabeticCode": "FRF", + "NumericCode": "250", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "FRENCH POLYNESIA": { + "Currency": "CFP Franc", + "AlphabeticCode": "XPF", + "NumericCode": "953", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "FRENCH SOUTHERN TERRITORIES (THE)": { + "Currency": "Euro", + "AlphabeticCode": "EUR", + "NumericCode": "978", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "GABON": { + "Currency": "CFA Franc BEAC", + "AlphabeticCode": "XAF", + "NumericCode": "950", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "GAMBIA (THE)": { + "Currency": "Dalasi", + "AlphabeticCode": "GMD", + "NumericCode": "270", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "GEORGIA": { + "Currency": "Russian Ruble", + 
"AlphabeticCode": "RUR", + "NumericCode": "810", + "MinorUnit": "", + "WidthdrawalDate": "1994-04", + }, + "GERMANY": { + "Currency": "Deutsche Mark", + "AlphabeticCode": "DEM", + "NumericCode": "276", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "GHANA": { + "Currency": "Ghana Cedi", + "AlphabeticCode": "GHP", + "NumericCode": "939", + "MinorUnit": "", + "WidthdrawalDate": "2007-06", + }, + "GIBRALTAR": { + "Currency": "Gibraltar Pound", + "AlphabeticCode": "GIP", + "NumericCode": "292", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "GREECE": { + "Currency": "Drachma", + "AlphabeticCode": "GRD", + "NumericCode": "300", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "GREENLAND": { + "Currency": "Danish Krone", + "AlphabeticCode": "DKK", + "NumericCode": "208", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "GRENADA": { + "Currency": "East Caribbean Dollar", + "AlphabeticCode": "XCD", + "NumericCode": "951", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "GUADELOUPE": { + "Currency": "French Franc", + "AlphabeticCode": "FRF", + "NumericCode": "250", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "GUAM": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "GUATEMALA": { + "Currency": "Quetzal", + "AlphabeticCode": "GTQ", + "NumericCode": "320", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "GUERNSEY": { + "Currency": "Pound Sterling", + "AlphabeticCode": "GBP", + "NumericCode": "826", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "GUINEA": { + "Currency": "Syli", + "AlphabeticCode": "GNS", + "NumericCode": "324", + "MinorUnit": "", + "WidthdrawalDate": "1986-02", + }, + "GUINEA-BISSAU": { + "Currency": "Guinea-Bissau Peso", + "AlphabeticCode": "GWP", + "NumericCode": "624", + "MinorUnit": "", + "WidthdrawalDate": "1997-05", + }, + "GUYANA": { + "Currency": "Guyana Dollar", + "AlphabeticCode": "GYD", + "NumericCode": "328", + 
"MinorUnit": "2", + "WidthdrawalDate": "", + }, + "HAITI": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "HEARD ISLAND AND McDONALD ISLANDS": { + "Currency": "Australian Dollar", + "AlphabeticCode": "AUD", + "NumericCode": "036", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "HOLY SEE (THE)": { + "Currency": "Euro", + "AlphabeticCode": "EUR", + "NumericCode": "978", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "HONDURAS": { + "Currency": "Lempira", + "AlphabeticCode": "HNL", + "NumericCode": "340", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "HONG KONG": { + "Currency": "Hong Kong Dollar", + "AlphabeticCode": "HKD", + "NumericCode": "344", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "HUNGARY": { + "Currency": "Forint", + "AlphabeticCode": "HUF", + "NumericCode": "348", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "ICELAND": { + "Currency": "Old Krona", + "AlphabeticCode": "ISJ", + "NumericCode": "352", + "MinorUnit": "", + "WidthdrawalDate": "1989 to 1990", + }, + "INDIA": { + "Currency": "Indian Rupee", + "AlphabeticCode": "INR", + "NumericCode": "356", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "INDONESIA": { + "Currency": "Rupiah", + "AlphabeticCode": "IDR", + "NumericCode": "360", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "INTERNATIONAL MONETARY FUND (IMF)": { + "Currency": "SDR (Special Drawing Right)", + "AlphabeticCode": "XDR", + "NumericCode": "960", + "MinorUnit": "-", + "WidthdrawalDate": "", + }, + "IRAN (ISLAMIC REPUBLIC OF)": { + "Currency": "Iranian Rial", + "AlphabeticCode": "IRR", + "NumericCode": "364", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "IRAQ": { + "Currency": "Iraqi Dinar", + "AlphabeticCode": "IQD", + "NumericCode": "368", + "MinorUnit": "3", + "WidthdrawalDate": "", + }, + "IRELAND": { + "Currency": "Irish Pound", + "AlphabeticCode": "IEP", + "NumericCode": "372", + "MinorUnit": "", + 
"WidthdrawalDate": "2002-03", + }, + "ISLE OF MAN": { + "Currency": "Pound Sterling", + "AlphabeticCode": "GBP", + "NumericCode": "826", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "ISRAEL": { + "Currency": "Old Shekel", + "AlphabeticCode": "ILR", + "NumericCode": "376", + "MinorUnit": "", + "WidthdrawalDate": "1989 to 1990", + }, + "ITALY": { + "Currency": "Italian Lira", + "AlphabeticCode": "ITL", + "NumericCode": "380", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "JAMAICA": { + "Currency": "Jamaican Dollar", + "AlphabeticCode": "JMD", + "NumericCode": "388", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "JAPAN": { + "Currency": "Yen", + "AlphabeticCode": "JPY", + "NumericCode": "392", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "JERSEY": { + "Currency": "Pound Sterling", + "AlphabeticCode": "GBP", + "NumericCode": "826", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "JORDAN": { + "Currency": "Jordanian Dinar", + "AlphabeticCode": "JOD", + "NumericCode": "400", + "MinorUnit": "3", + "WidthdrawalDate": "", + }, + "KAZAKHSTAN": { + "Currency": "Russian Ruble", + "AlphabeticCode": "RUR", + "NumericCode": "810", + "MinorUnit": "", + "WidthdrawalDate": "1994-05", + }, + "KENYA": { + "Currency": "Kenyan Shilling", + "AlphabeticCode": "KES", + "NumericCode": "404", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "KIRIBATI": { + "Currency": "Australian Dollar", + "AlphabeticCode": "AUD", + "NumericCode": "036", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "KOREA (THE DEMOCRATIC PEOPLE'S REPUBLIC OF)": { + "Currency": "North Korean Won", + "AlphabeticCode": "KPW", + "NumericCode": "408", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "KOREA (THE REPUBLIC OF)": { + "Currency": "Won", + "AlphabeticCode": "KRW", + "NumericCode": "410", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "KUWAIT": { + "Currency": "Kuwaiti Dinar", + "AlphabeticCode": "KWD", + "NumericCode": "414", + "MinorUnit": "3", + 
"WidthdrawalDate": "", + }, + "KYRGYZSTAN": { + "Currency": "Russian Ruble", + "AlphabeticCode": "RUR", + "NumericCode": "810", + "MinorUnit": "", + "WidthdrawalDate": "1993-01", + }, + "LAO PEOPLE'S DEMOCRATIC REPUBLIC (THE)": { + "Currency": "Lao Kip", + "AlphabeticCode": "LAK", + "NumericCode": "418", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "LATVIA": { + "Currency": "Latvian Ruble", + "AlphabeticCode": "LVR", + "NumericCode": "428", + "MinorUnit": "", + "WidthdrawalDate": "1994-12", + }, + "LEBANON": { + "Currency": "Lebanese Pound", + "AlphabeticCode": "LBP", + "NumericCode": "422", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "LESOTHO": { + "Currency": "Financial Rand", + "AlphabeticCode": "ZAL", + "NumericCode": "991", + "MinorUnit": "", + "WidthdrawalDate": "1995-03", + }, + "LIBERIA": { + "Currency": "Liberian Dollar", + "AlphabeticCode": "LRD", + "NumericCode": "430", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "LIBYA": { + "Currency": "Libyan Dinar", + "AlphabeticCode": "LYD", + "NumericCode": "434", + "MinorUnit": "3", + "WidthdrawalDate": "", + }, + "LIECHTENSTEIN": { + "Currency": "Swiss Franc", + "AlphabeticCode": "CHF", + "NumericCode": "756", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "LITHUANIA": { + "Currency": "Talonas", + "AlphabeticCode": "LTT", + "NumericCode": "440", + "MinorUnit": "", + "WidthdrawalDate": "1993-07", + }, + "LUXEMBOURG": { + "Currency": "Luxembourg Financial Franc", + "AlphabeticCode": "LUL", + "NumericCode": "988", + "MinorUnit": "", + "WidthdrawalDate": "1990-03", + }, + "MACAO": { + "Currency": "Pataca", + "AlphabeticCode": "MOP", + "NumericCode": "446", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "NORTH MACEDONIA": { + "Currency": "Denar", + "AlphabeticCode": "MKD", + "NumericCode": "807", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "MADAGASCAR": { + "Currency": "Malagasy Franc", + "AlphabeticCode": "MGF", + "NumericCode": "450", + "MinorUnit": "", + "WidthdrawalDate": 
"2004-12", + }, + "MALAWI": { + "Currency": "Kwacha", + "AlphabeticCode": "MWK", + "NumericCode": "454", + "MinorUnit": "", + "WidthdrawalDate": "2016-02", + }, + "MALAYSIA": { + "Currency": "Malaysian Ringgit", + "AlphabeticCode": "MYR", + "NumericCode": "458", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "MALDIVES": { + "Currency": "Maldive Rupee", + "AlphabeticCode": "MVQ", + "NumericCode": "462", + "MinorUnit": "", + "WidthdrawalDate": "1989-12", + }, + "MALI": { + "Currency": "Mali Franc", + "AlphabeticCode": "MLF", + "NumericCode": "466", + "MinorUnit": "", + "WidthdrawalDate": "1984-11", + }, + "MALTA": { + "Currency": "Maltese Pound", + "AlphabeticCode": "MTP", + "NumericCode": "470", + "MinorUnit": "", + "WidthdrawalDate": "1983-06", + }, + "MARSHALL ISLANDS (THE)": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "MARTINIQUE": { + "Currency": "French Franc", + "AlphabeticCode": "FRF", + "NumericCode": "250", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "MAURITANIA": { + "Currency": "Ouguiya", + "AlphabeticCode": "MRO", + "NumericCode": "478", + "MinorUnit": "", + "WidthdrawalDate": "2017-12", + }, + "MAURITIUS": { + "Currency": "Mauritius Rupee", + "AlphabeticCode": "MUR", + "NumericCode": "480", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "MAYOTTE": { + "Currency": "French Franc", + "AlphabeticCode": "FRF", + "NumericCode": "250", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "MEMBER COUNTRIES OF THE AFRICAN DEVELOPMENT BANK GROUP": { + "Currency": "ADB Unit of Account", + "AlphabeticCode": "XUA", + "NumericCode": "965", + "MinorUnit": "-", + "WidthdrawalDate": "", + }, + "MEXICO": { + "Currency": "Mexican Peso", + "AlphabeticCode": "MXP", + "NumericCode": "484", + "MinorUnit": "", + "WidthdrawalDate": "1993-01", + }, + "MICRONESIA (FEDERATED STATES OF)": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", 
+ "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "MOLDOVA (THE REPUBLIC OF)": { + "Currency": "Moldovan Leu", + "AlphabeticCode": "MDL", + "NumericCode": "498", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "MONACO": { + "Currency": "French Franc", + "AlphabeticCode": "FRF", + "NumericCode": "250", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "MONGOLIA": { + "Currency": "Tugrik", + "AlphabeticCode": "MNT", + "NumericCode": "496", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "MONTENEGRO": { + "Currency": "Euro", + "AlphabeticCode": "EUR", + "NumericCode": "978", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "MONTSERRAT": { + "Currency": "East Caribbean Dollar", + "AlphabeticCode": "XCD", + "NumericCode": "951", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "MOROCCO": { + "Currency": "Moroccan Dirham", + "AlphabeticCode": "MAD", + "NumericCode": "504", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "MOZAMBIQUE": { + "Currency": "Mozambique Metical", + "AlphabeticCode": "MZM", + "NumericCode": "508", + "MinorUnit": "", + "WidthdrawalDate": "2006-06", + }, + "MYANMAR": { + "Currency": "Kyat", + "AlphabeticCode": "MMK", + "NumericCode": "104", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "NAMIBIA": { + "Currency": "Rand", + "AlphabeticCode": "ZAR", + "NumericCode": "710", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "NAURU": { + "Currency": "Australian Dollar", + "AlphabeticCode": "AUD", + "NumericCode": "036", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "NEPAL": { + "Currency": "Nepalese Rupee", + "AlphabeticCode": "NPR", + "NumericCode": "524", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "NETHERLANDS (THE)": { + "Currency": "Euro", + "AlphabeticCode": "EUR", + "NumericCode": "978", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "NEW CALEDONIA": { + "Currency": "CFP Franc", + "AlphabeticCode": "XPF", + "NumericCode": "953", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "NEW ZEALAND": 
{ + "Currency": "New Zealand Dollar", + "AlphabeticCode": "NZD", + "NumericCode": "554", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "NICARAGUA": { + "Currency": "Cordoba", + "AlphabeticCode": "NIC", + "NumericCode": "558", + "MinorUnit": "", + "WidthdrawalDate": "1990-10", + }, + "NIGER (THE)": { + "Currency": "CFA Franc BCEAO", + "AlphabeticCode": "XOF", + "NumericCode": "952", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "NIGERIA": { + "Currency": "Naira", + "AlphabeticCode": "NGN", + "NumericCode": "566", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "NIUE": { + "Currency": "New Zealand Dollar", + "AlphabeticCode": "NZD", + "NumericCode": "554", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "NORFOLK ISLAND": { + "Currency": "Australian Dollar", + "AlphabeticCode": "AUD", + "NumericCode": "036", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "NORTHERN MARIANA ISLANDS (THE)": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "NORWAY": { + "Currency": "Norwegian Krone", + "AlphabeticCode": "NOK", + "NumericCode": "578", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "OMAN": { + "Currency": "Rial Omani", + "AlphabeticCode": "OMR", + "NumericCode": "512", + "MinorUnit": "3", + "WidthdrawalDate": "", + }, + "PAKISTAN": { + "Currency": "Pakistan Rupee", + "AlphabeticCode": "PKR", + "NumericCode": "586", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "PALAU": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "PALESTINE, STATE OF": { + "Currency": "No universal currency", + "AlphabeticCode": "", + "NumericCode": "", + "MinorUnit": "", + "WidthdrawalDate": "", + }, + "PANAMA": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "PAPUA NEW GUINEA": { + "Currency": "Kina", + "AlphabeticCode": 
"PGK", + "NumericCode": "598", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "PARAGUAY": { + "Currency": "Guarani", + "AlphabeticCode": "PYG", + "NumericCode": "600", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "PERU": { + "Currency": "Sol", + "AlphabeticCode": "PES", + "NumericCode": "604", + "MinorUnit": "", + "WidthdrawalDate": "1986-02", + }, + "PHILIPPINES (THE)": { + "Currency": "Philippine Peso", + "AlphabeticCode": "PHP", + "NumericCode": "608", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "PITCAIRN": { + "Currency": "New Zealand Dollar", + "AlphabeticCode": "NZD", + "NumericCode": "554", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "POLAND": { + "Currency": "Zloty", + "AlphabeticCode": "PLZ", + "NumericCode": "616", + "MinorUnit": "", + "WidthdrawalDate": "1997-01", + }, + "PORTUGAL": { + "Currency": "Portuguese Escudo", + "AlphabeticCode": "PTE", + "NumericCode": "620", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "PUERTO RICO": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "QATAR": { + "Currency": "Qatari Rial", + "AlphabeticCode": "QAR", + "NumericCode": "634", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "RÉUNION": { + "Currency": "French Franc", + "AlphabeticCode": "FRF", + "NumericCode": "250", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "ROMANIA": { + "Currency": "New Romanian Leu ", + "AlphabeticCode": "RON", + "NumericCode": "946", + "MinorUnit": "", + "WidthdrawalDate": "2015-06", + }, + "RUSSIAN FEDERATION (THE)": { + "Currency": "Russian Ruble", + "AlphabeticCode": "RUB", + "NumericCode": "643", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "RWANDA": { + "Currency": "Rwanda Franc", + "AlphabeticCode": "RWF", + "NumericCode": "646", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "SAINT BARTHÉLEMY": { + "Currency": "Euro", + "AlphabeticCode": "EUR", + "NumericCode": "978", + "MinorUnit": 
"2", + "WidthdrawalDate": "", + }, + "SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA": { + "Currency": "Saint Helena Pound", + "AlphabeticCode": "SHP", + "NumericCode": "654", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SAINT KITTS AND NEVIS": { + "Currency": "East Caribbean Dollar", + "AlphabeticCode": "XCD", + "NumericCode": "951", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SAINT LUCIA": { + "Currency": "East Caribbean Dollar", + "AlphabeticCode": "XCD", + "NumericCode": "951", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SAINT MARTIN (FRENCH PART)": { + "Currency": "Euro", + "AlphabeticCode": "EUR", + "NumericCode": "978", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SAINT PIERRE AND MIQUELON": { + "Currency": "French Franc", + "AlphabeticCode": "FRF", + "NumericCode": "250", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "SAINT VINCENT AND THE GRENADINES": { + "Currency": "East Caribbean Dollar", + "AlphabeticCode": "XCD", + "NumericCode": "951", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SAMOA": { + "Currency": "Tala", + "AlphabeticCode": "WST", + "NumericCode": "882", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SAN MARINO": { + "Currency": "Italian Lira", + "AlphabeticCode": "ITL", + "NumericCode": "380", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "SAO TOME AND PRINCIPE": { + "Currency": "Dobra", + "AlphabeticCode": "STD", + "NumericCode": "678", + "MinorUnit": "", + "WidthdrawalDate": "2017-12", + }, + "SAUDI ARABIA": { + "Currency": "Saudi Riyal", + "AlphabeticCode": "SAR", + "NumericCode": "682", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SENEGAL": { + "Currency": "CFA Franc BCEAO", + "AlphabeticCode": "XOF", + "NumericCode": "952", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "SERBIA": { + "Currency": "Serbian Dinar", + "AlphabeticCode": "RSD", + "NumericCode": "941", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SEYCHELLES": { + "Currency": "Seychelles 
Rupee", + "AlphabeticCode": "SCR", + "NumericCode": "690", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SIERRA LEONE": { + "Currency": "Leone", + "AlphabeticCode": "SLL", + "NumericCode": "694", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SINGAPORE": { + "Currency": "Singapore Dollar", + "AlphabeticCode": "SGD", + "NumericCode": "702", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SINT MAARTEN (DUTCH PART)": { + "Currency": "Netherlands Antillean Guilder", + "AlphabeticCode": "ANG", + "NumericCode": "532", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + 'SISTEMA UNITARIO DE COMPENSACION REGIONAL DE PAGOS "SUCRE"': { + "Currency": "Sucre", + "AlphabeticCode": "XSU", + "NumericCode": "994", + "MinorUnit": "-", + "WidthdrawalDate": "", + }, + "SLOVAKIA": { + "Currency": "Slovak Koruna", + "AlphabeticCode": "SKK", + "NumericCode": "703", + "MinorUnit": "", + "WidthdrawalDate": "2009-01", + }, + "SLOVENIA": { + "Currency": "Tolar", + "AlphabeticCode": "SIT", + "NumericCode": "705", + "MinorUnit": "", + "WidthdrawalDate": "2007-01", + }, + "SOLOMON ISLANDS": { + "Currency": "Solomon Islands Dollar", + "AlphabeticCode": "SBD", + "NumericCode": "090", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SOMALIA": { + "Currency": "Somali Shilling", + "AlphabeticCode": "SOS", + "NumericCode": "706", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SOUTH AFRICA": { + "Currency": "Financial Rand", + "AlphabeticCode": "ZAL", + "NumericCode": "991", + "MinorUnit": "", + "WidthdrawalDate": "1995-03", + }, + "SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS": { + "Currency": "No universal currency", + "AlphabeticCode": "", + "NumericCode": "", + "MinorUnit": "", + "WidthdrawalDate": "", + }, + "SOUTH SUDAN": { + "Currency": "Sudanese Pound", + "AlphabeticCode": "SDG", + "NumericCode": "938", + "MinorUnit": "", + "WidthdrawalDate": "2012-09", + }, + "SPAIN": { + "Currency": "Spanish Peseta", + "AlphabeticCode": "ESP", + "NumericCode": "724", + 
"MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "SRI LANKA": { + "Currency": "Sri Lanka Rupee", + "AlphabeticCode": "LKR", + "NumericCode": "144", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SUDAN (THE)": { + "Currency": "Sudanese Pound", + "AlphabeticCode": "SDG", + "NumericCode": "938", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SURINAME": { + "Currency": "Surinam Guilder", + "AlphabeticCode": "SRG", + "NumericCode": "740", + "MinorUnit": "", + "WidthdrawalDate": "2003-12", + }, + "SVALBARD AND JAN MAYEN": { + "Currency": "Norwegian Krone", + "AlphabeticCode": "NOK", + "NumericCode": "578", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SWEDEN": { + "Currency": "Swedish Krona", + "AlphabeticCode": "SEK", + "NumericCode": "752", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "SWITZERLAND": { + "Currency": "WIR Franc (for electronic)", + "AlphabeticCode": "CHC", + "NumericCode": "948", + "MinorUnit": "", + "WidthdrawalDate": "2004-11", + }, + "SYRIAN ARAB REPUBLIC": { + "Currency": "Syrian Pound", + "AlphabeticCode": "SYP", + "NumericCode": "760", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "TAIWAN (PROVINCE OF CHINA)": { + "Currency": "New Taiwan Dollar", + "AlphabeticCode": "TWD", + "NumericCode": "901", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "TAJIKISTAN": { + "Currency": "Tajik Ruble", + "AlphabeticCode": "TJR", + "NumericCode": "762", + "MinorUnit": "", + "WidthdrawalDate": "2001-04", + }, + "TANZANIA, UNITED REPUBLIC OF": { + "Currency": "Tanzanian Shilling", + "AlphabeticCode": "TZS", + "NumericCode": "834", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "THAILAND": { + "Currency": "Baht", + "AlphabeticCode": "THB", + "NumericCode": "764", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "TIMOR-LESTE": { + "Currency": "Timor Escudo", + "AlphabeticCode": "TPE", + "NumericCode": "626", + "MinorUnit": "", + "WidthdrawalDate": "2002-11", + }, + "TOGO": { + "Currency": "CFA Franc BCEAO", + 
"AlphabeticCode": "XOF", + "NumericCode": "952", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "TOKELAU": { + "Currency": "New Zealand Dollar", + "AlphabeticCode": "NZD", + "NumericCode": "554", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "TONGA": { + "Currency": "Pa'anga", + "AlphabeticCode": "TOP", + "NumericCode": "776", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "TRINIDAD AND TOBAGO": { + "Currency": "Trinidad and Tobago Dollar", + "AlphabeticCode": "TTD", + "NumericCode": "780", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "TUNISIA": { + "Currency": "Tunisian Dinar", + "AlphabeticCode": "TND", + "NumericCode": "788", + "MinorUnit": "3", + "WidthdrawalDate": "", + }, + "TURKEY": { + "Currency": "New Turkish Lira", + "AlphabeticCode": "TRY", + "NumericCode": "949", + "MinorUnit": "", + "WidthdrawalDate": "2009-01", + }, + "TURKMENISTAN": { + "Currency": "Turkmenistan Manat", + "AlphabeticCode": "TMM", + "NumericCode": "795", + "MinorUnit": "", + "WidthdrawalDate": "2009-01", + }, + "TURKS AND CAICOS ISLANDS (THE)": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "TUVALU": { + "Currency": "Australian Dollar", + "AlphabeticCode": "AUD", + "NumericCode": "036", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "UGANDA": { + "Currency": "Old Shilling", + "AlphabeticCode": "UGW", + "NumericCode": "800", + "MinorUnit": "", + "WidthdrawalDate": "1989 to 1990", + }, + "UKRAINE": { + "Currency": "Karbovanet", + "AlphabeticCode": "UAK", + "NumericCode": "804", + "MinorUnit": "", + "WidthdrawalDate": "1996-09", + }, + "UNITED ARAB EMIRATES (THE)": { + "Currency": "UAE Dirham", + "AlphabeticCode": "AED", + "NumericCode": "784", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "UNITED KINGDOM OF GREAT BRITAIN AND NORTHERN IRELAND (THE)": { + "Currency": "Pound Sterling", + "AlphabeticCode": "GBP", + "NumericCode": "826", + "MinorUnit": "2", + 
"WidthdrawalDate": "", + }, + "UNITED STATES MINOR OUTLYING ISLANDS (THE)": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "UNITED STATES OF AMERICA (THE)": { + "Currency": "US Dollar (Next day)", + "AlphabeticCode": "USN", + "NumericCode": "997", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "URUGUAY": { + "Currency": "Uruguayan Peso", + "AlphabeticCode": "UYP", + "NumericCode": "858", + "MinorUnit": "", + "WidthdrawalDate": "1993-03", + }, + "UZBEKISTAN": { + "Currency": "Russian Ruble", + "AlphabeticCode": "RUR", + "NumericCode": "810", + "MinorUnit": "", + "WidthdrawalDate": "1994-07", + }, + "VANUATU": { + "Currency": "Vatu", + "AlphabeticCode": "VUV", + "NumericCode": "548", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "VENEZUELA (BOLIVARIAN REPUBLIC OF)": { + "Currency": "Bolívar", + "AlphabeticCode": "VEF", + "NumericCode": "937", + "MinorUnit": "", + "WidthdrawalDate": "2018-08", + }, + "VIET NAM": { + "Currency": "Dong", + "AlphabeticCode": "VND", + "NumericCode": "704", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "VIRGIN ISLANDS (BRITISH)": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "VIRGIN ISLANDS (U.S.)": { + "Currency": "US Dollar", + "AlphabeticCode": "USD", + "NumericCode": "840", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "WALLIS AND FUTUNA": { + "Currency": "CFP Franc", + "AlphabeticCode": "XPF", + "NumericCode": "953", + "MinorUnit": "0", + "WidthdrawalDate": "", + }, + "WESTERN SAHARA": { + "Currency": "Moroccan Dirham", + "AlphabeticCode": "MAD", + "NumericCode": "504", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "YEMEN": { + "Currency": "Yemeni Rial", + "AlphabeticCode": "YER", + "NumericCode": "886", + "MinorUnit": "2", + "WidthdrawalDate": "", + }, + "ZAMBIA": { + "Currency": "Zambian Kwacha", + "AlphabeticCode": "ZMK", + 
"NumericCode": "894", + "MinorUnit": "", + "WidthdrawalDate": "2012-12", + }, + "ZIMBABWE": { + "Currency": "Zimbabwe Dollar", + "AlphabeticCode": "ZWR", + "NumericCode": "935", + "MinorUnit": "", + "WidthdrawalDate": "2009-06", + }, + "ZZ01_Bond Markets Unit European_EURCO": { + "Currency": "Bond Markets Unit European Composite Unit (EURCO)", + "AlphabeticCode": "XBA", + "NumericCode": "955", + "MinorUnit": "-", + "WidthdrawalDate": "", + }, + "ZZ02_Bond Markets Unit European_EMU-6": { + "Currency": "Bond Markets Unit European Monetary Unit (E.M.U.-6)", + "AlphabeticCode": "XBB", + "NumericCode": "956", + "MinorUnit": "-", + "WidthdrawalDate": "", + }, + "ZZ03_Bond Markets Unit European_EUA-9": { + "Currency": "Bond Markets Unit European Unit of Account 9 (E.U.A.-9)", + "AlphabeticCode": "XBC", + "NumericCode": "957", + "MinorUnit": "-", + "WidthdrawalDate": "", + }, + "ZZ04_Bond Markets Unit European_EUA-17": { + "Currency": "Bond Markets Unit European Unit of Account 17 (E.U.A.-17)", + "AlphabeticCode": "XBD", + "NumericCode": "958", + "MinorUnit": "-", + "WidthdrawalDate": "", + }, + "ZZ06_Testing_Code": { + "Currency": "Codes specifically reserved for testing purposes", + "AlphabeticCode": "XTS", + "NumericCode": "963", + "MinorUnit": "-", + "WidthdrawalDate": "", + }, + "ZZ07_No_Currency": { + "Currency": "The codes assigned for transactions where no currency is involved", + "AlphabeticCode": "XXX", + "NumericCode": "999", + "MinorUnit": "-", + "WidthdrawalDate": "", + }, + "ZZ08_Gold": { + "Currency": "Gold", + "AlphabeticCode": "XAU", + "NumericCode": "959", + "MinorUnit": "-", + "WidthdrawalDate": "", + }, + "ZZ09_Palladium": { + "Currency": "Palladium", + "AlphabeticCode": "XPD", + "NumericCode": "964", + "MinorUnit": "-", + "WidthdrawalDate": "", + }, + "ZZ10_Platinum": { + "Currency": "Platinum", + "AlphabeticCode": "XPT", + "NumericCode": "962", + "MinorUnit": "-", + "WidthdrawalDate": "", + }, + "ZZ11_Silver": { + "Currency": "Silver", + 
"AlphabeticCode": "XAG", + "NumericCode": "961", + "MinorUnit": "-", + "WidthdrawalDate": "", + }, + "BOLIVIA": { + "Currency": "Peso boliviano", + "AlphabeticCode": "BOP", + "NumericCode": "068", + "MinorUnit": "", + "WidthdrawalDate": "1987-02", + }, + "BURMA": { + "Currency": "Kyat", + "AlphabeticCode": "BUK", + "NumericCode": "104", + "MinorUnit": "", + "WidthdrawalDate": "1990-02", + }, + "CZECHOSLOVAKIA": { + "Currency": "Koruna", + "AlphabeticCode": "CSK", + "NumericCode": "200", + "MinorUnit": "", + "WidthdrawalDate": "1993-03", + }, + "EUROPEAN MONETARY CO-OPERATION FUND (EMCF)": { + "Currency": "European Currency Unit (E.C.U)", + "AlphabeticCode": "XEU", + "NumericCode": "954", + "MinorUnit": "", + "WidthdrawalDate": "1999-01", + }, + "FRENCH SOUTHERN TERRITORIES": { + "Currency": "French Franc", + "AlphabeticCode": "FRF", + "NumericCode": "250", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "GERMAN DEMOCRATIC REPUBLIC": { + "Currency": "Mark der DDR", + "AlphabeticCode": "DDM", + "NumericCode": "278", + "MinorUnit": "", + "WidthdrawalDate": "1990-07 to 1990-09", + }, + "HOLY SEE (VATICAN CITY STATE)": { + "Currency": "Italian Lira", + "AlphabeticCode": "ITL", + "NumericCode": "380", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "LAO": { + "Currency": "Pathet Lao Kip", + "AlphabeticCode": "LAJ", + "NumericCode": "418", + "MinorUnit": "", + "WidthdrawalDate": "1979-12", + }, + "MOLDOVA, REPUBLIC OF": { + "Currency": "Russian Ruble", + "AlphabeticCode": "RUR", + "NumericCode": "810", + "MinorUnit": "", + "WidthdrawalDate": "1993-12", + }, + "NETHERLANDS": { + "Currency": "Netherlands Guilder", + "AlphabeticCode": "NLG", + "NumericCode": "528", + "MinorUnit": "", + "WidthdrawalDate": "2002-03", + }, + "NETHERLANDS ANTILLES": { + "Currency": "Netherlands Antillean Guilder", + "AlphabeticCode": "ANG", + "NumericCode": "532", + "MinorUnit": "", + "WidthdrawalDate": "2010-10", + }, + "RUSSIAN FEDERATION": { + "Currency": "Russian Ruble", 
+ "AlphabeticCode": "RUR", + "NumericCode": "810", + "MinorUnit": "", + "WidthdrawalDate": "2004-01", + }, + "SAINT MARTIN": { + "Currency": "French Franc", + "AlphabeticCode": "FRF", + "NumericCode": "250", + "MinorUnit": "", + "WidthdrawalDate": "1999-01", + }, + "SAINT-BARTHÉLEMY": { + "Currency": "French Franc", + "AlphabeticCode": "FRF", + "NumericCode": "250", + "MinorUnit": "", + "WidthdrawalDate": "1999-01", + }, + "SERBIA AND MONTENEGRO": { + "Currency": "Euro", + "AlphabeticCode": "EUR", + "NumericCode": "978", + "MinorUnit": "", + "WidthdrawalDate": "2006-10", + }, + "SOUTHERN RHODESIA": { + "Currency": "Rhodesian Dollar", + "AlphabeticCode": "RHD", + "NumericCode": "716", + "MinorUnit": "", + "WidthdrawalDate": "1978 to 1981", + }, + "SUDAN": { + "Currency": "Sudanese Pound", + "AlphabeticCode": "SDP", + "NumericCode": "736", + "MinorUnit": "", + "WidthdrawalDate": "1998-06", + }, + "SWAZILAND": { + "Currency": "Lilangeni", + "AlphabeticCode": "SZL", + "NumericCode": "748", + "MinorUnit": "", + "WidthdrawalDate": "2018-08", + }, + "UNION OF SOVIET SOCIALIST REPUBLICS": { + "Currency": "Rouble", + "AlphabeticCode": "SUR", + "NumericCode": "810", + "MinorUnit": "", + "WidthdrawalDate": "1990-12", + }, + "UNITED STATES": { + "Currency": "US Dollar (Same day)", + "AlphabeticCode": "USS", + "NumericCode": "998", + "MinorUnit": "", + "WidthdrawalDate": "2014-03", + }, + "VENEZUELA": { + "Currency": "Bolivar Fuerte", + "AlphabeticCode": "VEF", + "NumericCode": "937", + "MinorUnit": "", + "WidthdrawalDate": "2011-12", + }, + "VIETNAM": { + "Currency": "Old Dong", + "AlphabeticCode": "VNC", + "NumericCode": "704", + "MinorUnit": "", + "WidthdrawalDate": "1989-1990", + }, + "YEMEN, DEMOCRATIC": { + "Currency": "Yemeni Dinar", + "AlphabeticCode": "YDD", + "NumericCode": "720", + "MinorUnit": "", + "WidthdrawalDate": "1991-09", + }, + "YUGOSLAVIA": { + "Currency": "Yugoslavian Dinar", + "AlphabeticCode": "YUN", + "NumericCode": "890", + "MinorUnit": "", + 
"WidthdrawalDate": "1995-11", + }, + "ZAIRE": { + "Currency": "Zaire", + "AlphabeticCode": "ZRZ", + "NumericCode": "180", + "MinorUnit": "", + "WidthdrawalDate": "1994-02", + }, + "ZZ01_Gold-Franc": { + "Currency": "Gold-Franc", + "AlphabeticCode": "XFO", + "NumericCode": "", + "MinorUnit": "", + "WidthdrawalDate": "2006-10", + }, + "ZZ02_RINET Funds Code": { + "Currency": "RINET Funds Code", + "AlphabeticCode": "XRE", + "NumericCode": "", + "MinorUnit": "", + "WidthdrawalDate": "1999-11", + }, + "ZZ05_UIC-Franc": { + "Currency": "UIC-Franc", + "AlphabeticCode": "XFU", + "NumericCode": "", + "MinorUnit": "", + "WidthdrawalDate": "2013-11", + }, +} + + +SECTORS = [ + "Banking, Insurance & Financial Services", + "Biotechnology and Life Sciences", + "Business Services", + "Chemicals, Petroleum, Rubber & Plastic", + "Communications", + "Computer Software", + "Construction", + "Food & Tobacco Manufacturing", + "Industrial, Electric & Electronic Machinery", + "Leather, Stone, Clay & Glass products", + "Metals & Metal Products", + "Mining & Extraction", + "Miscellaneous Manufacturing", + "Other", + "Printing & Publishing", + "Property Services", + "Retail", + "Textiles & Clothing Manufacturing", + "Transport Manufacturing", + "Transport, Freight & Storage", + "Travel, Personal & Leisure", + "Utilities", + "Wholesale", + "Wood, Furniture & Paper Manufacturing", +] + +COMPANIES = { + "Acciona": {"headquarter": "ESP", "sector": "Construction"}, + "Acerinox": {"headquarter": "ESP", "sector": "Metals & Metal Products"}, + "ACS": {"headquarter": "ESP", "sector": "Construction"}, + "Adecco": {"headquarter": "CHE", "sector": "Business Services"}, + "Adyen": { + "headquarter": "NLD", + "sector": "Banking, Insurance & Financial Services", + }, + "Aegon": { + "headquarter": "NLD", + "sector": "Banking, Insurance & Financial Services", + }, + "Ajinomoto": {"headquarter": "JPN", "sector": "Food & Tobacco Manufacturing"}, + "AkerSolutions": {"headquarter": "NOR", "sector": "Business 
Services"}, + "AkzoNobel": { + "headquarter": "NLD", + "sector": "Chemicals, Petroleum, Rubber & Plastic", + }, + "Allianz": { + "headquarter": "DEU", + "sector": "Banking, Insurance & Financial Services", + }, + "AmericaMovil": {"headquarter": "MEX", "sector": "Communications"}, + "Amorepacific": { + "headquarter": "KOR", + "sector": "Chemicals, Petroleum, Rubber & Plastic", + }, + "AngloAmerican": {"headquarter": "GBR", "sector": "Mining & Extraction"}, + "Applus": {"headquarter": "ESP", "sector": "Business Services"}, + "Aquafil": { + "headquarter": "ITA", + "sector": "Chemicals, Petroleum, Rubber & Plastic", + }, + "ASTM": {"headquarter": "ITA", "sector": "Transport, Freight & Storage"}, + "Atlantia": {"headquarter": "ITA", "sector": "Transport, Freight & Storage"}, + "AutostradePerL'Italia": { + "headquarter": "ITA", + "sector": "Transport, Freight & Storage", + }, + "AXA": {"headquarter": "FRA", "sector": "Banking, Insurance & Financial Services"}, + "Barloworld": {"headquarter": "ZAF", "sector": "Wholesale"}, + "BayWa": {"headquarter": "DEU", "sector": "Wholesale"}, + "BHP": {"headquarter": "AUS", "sector": "Mining & Extraction"}, + "Biocon": { + "headquarter": "IND", + "sector": "Chemicals, Petroleum, Rubber & Plastic", + }, + "Bonava": {"headquarter": "SWE", "sector": "Construction"}, + "BP": {"headquarter": "GBR", "sector": "Chemicals, Petroleum, Rubber & Plastic"}, + "BT": {"headquarter": "GBR", "sector": "Communications"}, + "BuzziUnicem": { + "headquarter": "ITA", + "sector": "Leather, Stone, Clay & Glass products", + }, + "BWEnergy": {"headquarter": "BMU", "sector": "Mining & Extraction"}, + "Canacol": {"headquarter": "CAN", "sector": "Mining & Extraction"}, + "Castellum": {"headquarter": "SWE", "sector": "Property Services"}, + "Cellnex": {"headquarter": "ESP", "sector": "Communications"}, + "CELSA": {"headquarter": "ESP", "sector": "Metals & Metal Products"}, + "Cembre": { + "headquarter": "ITA", + "sector": "Industrial, Electric & Electronic 
Machinery", + }, + "Cipla": {"headquarter": "IND", "sector": "Chemicals, Petroleum, Rubber & Plastic"}, + "Coloplast": { + "headquarter": "DNK", + "sector": "Industrial, Electric & Electronic Machinery", + }, + "Corwin": {"headquarter": "SVK", "sector": "Property Services"}, + "CSL": {"headquarter": "AUS", "sector": "Chemicals, Petroleum, Rubber & Plastic"}, + "Cyient": {"headquarter": "IND", "sector": "Computer Software"}, + "Diasorin": { + "headquarter": "ITA", + "sector": "Chemicals, Petroleum, Rubber & Plastic", + }, + "Discovery": { + "headquarter": "ZAF", + "sector": "Banking, Insurance & Financial Services", + }, + "DNO": {"headquarter": "NOR", "sector": "Mining & Extraction"}, + "DSM": {"headquarter": "NLD", "sector": "Chemicals, Petroleum, Rubber & Plastic"}, + "Dundee": { + "headquarter": "CAN", + "sector": "Banking, Insurance & Financial Services", + }, + "Duratex": { + "headquarter": "BRA", + "sector": "Wood, Furniture & Paper Manufacturing", + }, + "Ecopetrol": {"headquarter": "COL", "sector": "Mining & Extraction"}, + "Eisai": {"headquarter": "JPN", "sector": "Chemicals, Petroleum, Rubber & Plastic"}, + "ElCorteInglés": {"headquarter": "ESP", "sector": "Retail"}, + "EmpresasCopec": {"headquarter": "CHL", "sector": "Retail"}, + "Enagas": {"headquarter": "ESP", "sector": "Transport, Freight & Storage"}, + "ENAV": {"headquarter": "ITA", "sector": "Transport, Freight & Storage"}, + "Endesa": {"headquarter": "ESP", "sector": "Utilities"}, + "Enel": {"headquarter": "ITA", "sector": "Utilities"}, + "ENI": {"headquarter": "ITA", "sector": "Mining & Extraction"}, + "Equinor": {"headquarter": "NOR", "sector": "Mining & Extraction"}, + "ERG": {"headquarter": "ITA", "sector": "Chemicals, Petroleum, Rubber & Plastic"}, + "Esprinet": {"headquarter": "ITA", "sector": "Business Services"}, + "Essity": {"headquarter": "SWE", "sector": "Wholesale"}, + "EVRAZ": {"headquarter": "GBR", "sector": "Mining & Extraction"}, + "Exxaro": {"headquarter": "ZAF", "sector": "Mining 
& Extraction"}, + "FalckRenewables": {"headquarter": "ITA", "sector": "Utilities"}, + "FCFMinerals": {"headquarter": "PHL", "sector": "Mining & Extraction"}, + "Fedrigoni": { + "headquarter": "ITA", + "sector": "Wood, Furniture & Paper Manufacturing", + }, + "Feralpi": {"headquarter": "ITA", "sector": "Metals & Metal Products"}, + "Ferrovial": {"headquarter": "ESP", "sector": "Construction"}, + "FerrovieDelloStatoItaliane": { + "headquarter": "ITA", + "sector": "Transport, Freight & Storage", + }, + "FILA": {"headquarter": "ITA", "sector": "Chemicals, Petroleum, Rubber & Plastic"}, + "Finnair": {"headquarter": "FIN", "sector": "Transport, Freight & Storage"}, + "Fortum": {"headquarter": "FIN", "sector": "Utilities"}, + "Generali": { + "headquarter": "ITA", + "sector": "Banking, Insurance & Financial Services", + }, + "GeoPark": {"headquarter": "BMU", "sector": "Mining & Extraction"}, + "Grundfos": { + "headquarter": "DNK", + "sector": "Industrial, Electric & Electronic Machinery", + }, + "GVS": { + "headquarter": "ITA", + "sector": "Industrial, Electric & Electronic Machinery", + }, + "Heimstaden": {"headquarter": "SWE", "sector": "Property Services"}, + "HessCorporation": { + "headquarter": "USA", + "sector": "Chemicals, Petroleum, Rubber & Plastic", + }, + "Hydro": {"headquarter": "NOR", "sector": "Metals & Metal Products"}, + "Iberdrola": {"headquarter": "ESP", "sector": "Utilities"}, + "Iberostar": {"headquarter": "ESP", "sector": "Travel, Personal & Leisure"}, + "IKEA": {"headquarter": "NLD", "sector": "Retail"}, + "Inditex": {"headquarter": "ESP", "sector": "Textiles & Clothing Manufacturing"}, + "Indra": {"headquarter": "ESP", "sector": "Business Services"}, + "InPost": {"headquarter": "LUX", "sector": "Retail"}, + "InsudPharma": { + "headquarter": "AUT", + "sector": "Chemicals, Petroleum, Rubber & Plastic", + }, + "Interpump": { + "headquarter": "ITA", + "sector": "Industrial, Electric & Electronic Machinery", + }, + "ItauUnibanco": { + "headquarter": 
"BRA", + "sector": "Banking, Insurance & Financial Services", + }, + "Kesko": {"headquarter": "FIN", "sector": "Wholesale"}, + "KPN": {"headquarter": "NLD", "sector": "Communications"}, + "L&G": {"headquarter": "GBR", "sector": "Banking, Insurance & Financial Services"}, + "Leonardo": {"headquarter": "ITA", "sector": "Transport Manufacturing"}, + "LupinLimited": { + "headquarter": "IND", + "sector": "Chemicals, Petroleum, Rubber & Plastic", + }, + "Lush": {"headquarter": "GBR", "sector": "Chemicals, Petroleum, Rubber & Plastic"}, + "LU-VE": { + "headquarter": "ITA", + "sector": "Industrial, Electric & Electronic Machinery", + }, + "MAPFRE": { + "headquarter": "ESP", + "sector": "Banking, Insurance & Financial Services", + }, + "MegaHoldings": { + "headquarter": "TWN", + "sector": "Banking, Insurance & Financial Services", + }, + "Meiji": {"headquarter": "JPN", "sector": "Food & Tobacco Manufacturing"}, + "MeliaHotels": {"headquarter": "ESP", "sector": "Travel, Personal & Leisure"}, + "MotorOilHellas": { + "headquarter": "GRC", + "sector": "Chemicals, Petroleum, Rubber & Plastic", + }, + "Mundys": {"headquarter": "ITA", "sector": "Transport, Freight & Storage"}, + "MunichRe": { + "headquarter": "DEU", + "sector": "Banking, Insurance & Financial Services", + }, + "NationalGrid": {"headquarter": "GBR", "sector": "Utilities"}, + "Nedbank": { + "headquarter": "ZAF", + "sector": "Banking, Insurance & Financial Services", + }, + "Newmont": {"headquarter": "USA", "sector": "Mining & Extraction"}, + "NH": {"headquarter": "ESP", "sector": "Travel, Personal & Leisure"}, + "NN": {"headquarter": "NLD", "sector": "Banking, Insurance & Financial Services"}, + "Nordgold": {"headquarter": "GBR", "sector": "Mining & Extraction"}, + "NovoHoldings": { + "headquarter": "DNK", + "sector": "Banking, Insurance & Financial Services", + }, + "NTT": {"headquarter": "JPN", "sector": "Computer Software"}, + "OHLA": {"headquarter": "ESP", "sector": "Construction"}, + "OldMutual": { + 
"headquarter": "ZAF", + "sector": "Banking, Insurance & Financial Services", + }, + "Omron": { + "headquarter": "JPN", + "sector": "Industrial, Electric & Electronic Machinery", + }, + "Orica": {"headquarter": "AUS", "sector": "Chemicals, Petroleum, Rubber & Plastic"}, + "Ørsted": {"headquarter": "DNK", "sector": "Utilities"}, + "OTB": {"headquarter": "ITA", "sector": "Textiles & Clothing Manufacturing"}, + "Pandora": {"headquarter": "DNK", "sector": "Miscellaneous Manufacturing"}, + "Panoro": {"headquarter": "NOR", "sector": "Mining & Extraction"}, + "ParquesReunidos": {"headquarter": "ESP", "sector": "Travel, Personal & Leisure"}, + "Pearson": {"headquarter": "GBR", "sector": "Printing & Publishing"}, + "Philips": { + "headquarter": "NLD", + "sector": "Industrial, Electric & Electronic Machinery", + }, + "Piaggio": {"headquarter": "ITA", "sector": "Transport Manufacturing"}, + "Piquadro": {"headquarter": "ITA", "sector": "Retail"}, + "Prisa": {"headquarter": "ESP", "sector": "Printing & Publishing"}, + "Prudential": { + "headquarter": "GBR", + "sector": "Banking, Insurance & Financial Services", + }, + "Prysmian": {"headquarter": "ITA", "sector": "Communications"}, + "QIAGEN": {"headquarter": "NLD", "sector": "Biotechnology and Life Sciences"}, + "Radici": { + "headquarter": "ITA", + "sector": "Chemicals, Petroleum, Rubber & Plastic", + }, + "Randstad": {"headquarter": "NLD", "sector": "Business Services"}, + "Recordati": { + "headquarter": "ITA", + "sector": "Chemicals, Petroleum, Rubber & Plastic", + }, + "RedElectrica": {"headquarter": "ESP", "sector": "Utilities"}, + "REPSOL": {"headquarter": "ESP", "sector": "Mining & Extraction"}, + "RIOTINTO": {"headquarter": "AUS", "sector": "Mining & Extraction"}, + "RoyalUnibrew": {"headquarter": "DNK", "sector": "Food & Tobacco Manufacturing"}, + "SAES": { + "headquarter": "ITA", + "sector": "Industrial, Electric & Electronic Machinery", + }, + "Saipem": {"headquarter": "ITA", "sector": "Mining & Extraction"}, + "SGR": 
{"headquarter": "ITA", "sector": "Utilities"}, + "SHELL": {"headquarter": "GBR", "sector": "Mining & Extraction"}, + "SHV": {"headquarter": "NLD", "sector": "Wholesale"}, + "Siltronic": { + "headquarter": "DEU", + "sector": "Industrial, Electric & Electronic Machinery", + }, + "SJP": {"headquarter": "GBR", "sector": "Banking, Insurance & Financial Services"}, + "SNAM": {"headquarter": "ITA", "sector": "Transport, Freight & Storage"}, + "SOL": {"headquarter": "ITA", "sector": "Chemicals, Petroleum, Rubber & Plastic"}, + "Sonoco": {"headquarter": "USA", "sector": "Wood, Furniture & Paper Manufacturing"}, + "South32": {"headquarter": "AUS", "sector": "Mining & Extraction"}, + "SSE": {"headquarter": "GBR", "sector": "Utilities"}, + "SumitomoMetalMining": {"headquarter": "JPN", "sector": "Metals & Metal Products"}, + "SwissLife": { + "headquarter": "CHE", + "sector": "Banking, Insurance & Financial Services", + }, + "SwissRe": { + "headquarter": "CHE", + "sector": "Banking, Insurance & Financial Services", + }, + "TeckResources": {"headquarter": "CAN", "sector": "Mining & Extraction"}, + "Telefonica": {"headquarter": "ESP", "sector": "Communications"}, + "Telenor": {"headquarter": "NOR", "sector": "Communications"}, + "Terna": {"headquarter": "ITA", "sector": "Utilities"}, + "TIM": {"headquarter": "ITA", "sector": "Communications"}, + "Tinexta": {"headquarter": "ITA", "sector": "Business Services"}, + "TKH": {"headquarter": "NLD", "sector": "Metals & Metal Products"}, + "Tod's": {"headquarter": "ITA", "sector": "Textiles & Clothing Manufacturing"}, + "TotalEnergies": {"headquarter": "FRA", "sector": "Mining & Extraction"}, + "Unilever": {"headquarter": "GBR", "sector": "Food & Tobacco Manufacturing"}, + "Uniper": {"headquarter": "DEU", "sector": "Utilities"}, + "Usiminas": {"headquarter": "BRA", "sector": "Metals & Metal Products"}, + "VanLanschotKempen": { + "headquarter": "NLD", + "sector": "Banking, Insurance & Financial Services", + }, + "Vattenfall": 
{"headquarter": "SWE", "sector": "Utilities"}, + "Vodacom": {"headquarter": "ZAF", "sector": "Communications"}, + "VODAFONE": {"headquarter": "GBR", "sector": "Communications"}, + "Vopak": {"headquarter": "NLD", "sector": "Transport, Freight & Storage"}, + "VR": {"headquarter": "FIN", "sector": "Transport, Freight & Storage"}, + "Wesfarmers": { + "headquarter": "AUS", + "sector": "Chemicals, Petroleum, Rubber & Plastic", + }, + "Yara": {"headquarter": "NOR", "sector": "Mining & Extraction"}, + "Yokohama": { + "headquarter": "JPN", + "sector": "Chemicals, Petroleum, Rubber & Plastic", + }, + "Other": {"headquarter": "", "sector": ""}, +} diff --git a/country_by_country/utils/utils.py b/country_by_country/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..93ecc4cf828d3cf280d02468f9bb78b822593ae7 --- /dev/null +++ b/country_by_country/utils/utils.py @@ -0,0 +1,114 @@ +# MIT License +# +# Copyright (c) 2024 dataforgood +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
def gather_tables(
    assets: dict,
) -> dict:
    """
    Flatten every extracted table into a dict keyed by "<type>_<index>".

    Each entry of ``assets["table_extractors"]`` is read for its ``"type"``
    (the extractor name) and its ``"tables"`` (a list of tables, handled
    here as pandas DataFrames). ``<index>`` is the position of the table
    in that list.

    Tables whose column labels are not unique get their columns renamed,
    in place, to "No Extract 1", "No Extract 2", ... so that later steps
    can address every column by a distinct label.

    Returns the dict mapping each generated name to its table object (the
    same objects as in ``assets``, not copies).
    """
    tables_by_name = {}
    for asset in assets["table_extractors"]:
        for table_index, table in enumerate(asset["tables"]):
            # Selecting a column by label yields a DataFrame (rather than a
            # Series) only when several columns share that label.
            has_duplicated_labels = any(
                isinstance(table[label], pd.DataFrame) for label in table.columns
            )
            if has_duplicated_labels:
                table.columns = [
                    "No Extract " + str(position + 1)
                    for position in range(table.shape[1])
                ]
            tables_by_name[asset["type"] + "_" + str(table_index)] = table

    return tables_by_name
def gather_tables_with_merge(
    assets: dict,
    new_tables: pd.DataFrame,
    table_extractor: str,
) -> dict:
    """
    Rebuild the name -> table mapping after one extractor has been merged.

    For the extractor whose ``"type"`` equals ``table_extractor`` the
    merged table ``new_tables`` is stored under the bare extractor name.
    Every other extractor keeps its own tables: a single table is stored
    under the bare extractor name, several tables are stored under
    "<type>_<index>", and an extractor without tables contributes nothing.
    """
    tables_by_name = {}
    for asset in assets["table_extractors"]:
        extractor_name = asset["type"]
        if extractor_name == table_extractor:
            tables_by_name[table_extractor] = new_tables
            continue

        tables = asset["tables"]
        if len(tables) == 1:
            tables_by_name[extractor_name] = tables[0]
        else:
            # Covers both "several tables" and "no table" (empty loop).
            for table_index, table in enumerate(tables):
                tables_by_name[extractor_name + "_" + str(table_index)] = table

    return tables_by_name
def display_config() -> None:
    """Write the configuration stored in the session state as a YAML code block."""
    dumped_config = yaml.dump(
        st.session_state["config"],
        indent=2,
        sort_keys=False,
        default_flow_style=False,
    )
    # Wrap the dump in triple backticks so Markdown renders it as a code block.
    st.write(f"## The configuration is : \n\n```\n{dumped_config}\n```")
+st.subheader( + "This app will help you extract a table containing financial information from a pdf", +) +display_pages_menu() + +mytmpfile = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) + +with st.sidebar: + + st.markdown("# PDF Upload") + + st.markdown("## PDF Report to process") + original_pdf = st.file_uploader( + "Upload a pdf document containing financial table : ", + ) + + if original_pdf is not None: + mytmpfile.write(original_pdf.read()) + st.session_state["working_file_pdf"] = mytmpfile + st.session_state["original_pdf_name"] = original_pdf.name + + if "original_pdf_name" in st.session_state: + st.markdown( + "Already loaded file : " + st.session_state["original_pdf_name"], + ) + + st.markdown("# Configuration:\n") + # Upload personalized config if required + loaded_config = st.file_uploader( + "Upload a config if the default config doesn't suit you :", + ) + if loaded_config is not None: + if not loaded_config.name.endswith(".yaml"): + st.error("Please upload a yaml file") + loaded_config = None + + try: + loaded_config_dict = yaml.safe_load(loaded_config) + if not ( + loaded_config_dict.get("pagefilter", False) + and loaded_config_dict.get("table_extraction", False) + ): + st.error("Please upload a valid config file") + loaded_config = None + except yaml.YAMLError as e: + st.error("Unable to load yaml file config") + loaded_config = None + + # Extract config + with open("extract_config.yaml", "r") as f: + default_config = f.read() + + if not st.session_state.get("config_is_set", False): + st.session_state["initial_config"] = yaml.safe_load(default_config) + st.session_state["config"] = copy.deepcopy(st.session_state["initial_config"]) + st.session_state["config_is_set"] = True + + if bool(loaded_config): + st.session_state["initial_config"] = loaded_config_dict + st.session_state["config"] = copy.deepcopy(st.session_state["initial_config"]) + st.session_state["config_is_set"] = True + + # Set page filter + page_filter_radio_dict = { + 
pagefilter["type"]: pagefilter + for pagefilter in st.session_state["initial_config"]["pagefilter"] + } + selected_page_filter = st.radio("Page filter", page_filter_radio_dict.keys()) + set_page_filter(page_filter_radio_dict[selected_page_filter]) + + display_config() + + +if "working_file_pdf" in st.session_state: + # Once a pdf has been uploaded, it will be stored as + # the "original_pdf" key in the session state. + # Hence, the following code will only be executed if a pdf has been uploaded. + + # Display the uploaded pdf + st.markdown( + get_pdf_iframe(st.session_state["working_file_pdf"].name), + unsafe_allow_html=True, + ) + + if "first_time" not in st.session_state: + st.session_state["first_time"] = False + logging.info("Loading config and pdf") + st.session_state["proc"] = ReportProcessor(st.session_state["config"]) + + logging.info("Config and pdf loaded") + + assets = { + "pagefilter": {}, + "table_extractors": [], + } + + # Filtering the pages + st.session_state["proc"].page_filter( + st.session_state["working_file_pdf"].name, + assets, + ) + + logging.info(f"Assets : {assets}") + + if len(assets["pagefilter"]["selected_pages"]) == 0: + # No page has been automatically selected by the page filter + # Hence, we display the full pdf, letting the user select the pages + pdfreader = PdfReader(st.session_state["working_file_pdf"]) + number_pages = len(PdfReader(st.session_state["working_file_pdf"]).pages) + assets["pagefilter"]["selected_pages"] = list(range(number_pages)) + st.session_state["assets"] = assets + st.switch_page("pages/1_Selected_Pages.py") diff --git a/pages/1_Selected_Pages.py b/pages/1_Selected_Pages.py new file mode 100644 index 0000000000000000000000000000000000000000..9b6093f19606e5de8045dc3ddc8a75e6d2c67062 --- /dev/null +++ b/pages/1_Selected_Pages.py @@ -0,0 +1,105 @@ +import streamlit as st +from country_by_country.processor import ReportProcessor +from utils import get_pdf_iframe, set_state +from country_by_country.utils.utils 
def set_extractors() -> None:
    """
    Apply the extractor selection stored under the "extractor_keys" session key.

    Does nothing when that key is absent or None. Otherwise the matching
    entries of ALL_TABLE_EXTRACTORS are passed to ``set_state`` for the
    ["config", "table_extraction"] path and a new ReportProcessor is built
    from the session configuration.
    """
    chosen_keys = st.session_state.get("extractor_keys")
    if chosen_keys is not None:
        chosen_extractors = []
        for extractor_key in st.session_state["extractor_keys"]:
            chosen_extractors.append(ALL_TABLE_EXTRACTORS[extractor_key])
        set_state(["config", "table_extraction"], chosen_extractors)
        st.session_state["proc"] = ReportProcessor(st.session_state["config"])
Defaults pages are the pages extracted by the decision tree algorithm", + list(range(1, number_pages + 1)), + placeholder="Select a page number", + default=[ + i + 1 + for i in st.session_state["assets"]["pagefilter"]["selected_pages"] + ], + disabled=True if "validate_selected_pages" in st.session_state else False, + ) + + # Set extractors + current_table_extractors = [ + extractor["type"] + for extractor in st.session_state["config"]["table_extraction"] + ] + extractor_keys = st.multiselect( + "Extractors", + key="extractor_keys", + options=ALL_TABLE_EXTRACTORS.keys(), + default=current_table_extractors, + on_change=set_extractors, + ) + + submitted = st.button( + label="Validate your selected pages", + on_click=set_validate, + ) + + selected_pages = sorted(selected_pages) + logging.info("Filtering the pdf with pages : " + str(selected_pages)) + st.session_state["pdf_before_page_validation"] = keep_pages( + st.session_state["working_file_pdf"].name, + [i - 1 for i in selected_pages], + ) + + with col1: + # Display the filtered pdf on the left column + st.markdown( + get_pdf_iframe(st.session_state["pdf_before_page_validation"]), + unsafe_allow_html=True, + ) + + if submitted: + # Once the submission button is clicked, we commit the selected pages + # The next pages will work with the pdf_after_page_validation + st.session_state["assets"]["pagefilter"]["selected_pages"] = [ + i - 1 for i in selected_pages + ] + st.session_state["pdf_after_page_validation"] = keep_pages( + st.session_state["working_file_pdf"].name, + [i - 1 for i in selected_pages], + ) + st.switch_page("pages/2_Metadata.py") diff --git a/pages/2_Metadata.py b/pages/2_Metadata.py new file mode 100644 index 0000000000000000000000000000000000000000..fb98fa6fdf1a6a4b9e965fd2a60d0dfa8127325b --- /dev/null +++ b/pages/2_Metadata.py @@ -0,0 +1,108 @@ +import streamlit as st +from utils import set_algorithm_name, get_pdf_iframe +from menu import display_pages_menu +from country_by_country.utils.constants 
import ( + JURIDICTIONS, + CURRENCIES, + SECTORS, + COMPANIES, +) +from Levenshtein import distance +import sys +import logging +import pandas as pd +import numpy as np +import re + +logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s") + + +st.set_page_config(layout="wide", page_title="Report metadata") +st.title("Country by Country Tax Reporting analysis : Metadata") +st.subheader( + "This page will allow you to fill in metadata about the report : company name, headquarter, currency, unit, ...", +) +display_pages_menu() + + +if "pdf_after_page_validation" in st.session_state: + col1, col2 = st.columns(2) + with col1: + st.markdown( + get_pdf_iframe(st.session_state["pdf_after_page_validation"]), + unsafe_allow_html=True, + ) + with col2: + with st.form("metadata_form"): + if "metadata" in st.session_state: + company_name = st.session_state["metadata"]["company_name"] + sector = st.session_state["metadata"]["sector"] + year = st.session_state["metadata"]["year"] + currency = st.session_state["metadata"]["currency"] + unit = st.session_state["metadata"]["unit"] + headquarter = st.session_state["metadata"]["headquarter"] + else: + company_name = None + sector = None + year = "" + currency = None + unit = None + headquarter = "" + companies = list(COMPANIES.keys()) + company_name = st.selectbox( + "Company name", + companies, + index=companies.index(company_name) if company_name else 0, + ) + + sector = st.selectbox( + "Sector", SECTORS, index=SECTORS.index(sector) if sector else 0 + ) + + year = st.text_input("Year", value=year) + + currencies = { + ( + CURRENCIES[currency]["AlphabeticCode"], + CURRENCIES[currency]["Currency"], + ) + for currency in CURRENCIES + } + currencies = sorted(currencies, key=lambda x: x[0]) + currencies = [f"{currency[0]} - {currency[1]}" for currency in currencies] + currency = st.selectbox( + "Currency", + currencies, + index=currencies.index(currency) if currency else currencies.index("EUR - Euro"), + ) + + 
def merge_table(table_extractor: str) -> None:
    """
    Stack the tables of one extractor into ``st.session_state["new_tables"]``.

    A table belongs to the extractor when ``table_extractor`` occurs in its
    key of ``st.session_state["tables"]``. The column labels of the first
    matching table that has columns are applied, in place, to every
    following matching table before the rows are concatenated with a
    fresh integer index.

    When no extractor is given or no table matches, an empty DataFrame is
    stored instead of letting ``pd.concat`` fail on an empty list.
    """
    reference_columns = None
    table_list = []
    if table_extractor is not None:
        for key, table in st.session_state["tables"].items():
            if table_extractor not in key:
                continue
            # Keep looking for a reference header while the one found so
            # far has no column at all.
            if reference_columns is None or len(reference_columns) == 0:
                reference_columns = table.columns
            # Replace column names for all DataFrames in the list
            table.columns = reference_columns
            table_list.append(table)

    if not table_list:
        st.session_state["new_tables"] = pd.DataFrame()
        return

    st.session_state["new_tables"] = pd.concat(
        table_list, ignore_index=True, sort=False
    )
def remove_table(key: str) -> None:
    """
    Delete the table stored under ``key`` in ``st.session_state["tables"]``.

    If the session's "algorithm_name" entry equals ``key`` it is deleted
    as well.
    """
    del st.session_state["tables"][key]
    if "algorithm_name" not in st.session_state:
        return
    if st.session_state["algorithm_name"] == key:
        del st.session_state["algorithm_name"]
Nothing to merge") + + if "first_time_merge" not in st.session_state: + st.session_state["first_time_merge"] = False + st.switch_page("pages/4_Clean_Headers.py") + + col1, col2, col3 = st.columns([3, 1, 3]) + is_equal = True + with col1: + table_extractor = st.selectbox( + "Choose an algorithm :", + filled_table_extractors(st.session_state["assets"]), + args=("selectbox2",), + key="selectbox2", + ) + + number_column = None + if table_extractor is not None: + for key, table in st.session_state["tables"].items(): + if table_extractor in key: + with st.container(border=True): + if not number_column: + number_column = table.shape[1] + else: + if number_column != table.shape[1]: + is_equal = False + st.markdown("Table shape :" + str(table.shape)) + st.markdown("Table name : " + key) + st.dataframe( + table, + ) + st.button( + "Remove this table", + type="primary", + on_click=remove_table, + args=(key,), + key=key, + ) + + with col2: + st.markdown( + "You won't be able to merge if the number of columns is not the same for each tables !!" 
def set_headers(algorithm_name: str) -> None:
    """
    Apply the header choices of the form to the table of ``algorithm_name``.

    For every column of ``st.session_state.tables[algorithm_name]`` the
    choice is read from the session key ``"widget" + str(header)``:

    * ``remove`` drops the column,
    * ``keep`` leaves it untouched,
    * any other value becomes the new column name.

    The table is modified in place. All choices are collected first and
    applied in one drop and one rename, so a new name given to one column
    is never re-interpreted as the old name of another column.
    """
    table = st.session_state.tables[algorithm_name]
    columns_to_drop = []
    new_names = {}
    for header in table.columns.values.tolist():
        choice = st.session_state["widget" + str(header)]
        if choice == remove:
            columns_to_drop.append(header)
        elif choice != keep:
            new_names[header] = choice

    if columns_to_drop:
        # A label shared by several columns shows up several times; one
        # occurrence is enough to drop all of them.
        table.drop(columns=list(dict.fromkeys(columns_to_drop)), inplace=True)
    if new_names:
        table.rename(columns=new_names, inplace=True)
"This page will allow you to modify the headers and to remove columns", +) +display_pages_menu() + +if "tables" not in st.session_state: + st.markdown( + "# !! Don't change the page while the algorithms are runing, else they will start again" + ) + +if ( + st.session_state.get("validate_selected_pages", False) + and "pdf_after_page_validation" in st.session_state +): + col1, col2 = st.columns(2) + with col1: + st.markdown( + get_pdf_iframe(st.session_state["pdf_after_page_validation"]), + unsafe_allow_html=True, + ) + with col2: + index = ( + list(st.session_state.tables.keys()).index( + st.session_state["algorithm_name"], + ) + if "algorithm_name" in st.session_state + else 0 + ) + + st.session_state["algorithm_name"] = st.selectbox( + "Choose the extracted table you want to see", + list(st.session_state.tables.keys()), + index=index, + on_change=set_algorithm_name, + args=("selectbox1",), + key="selectbox1", + ) + st.markdown( + "Table shape :" + + str(st.session_state.tables[st.session_state["algorithm_name"]].shape) + ) + + st.button("Transpose table", on_click=transpose_current_table) + + with st.form(key="my_form"): + for header in st.session_state.tables[ + st.session_state["algorithm_name"] + ].columns.values.tolist(): + st.selectbox( + "Choose the value of the following extracted header : " + + str(header), + header_list, + key="widget" + str(header), + ) + submitted = st.form_submit_button( + label="Submit", + on_click=set_headers, + args=(st.session_state["algorithm_name"],), + ) + + if submitted: + st.switch_page("pages/5_Clean_Tables.py") + + st.markdown("# Current extraction") + st.markdown("The extracted table is displaye below") + st.dataframe( + st.session_state.tables[st.session_state["algorithm_name"]], + width=900, + height=900, + ) diff --git a/pages/5_Clean_Tables.py b/pages/5_Clean_Tables.py new file mode 100644 index 0000000000000000000000000000000000000000..97e1338b8debf44ef8eb0a971fe4a6875a3f17ae --- /dev/null +++ b/pages/5_Clean_Tables.py 
"""Streamlit page for cleaning a table extracted from a report.

The page shows the PDF next to an editable copy of the selected table and,
below it, a styled preview on which optional clean-ups can be toggled
(country-name matching, symbol removal, recomputed totals).
"""

import logging
import math
import re
import sys

import numpy as np
import pandas as pd
import streamlit as st
from Levenshtein import distance

from country_by_country.utils.constants import JURIDICTIONS
from menu import display_pages_menu
from utils import get_pdf_iframe, set_algorithm_name, to_csv_file

logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")


def check_last_cell_sum(column):
    """Build the per-cell CSS for one column, colouring its second-to-last cell.

    The second-to-last cell is compared with the sum of all the cells above
    it. On this page the function is applied after a "Total Calculated" row
    has been appended, which is why the very last cell is left unstyled.

    Args:
        column: pandas Series holding one column of the displayed table.

    Returns:
        A list with one CSS string per cell: "" everywhere except the
        second-to-last entry, which is green when the two values agree and
        red when they differ or cannot be compared.
    """
    size = len(column)
    if size < 2:
        # Nothing to compare; Styler.apply still needs one entry per cell.
        return [""] * size

    styles = [""] * (size - 2)
    try:
        reported = float(column.iloc[-2])
        computed = column.iloc[:-2].sum()
        # Tolerant comparison: an exact != flags sums that differ only by
        # floating-point rounding (0.1 + 0.2 vs 0.3). A non-numeric sum
        # raises TypeError here and is reported as a mismatch.
        matches = math.isclose(reported, computed, rel_tol=1e-9, abs_tol=1e-9)
    except (TypeError, ValueError):
        matches = False

    styles.append("background-color: green" if matches else "background-color: red")
    styles.append("")
    return styles


def column_sum(column):
    """Return the sum of every cell except the last one, or None if the
    cells cannot be summed."""
    try:
        return column.iloc[:-1].sum()
    except (TypeError, ValueError):
        return None


def style_negative(v, props=""):
    """Return `props` when `v` converts to a negative float, otherwise None."""
    try:
        return props if float(v) < 0 else None
    except (TypeError, ValueError):
        return None


def convert_dataframe(dataframe: pd.DataFrame) -> pd.DataFrame:
    """Cast every column that can be cast to float, in place.

    Columns whose values cannot be converted are left untouched. The same
    DataFrame object is returned for convenience.
    """
    for column_name in dataframe.columns:
        try:
            dataframe[column_name] = dataframe[column_name].astype(float)
        except (TypeError, ValueError):
            # Best effort: non-numeric columns keep their original dtype.
            continue
    return dataframe


special_characters = "#&()[]@©"


def style_symbol(v, props=""):
    """Return `props` when `v` contains one of `special_characters`,
    otherwise None (including when `v` is not a string-like iterable)."""
    try:
        return props if any(c in special_characters for c in v) else None
    except TypeError:
        return None


def style_specific_cells(dataframe: pd.DataFrame, index_list: list):
    """Return a CSS frame shaped like `dataframe` that highlights, in the
    first column, the rows whose positions are listed in `index_list`."""
    color = "background-color: lightgreen"
    styles = pd.DataFrame("", index=dataframe.index, columns=dataframe.columns)
    for position in index_list:
        styles.iloc[position, 0] = color
    return styles


def most_similar_string(input_string: str) -> str:
    """Return the jurisdiction string closest to `input_string`.

    Every key of JURIDICTIONS and its "Alpha-2 code" / "Alpha-3 code" values
    are candidates; the candidate with the smallest Levenshtein distance is
    returned as-is, so the result may be a code rather than a name. Ties keep
    the first candidate met.

    Args:
        input_string: cell value to match. None and NaN yield the string
            "None"; other non-string values are matched on their str() form.

    Returns:
        The closest candidate, or None when JURIDICTIONS is empty.
    """
    if input_string is None or (
        isinstance(input_string, float) and math.isnan(input_string)
    ):
        return "None"
    # Empty cells reach this function as NaN and numeric cells as numbers;
    # `distance` only accepts strings.
    query = input_string if isinstance(input_string, str) else str(input_string)

    best_distance = float("inf")
    best_match = None
    for name, codes in JURIDICTIONS.items():
        for candidate in (name, codes["Alpha-2 code"], codes["Alpha-3 code"]):
            candidate_distance = distance(query, candidate)
            if candidate_distance < best_distance:
                best_distance, best_match = candidate_distance, candidate
    return best_match


def validate(data: pd.DataFrame) -> None:
    """Store `data` as the table of the currently selected algorithm."""
    st.session_state.tables[st.session_state["algorithm_name"]] = data


def update_df_csv_to_save() -> None:
    """Apply the data editor's pending cell edits to the stored table and
    refresh the CSV payload kept in `st.session_state["df_csv_to_save"]`."""
    table = st.session_state.tables[st.session_state["algorithm_name"]]
    # NOTE(review): Streamlit documents the keys of "edited_rows" as
    # zero-based row positions, while .loc treats them as labels; the two
    # only coincide for a default RangeIndex. Confirm before relying on it.
    for idx, change in st.session_state.changes["edited_rows"].items():
        for label, value in change.items():
            table.loc[idx, label] = value

    st.session_state["df_csv_to_save"] = to_csv_file(
        st.session_state.tables[st.session_state["algorithm_name"]],
    )


st.set_page_config(layout="wide", page_title="Tables customization")  # page_icon="📈"
st.title("Country by Country Tax Reporting analysis : Tables")
st.subheader(
    "This page will allow you to clean the extracted tables",
)
display_pages_menu()

# Render the page body only when the "validate_selected_pages" flag is truthy
# and a "pdf_after_page_validation" entry exists in the session state.
if (
    st.session_state.get("validate_selected_pages", False)
    and "pdf_after_page_validation" in st.session_state
):
    # NOTE(review): the text this page was reviewed from had lost its
    # indentation; the nesting of the widgets under `col4` below is a
    # reconstruction and should be checked against the intended layout.
    col3, col4 = st.columns(2)
    with col3:
        st.markdown(
            get_pdf_iframe(st.session_state["pdf_after_page_validation"]),
            unsafe_allow_html=True,
        )

    with col4:
        table_names = list(st.session_state.tables.keys())
        # Pre-select the table chosen earlier in the session, if any.
        index = (
            table_names.index(st.session_state["algorithm_name"])
            if "algorithm_name" in st.session_state
            else 0
        )

        st.session_state["algorithm_name"] = st.selectbox(
            "Choose the extracted table you want to see",
            table_names,
            index=index,
            on_change=set_algorithm_name,
            args=("selectbox2",),
            key="selectbox2",
        )

        if "algorithm_name" in st.session_state:
            st.session_state["df_csv_to_save"] = to_csv_file(
                st.session_state.tables[st.session_state["algorithm_name"]]
            )
        st.download_button(
            label="📥 Download Current Table",
            data=(
                st.session_state["df_csv_to_save"]
                if "df_csv_to_save" in st.session_state
                else None
            ),
            disabled="df_csv_to_save" not in st.session_state,
            file_name=(
                f"{st.session_state['original_pdf_name']}.csv"
                if "original_pdf_name" in st.session_state
                else "table.csv"
            ),
        )

        st.session_state.tables[st.session_state["algorithm_name"]] = st.data_editor(
            st.session_state.tables[st.session_state["algorithm_name"]],
            num_rows="dynamic",
            on_change=update_df_csv_to_save,
            key="changes",
            width=800,
            height=900,
        )

    col7, col8, col9 = st.columns([1, 1, 1])
    with col7:
        total = st.checkbox(
            "Calculate the Total of each columns, excluding the last row", value=True
        )
        country = st.checkbox("Activate the country filter", value=True)

    with col8:
        negativ = st.checkbox(
            "Show the negative numbers, for each columns detected as a numerical type"
        )
    with col9:
        symbol = st.checkbox(
            "Show the cells that contain a special symbol : " + special_characters,
            value=True,
        )
        remove_symbols = st.checkbox("Remove the special symbols")

    # Everything below works on a copy; the stored table only changes when
    # the user clicks the save button at the bottom of the page.
    dataframe = st.session_state.tables[st.session_state["algorithm_name"]].copy()

    if country:
        # The last two rows are left out of the country matching.
        dataframe.iloc[:-2, 0] = dataframe.iloc[:-2, 0].apply(most_similar_string)

    if remove_symbols:
        # Drop parenthesised fragments and any remaining special character.
        # Raw string: "\(" in a plain literal is an invalid escape sequence.
        symbol_pattern = re.compile(
            r"\(.*?\)" + "|[" + re.escape(special_characters) + "]"
        )
        for column in dataframe.columns:
            dataframe[column] = dataframe[column].apply(
                lambda x: symbol_pattern.sub("", str(x))
            )
        dataframe = convert_dataframe(dataframe)

    if total:
        dataframe = convert_dataframe(dataframe)
        new_row = dataframe.apply(column_sum, axis=0)
        new_row.iloc[0] = "Total Calculated"
        # Appended under the label -1, i.e. as the last row of the preview.
        dataframe.loc[-1] = new_row

    dataframe_styler = dataframe.style

    if total:
        # Colour, for every column but the first, the row sitting just above
        # the appended "Total Calculated" row.
        dataframe_styler = dataframe_styler.apply(
            check_last_cell_sum,
            subset=pd.IndexSlice[:, dataframe.columns[1:]],
            axis=0,
        )

    if negativ:
        dataframe_styler = dataframe_styler.map(
            style_negative,
            props="color:red;",
        )
    if symbol:
        dataframe_styler = dataframe_styler.map(
            style_symbol,
            props="color:red;",
        )

    if country:
        # Positions whose first-column value differs from the stored table,
        # i.e. the cells rewritten by the country matching above.
        stored_first_column = st.session_state.tables[
            st.session_state["algorithm_name"]
        ].iloc[:-1, 0]
        index_list = [
            position
            for position, (cleaned, stored) in enumerate(
                zip(dataframe.iloc[:-1, 0], stored_first_column)
            )
            if cleaned != stored
        ]
        dataframe_styler = dataframe_styler.apply(
            lambda x: style_specific_cells(x, index_list), axis=None
        )

    st.dataframe(dataframe_styler, use_container_width=True, height=1000)

    st.button(
        "Save the table above",
        on_click=validate,
        args=(dataframe_styler.data,),
    )
"""Helpers shared by the Streamlit pages: PDF embedding, session-state
updates and CSV export."""

import base64
from pathlib import Path
from typing import Any

import pandas as pd
import streamlit as st

# (CSV column name, key in st.session_state["metadata"]) pairs, in the order
# the columns are appended to the exported table.
_METADATA_COLUMNS = (
    ("company", "company_name"),
    ("sector", "sector"),
    ("year", "year"),
    ("currency", "currency"),
    ("unit", "unit"),
    ("headquarter", "headquarter"),
)


def get_pdf_iframe(pdf_to_process: str) -> str:
    """Return HTML embedding the PDF at `pdf_to_process` as a base64 data URI.

    Args:
        pdf_to_process: path of the PDF file to read.

    Returns:
        An HTML snippet; callers render it with
        `st.markdown(..., unsafe_allow_html=True)`.
    """
    base64_pdf = base64.b64encode(Path(pdf_to_process).read_bytes()).decode("utf-8")
    # NOTE(review): in the reviewed text the markup between the triple quotes
    # was empty, so the encoded PDF was never used and nothing was displayed.
    # The iframe below restores the embedding; its width/height are
    # assumptions to confirm against the intended layout.
    pdf_display = f"""
    <iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="1000" type="application/pdf"></iframe>
    """
    return pdf_display


def set_algorithm_name(my_key: str) -> None:
    """Copy the value stored under `my_key` in the session state into
    `st.session_state["algorithm_name"]`."""
    st.session_state["algorithm_name"] = st.session_state[my_key]


@st.cache_data
def _to_csv_bytes(df: pd.DataFrame, metadata_values: tuple) -> bytes:
    """Append one column per metadata value to `df` and encode it as CSV.

    `metadata_values` is an explicit argument so that it is part of the
    cache key.
    """
    for (column, _), value in zip(_METADATA_COLUMNS, metadata_values):
        df = df.assign(**{column: value})
    return df.to_csv(index=False).encode("utf-8")


def to_csv_file(df: pd.DataFrame) -> bytes:
    """Return `df` as UTF-8 CSV bytes, with the report metadata appended.

    The columns company, sector, year, currency, unit and headquarter are
    filled from `st.session_state["metadata"]` when it exists, and with empty
    strings otherwise (for instance when the metadata page was skipped).

    The session state is read here, outside the cached helper: a cached
    function is keyed on its arguments only, so reading the metadata inside
    it would keep serving the CSV built with the previous metadata.

    Raises:
        KeyError: if the metadata entry exists but lacks one of its keys.
    """
    if "metadata" in st.session_state:
        metadata = st.session_state["metadata"]
        values = tuple(metadata[key] for _, key in _METADATA_COLUMNS)
    else:
        values = ("",) * len(_METADATA_COLUMNS)
    return _to_csv_bytes(df, values)


def set_state(key: Any, value: Any) -> None:
    """
    Sets the session_state[key] to value.
    key can be a list to reach nested values.
    Ex: ["key1", "key2"] to reach session_state["key1"]["key2"] value.

    Raises:
        KeyError: if an intermediate key of a nested path does not exist.
    """
    if not isinstance(key, list):
        st.session_state[key] = value
        return

    # Walk down to the container holding the last key, building a readable
    # path along the way for the error message.
    path = "session_state"
    container = st.session_state
    for part in key[:-1]:
        try:
            path += f"['{part}']"
            container = container[part]
        except KeyError as e:
            raise KeyError(f"{path} does not exist") from e
    container[key[-1]] = value