diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..360753b3fc432ae87463636dce63aa79a82cf662 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+IndicTrans2/huggingface_interface/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+IndicTrans2/huggingface_interface/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+IndicTrans2/huggingface_interface/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.o filter=lfs diff=lfs merge=lfs -text
+IndicTrans2/huggingface_interface/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/processor.o filter=lfs diff=lfs merge=lfs -text
+IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/processor.cp310-win_amd64.pyd filter=lfs diff=lfs merge=lfs -text
+IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+IndicTrans2/translation_guidelines.pdf filter=lfs diff=lfs merge=lfs -text
diff --git a/IndicTrans2/.gitignore b/IndicTrans2/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..fec8ee9000164ed51a37f7fa08acf4dbc09bba17
--- /dev/null
+++ b/IndicTrans2/.gitignore
@@ -0,0 +1,148 @@
+# ignore libs and data folder we use
+indic_nlp_library
+indic_nlp_resources
+fairseq
+devtest
+checkpoints
+eval_benchmarks
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+.DS_Store
diff --git a/IndicTrans2/LICENSE b/IndicTrans2/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..a8b61558d8d7a59636405ca50d08df64bcf10e92
--- /dev/null
+++ b/IndicTrans2/LICENSE
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) AI4Bharat.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE
\ No newline at end of file
diff --git a/IndicTrans2/README.md b/IndicTrans2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..0d718eb59ccfca50ffdd7a4ef2d414c08e3877c5
--- /dev/null
+++ b/IndicTrans2/README.md
@@ -0,0 +1,528 @@
+# IndicTrans2
+
+[📜 Paper](https://arxiv.org/abs/2305.16307) | [🌐 Website](https://ai4bharat.iitm.ac.in/indic-trans2) | [▶️ Demo](https://models.ai4bharat.org/#/nmt/v2) | [🤗 HF Interface](https://github.com/AI4Bharat/IndicTrans2/tree/main/huggingface_interface) | [Colab Notebook](https://colab.research.google.com/github/AI4Bharat/IndicTrans2/blob/main/huggingface_interface/colab_inference.ipynb)
+
+IndicTrans2 is the first open-source transformer-based multilingual NMT model that supports high-quality translations across all the 22 scheduled Indic languages, including multiple scripts for low-resource languages like Kashmiri, Manipuri and Sindhi. It adopts script unification wherever feasible to leverage transfer learning via lexical sharing between languages. Overall, the model supports five scripts: Perso-Arabic (Kashmiri, Sindhi, Urdu), Ol Chiki (Santali), Meitei (Manipuri), Latin (English), and Devanagari (used for all the remaining languages).
+
+We open-source all our training datasets (BPCC), back-translation data (BPCC-BT), final IndicTrans2 models, evaluation benchmarks (IN22, which includes IN22-Gen and IN22-Conv) and training and inference scripts for easier use and adoption within the research community. We hope that this will foster even more research in low-resource Indic languages, leading to further improvements in the quality of low-resource translation through contributions from the research community.
+
+This code repository contains instructions for downloading the artifacts associated with IndicTrans2, as well as the code for training/fine-tuning the multilingual NMT models.
+
+Here is the list of languages supported by the IndicTrans2 models:
+
+| | | |
+| --- | --- | --- |
+| Assamese (asm_Beng) | Kashmiri (Arabic) (kas_Arab) | Punjabi (pan_Guru) |
+| Bengali (ben_Beng) | Kashmiri (Devanagari) (kas_Deva) | Sanskrit (san_Deva) |
+| Bodo (brx_Deva) | Maithili (mai_Deva) | Santali (sat_Olck) |
+| Dogri (doi_Deva) | Malayalam (mal_Mlym) | Sindhi (Arabic) (snd_Arab) |
+| English (eng_Latn) | Marathi (mar_Deva) | Sindhi (Devanagari) (snd_Deva) |
+| Konkani (gom_Deva) | Manipuri (Bengali) (mni_Beng) | Tamil (tam_Taml) |
+| Gujarati (guj_Gujr) | Manipuri (Meitei) (mni_Mtei) | Telugu (tel_Telu) |
+| Hindi (hin_Deva) | Nepali (npi_Deva) | Urdu (urd_Arab) |
+| Kannada (kan_Knda) | Odia (ory_Orya) | |
+
+## Updates
+- 🚨 Jan 18, 2025 - Long-context models: RoPE-based variants of the IndicTrans2 models capable of handling sequence lengths of **up to 2048 tokens** are available [here](https://huggingface.co/collections/prajdabre/indictrans2-rope-6742ddac669a05db0804db35).
+- 🚨 Dec 20, 2024 - The latest releases of the high-quality human-annotated BPCC-Seed dataset will henceforth be made available on the [AI4Bharat Website](https://ai4bharat.iitm.ac.in/datasets/bpcc).
+- 🚨 Dec 30, 2023 - Migrated the IndicTrans2 tokenizer for the HF-compatible IndicTrans2 models to [IndicTransToolkit](https://github.com/VarunGumma/IndicTransToolkit); it will be maintained separately there from now on. Added LoRA fine-tuning scripts for our IndicTrans2 models in [huggingface_interface](https://github.com/AI4Bharat/IndicTrans2/tree/main/huggingface_interface).
+- 🚨 Dec 1, 2023 - Release of Indic-Indic model and corresponding distilled variants for each base model. Please refer to the [Download section](https://github.com/AI4Bharat/IndicTrans2#multilingual-translation-models) for the checkpoints.
+- 🚨 Sep 9, 2023 - Added HF compatible IndicTrans2 models. Please refer to the [README](https://github.com/AI4Bharat/IndicTrans2/tree/main/huggingface_interface) for detailed example usage.
+
+## Table of Contents
+
+- [Download Models and Other Artifacts](#download-models-and-other-artifacts)
+ - [Multilingual Translation Models](#multilingual-translation-models)
+ - [Training Data](#training-data)
+ - [Evaluation Data](#evaluation-data)
+- [Installation](#installation)
+- [Data](#data)
+ - [Training](#training)
+ - [Evaluation](#evaluation)
+- [Preparing Data for Training](#preparing-data-for-training)
+ - [Using our SPM model and Fairseq dictionary](#using-our-spm-model-and-fairseq-dictionary)
+ - [Training your own SPM models and learning Fairseq dictionary](#training-your-own-spm-models-and-learning-fairseq-dictionary)
+- [Training / Fine-tuning](#training--fine-tuning)
+- [Inference](#inference)
+ - [Fairseq Inference](#fairseq-inference)
+ - [CT2 Inference](#ct2-inference)
+- [Evaluations](#evaluations)
+ - [Baseline Evaluation](#baseline-evaluation)
+- [LICENSE](#license)
+- [Citation](#citation)
+
+## Download Models and Other Artifacts
+
+### Multilingual Translation Models
+
+| Model | En-Indic | Indic-En | Indic-Indic | Evaluations |
+| ---------------------------- | ----------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Base (used for benchmarking) | [Fairseq](https://huggingface.co/datasets/ai4bharat/BPCC/resolve/main/additional/en-indic-preprint.tar.gz) & [HF](https://huggingface.co/ai4bharat/indictrans2-en-indic-1B) | [fairseq](https://huggingface.co/datasets/ai4bharat/BPCC/resolve/main/additional/indic-en-preprint.tar.gz) & [HF](https://huggingface.co/ai4bharat/indictrans2-indic-en-1B) | [HF](https://huggingface.co/ai4bharat/indictrans2-indic-indic-1B) | [translations](https://huggingface.co/datasets/ai4bharat/BPCC/resolve/main/additional/translation_outputs.tar.gz) (as of May 10, 2023), [metrics](https://drive.google.com/drive/folders/1lOOdaU0VdRSBgJEsNav5zC7wwLBis9NI?usp=sharing) |
+| Distilled | [Fairseq](https://huggingface.co/datasets/ai4bharat/BPCC/resolve/main/additional/en-indic-dist.tar.gz) & [HF](https://huggingface.co/ai4bharat/indictrans2-en-indic-dist-200M) | [Fairseq](https://huggingface.co/datasets/ai4bharat/BPCC/resolve/main/additional/indic-en-dist.tar.gz) & [HF](https://huggingface.co/ai4bharat/indictrans2-indic-en-dist-200M) | [HF](https://huggingface.co/ai4bharat/indictrans2-indic-indic-dist-320M) | - |
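+
+If you plan to use the HF-compatible checkpoints, here is a minimal loading sketch (assuming the `transformers` library; the full recommended pipeline, including preprocessing, is described in the [HF interface README](https://github.com/AI4Bharat/IndicTrans2/tree/main/huggingface_interface)):
+
+```python
+# Minimal sketch: load an HF-compatible IndicTrans2 checkpoint.
+# The models ship custom modeling code, so trust_remote_code=True is required.
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+
+model_id = "ai4bharat/indictrans2-en-indic-1B"
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_id, trust_remote_code=True)
+```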
+
+### Training Data
+
+|Data | URL |
+|-------------------------------------------|--------------------------------------------------------------------------------------------------|
+| ✨ BPCC-Seed Latest Release | [HF Config: bpcc-seed-latest](https://huggingface.co/datasets/ai4bharat/BPCC) |
+| BPCC (*Used in Paper - utilizes the BPCC-Seed V1 dataset*) | [HF Config: bpcc-seed-v1](https://huggingface.co/datasets/ai4bharat/BPCC) |
+| Back-translation (BPCC-BT) | Will be updated |
+| Full Data Split | [Download](https://huggingface.co/datasets/ai4bharat/BPCC/resolve/main/additional/BPCC.zip) |
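+
+A sketch for pulling a BPCC config with the `datasets` library (assumed installed; the config names come from the table above, and the dataset may require accepting its terms on the Hub first):
+
+```python
+# Sketch: load a BPCC config from the Hugging Face Hub.
+# Config name ("bpcc-seed-latest") is taken from the table above.
+from datasets import load_dataset
+
+bpcc_seed = load_dataset("ai4bharat/BPCC", "bpcc-seed-latest")
+print(bpcc_seed)
+```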
+
+
+
+### Evaluation Data
+
+| Data | URL |
+| ----------------------- | ------------------------------------------------------------------------------------ |
+| IN22 test set | [download](https://huggingface.co/datasets/ai4bharat/BPCC/resolve/main/additional/IN22_testset.zip) |
+| FLORES-22 Indic dev set | [download](https://huggingface.co/datasets/ai4bharat/BPCC/resolve/main/additional/flores-22_dev.zip) |
+
+## Installation
+
+Instructions to set up and install everything needed before running the code.
+
+```bash
+# Clone the github repository and navigate to the project directory.
+git clone https://github.com/AI4Bharat/IndicTrans2
+cd IndicTrans2
+
+# Install all the dependencies and requirements associated with the project.
+source install.sh
+```
+
+Note: We recommend creating a virtual environment with python>=3.7.
+
+### Additional notes about Installation
+The ``prepare_data_joint_finetuning.sh`` and ``prepare_data_joint_training.sh`` scripts expect the sentencepiece command-line utility and GNU parallel to be installed.
+1. To install the sentencepiece command-line utility, please follow the instructions [here](https://github.com/google/sentencepiece?tab=readme-ov-file#build-and-install-sentencepiece-command-line-tools-from-c-source).
+2. Please check whether GNU parallel is installed; if not, install it. Alternatively, in case of installation issues, remove ``parallel --pipe --keep-order`` from the respective training / fine-tuning scripts as well as from ``apply_sentence_piece.sh``. A quick prerequisite check is sketched below.
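+
+A quick way to verify these prerequisites from Python (a small sketch; `parallel` is optional if you edit the scripts as noted above):
+
+```python
+# Sketch: check that the CLI tools used by the data-prep scripts are on PATH.
+import shutil
+
+for tool in ("spm_train", "spm_encode", "parallel"):
+    print(f"{tool}: {shutil.which(tool) or 'NOT FOUND'}")
+```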
+
+
+## Data
+
+### Training
+
+Bharat Parallel Corpus Collection (BPCC) is a comprehensive and publicly available parallel corpus that includes both existing and new data for all 22 scheduled Indic languages. It comprises two parts, BPCC-Mined and BPCC-Human, totaling approximately 230 million bitext pairs. BPCC-Mined contains about 228 million pairs, with nearly 126 million pairs newly added as a part of this work. On the other hand, BPCC-Human consists of 2.2 million gold-standard English-Indic pairs, with an additional 644K bitext pairs from English Wikipedia sentences (forming the BPCC-H-Wiki subset) and 139K sentences covering everyday use cases (forming the BPCC-H-Daily subset). It is worth highlighting that BPCC provides the first available datasets for 7 languages and significantly increases the available data for all languages covered.
+
+You can find the contribution from different sources in the following table:
+
+| Subset | Category | Source | # Pairs |
+| ------ | -------- | ------ | ------- |
+| BPCC-Mined | Existing | Samanantar | 19.4M |
+| BPCC-Mined | Existing | NLLB | 85M |
+| BPCC-Mined | Newly Added | Samanantar++ | 121.6M |
+| BPCC-Mined | Newly Added | Comparable | 4.3M |
+| BPCC-Human | Existing | NLLB | 18.5K |
+| BPCC-Human | Existing | ILCI | 1.3M |
+| BPCC-Human | Existing | Massive | 115K |
+| BPCC-Human | Newly Added | Wiki | 644K |
+| BPCC-Human | Newly Added | Daily | 139K |
+
+Additionally, we provide augmented back-translation data generated by our intermediate IndicTrans2 models for training purposes. Please refer to our paper for more details on the selection of sample proportions and sources.
+
+| Data | # Pairs |
+| ---- | ------- |
+| English BT data (English Original) | 401.9M |
+| Indic BT data (Indic Original) | 400.9M |
+
+
+### Evaluation
+
+IN22 test set is a newly created comprehensive benchmark for evaluating machine translation performance in multi-domain, n-way parallel contexts across 22 Indic languages. It has been created from three distinct subsets, namely IN22-Wiki, IN22-Web and IN22-Conv. The Wikipedia and Web source subsets offer diverse content spanning news, entertainment, culture, legal, and India-centric topics. IN22-Wiki and IN22-Web have been combined and released for evaluation purposes as IN22-Gen. Meanwhile, IN22-Conv, the conversation-domain subset, is designed to assess translation quality in typical day-to-day conversational-style applications.
+
+
+
+You can download the data artifacts released as a part of this work from the [following section](#download-models-and-other-artifacts).
+
+## Preparing Data for Training
+
+BPCC data is organized under different subsets as described above, where each subset contains language pair subdirectories with the sentence pairs. We also provide LaBSE and LASER scores for the mined subsets of BPCC. In order to replicate our training setup, you will need to combine the data for the corresponding language pairs from the different subsets and remove any overlapping bitext pairs.
+
+Here is the expected directory structure of the data:
+
+```bash
+BPCC
+├── eng_Latn-asm_Beng
+│ ├── train.eng_Latn
+│ └── train.asm_Beng
+├── eng_Latn-ben_Beng
+└── ...
+```
+
+While we provide subsets deduplicated against the currently available benchmarks, we highly recommend performing deduplication using the combined monolingual side of all the benchmarks. You can use the following command for deduplication once you combine the monolingual side of all the benchmarks in a directory.
+
+```bash
+python3 scripts/dedup_benchmark.py <in_data_dir> <out_data_dir> <benchmark_dir>
+```
+
+- `<in_data_dir>`: path to the directory containing train data for each language pair in the format `{src_lang}-{tgt_lang}`
+- `<out_data_dir>`: path to the directory where the deduplicated train data will be written for each language pair in the format `{src_lang}-{tgt_lang}`
+- `<benchmark_dir>`: path to the directory containing the language-wise monolingual side of the dev/test sets, with monolingual files named `test.{lang}`
+
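+The overlap-removal logic is roughly the following (an illustrative sketch only, not the actual `scripts/dedup_benchmark.py`; the function and variable names are hypothetical):
+
+```python
+# Sketch: drop train pairs whose source or target side also occurs in the
+# combined monolingual side of the dev/test benchmarks.
+def dedup_pair(train_src, train_tgt, benchmark_sents, out_src, out_tgt):
+    banned = {s.strip() for s in benchmark_sents}
+    with open(train_src) as fs, open(train_tgt) as ft, \
+         open(out_src, "w") as fo_s, open(out_tgt, "w") as fo_t:
+        for s, t in zip(fs, ft):
+            if s.strip() in banned or t.strip() in banned:
+                continue  # overlaps with a benchmark sentence
+            fo_s.write(s)
+            fo_t.write(t)
+```
+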
+### Using our SPM model and Fairseq dictionary
+
+Once you complete the deduplication of the training data with the available benchmarks, you can preprocess and binarize the data for training models. Please download our trained SPM model and learned Fairseq dictionary using the following links for your experiments.
+
+| | En-Indic | Indic-En | Indic-Indic |
+| ------------------ | -------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- |
+| SPM model | [download](https://huggingface.co/datasets/ai4bharat/BPCC/resolve/main/additional/en-indic-spm.zip) | [download](https://huggingface.co/datasets/ai4bharat/BPCC/resolve/main/additional/indic-en-spm.zip) | [download](https://huggingface.co/datasets/ai4bharat/BPCC/resolve/main/additional/indic-indic-spm.zip) |
+| Fairseq dictionary | [download](https://huggingface.co/datasets/ai4bharat/BPCC/resolve/main/additional/en-indic-fairseq-dict.zip) | [download](https://huggingface.co/datasets/ai4bharat/BPCC/resolve/main/additional/indic-en-fairseq-dict.zip) | [download](https://huggingface.co/datasets/ai4bharat/BPCC/resolve/main/additional/indic-indic-fairseq-dict.zip) |
+
+To prepare the data for training En-Indic model, please do the following:
+
+1. Download the SPM model in the experiment directory and rename it as `vocab`.
+2. Download the Fairseq dictionary in the experiment directory and rename it as `final_bin`.
+
+Here is the expected directory for training En-Indic model:
+
+```bash
+en-indic-exp
+├── train
+│ ├── eng_Latn-asm_Beng
+│ │ ├── train.eng_Latn
+│ │ └── train.asm_Beng
+│ ├── eng_Latn-ben_Beng
+│ └── ...
+├── devtest
+│ └── all
+│ ├── eng_Latn-asm_Beng
+│ │ ├── dev.eng_Latn
+│ │ └── dev.asm_Beng
+│ ├── eng_Latn-ben_Beng
+│ └── ...
+├── vocab
+│ ├── model.SRC
+│ ├── model.TGT
+│ ├── vocab.SRC
+│ └── vocab.TGT
+└── final_bin
+ ├── dict.SRC.txt
+ └── dict.TGT.txt
+```
+
+To prepare data for training the Indic-En model, you should reverse the language pair directories within the train and devtest directories. Additionally, make sure to download the corresponding SPM model and Fairseq dictionary and put them in the experiment directory, similar to the procedure mentioned above for En-Indic model training.
+
+You can binarize the data for model training using the following:
+
+```bash
+bash prepare_data_joint_finetuning.sh <data_dir>
+```
+
+- `<data_dir>`: path to the directory containing the raw data for binarization
+
+You will need to follow the same steps for data preparation in case of fine-tuning models.
+
+### Training your own SPM models and learning Fairseq dictionary
+
+If you want to train your own SPM model and learn Fairseq dictionary, then please do the following:
+
+1. Collect a balanced amount of English and Indic monolingual data (we use around 3 million sentences per language-script combination). If some languages have limited data available, increase their representation to achieve a fair distribution of tokens across languages.
+2. Perform script unification for Indic languages wherever possible using `scripts/preprocess_translate.py` and concatenate all Indic data into a single file.
+3. Train two SPM models, one for English and the other for the Indic side, using the following:
+
+```bash
+spm_train --input=train.indic --model_prefix=<model_prefix> --vocab_size=<vocab_size> --character_coverage=1.0 --model_type=BPE
+```
+
+4. Copy the trained SPM models into the experiment directory mentioned earlier and learn the Fairseq dictionary using the following:
+
+```bash
+bash prepare_data_joint_training.sh
+```
+
+5. You will need to use the same Fairseq dictionary for any subsequent fine-tuning experiments and refer to the steps described above ([link](#using-our-spm-model-and-fairseq-dictionary)).
+
+## Training / Fine-tuning
+
+After binarizing the data, you can use `train.sh` to train the models. We provide the default hyperparameters used in this work, which you can modify as per your requirements. If you want to train the model with a customized architecture, please define the architecture in `model_configs/custom_transformer.py`. You can start the model training with the following command:
+
+```bash
+bash train.sh <exp_dir> <model_arch>
+```
+
+- `<exp_dir>`: path to the directory containing the binarized data
+- `<model_arch>`: custom transformer architecture used for model training
+
+For fine-tuning, the initial steps remain the same. However, the `finetune.sh` script includes an additional argument, `pretrained_ckpt`, which specifies the model checkpoint to be loaded for further fine-tuning. You can perform fine-tuning using the following command:
+
+```bash
+bash finetune.sh <exp_dir> <model_arch> <pretrained_ckpt>
+```
+
+- `<exp_dir>`: path to the directory containing the binarized data
+- `<model_arch>`: custom transformer architecture used for model training
+  - `transformer_18_18` - For IT2 Base models
+  - `transformer_base18L` - For IT2 Distilled models
+- `<pretrained_ckpt>`: path to the fairseq model checkpoint to be loaded for further fine-tuning
+
+You can download the model artifacts released as a part of this work from the [following section](#download-models-and-other-artifacts).
+
+The pretrained checkpoint archives contain 3 directories: a fairseq model directory and 2 CTranslate2-ported model directories. Please note that the CT2 models are provided only for efficient inference. For fine-tuning purposes, you should use the `fairseq_model`. Afterwards, you can use the [fairseq-ct2-converter](https://opennmt.net/CTranslate2/guides/fairseq.html) to port your fine-tuned checkpoints to CT2 for faster inference.
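+
+A conversion sketch using the CTranslate2 Python API (assuming the `ctranslate2` package, with fairseq installed; all paths below are illustrative):
+
+```python
+# Sketch: port a fine-tuned fairseq checkpoint to CTranslate2.
+from ctranslate2.converters import FairseqConverter
+
+converter = FairseqConverter(
+    model_path="fairseq_model/checkpoint_best.pt",  # fine-tuned checkpoint (illustrative path)
+    data_dir="final_bin",                           # fairseq dictionaries used during training
+)
+converter.convert(output_dir="ct2_model", quantization="int8")
+```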
+
+## Inference
+
+### Fairseq Inference
+
+In order to run inference with our pretrained models using the bash interface, please use the following:
+
+```bash
+bash joint_translate.sh <infname> <outfname> <src_lang> <tgt_lang> <ckpt_dir>
+```
+
+- `infname`: path to the input file containing sentences
+- `outfname`: path to the output file where the translations should be stored
+- `src_lang`: source language
+- `tgt_lang`: target language
+- `ckpt_dir`: path to the fairseq model checkpoint directory
+
+If you want to run inference using the Python interface, please execute the following block of code from the root directory:
+
+```python3
+from inference.engine import Model
+
+model = Model(ckpt_dir, model_type="fairseq")
+
+sents = [sent1, sent2,...]
+
+# for a batch of sentences
+model.batch_translate(sents, src_lang, tgt_lang)
+
+# for a paragraph
+model.translate_paragraph(text, src_lang, tgt_lang)
+```
+
+### CT2 Inference
+
+In order to run inference with a CT2-ported model using the Python interface, please execute the following block of code from the root directory:
+
+```python3
+from inference.engine import Model
+
+model = Model(ckpt_dir, model_type="ctranslate2")
+
+sents = [sent1, sent2,...]
+
+# for a batch of sentences
+model.batch_translate(sents, src_lang, tgt_lang)
+
+# for a paragraph
+model.translate_paragraph(text, src_lang, tgt_lang)
+```
+
+## Evaluations
+
+We consider the chrF++ score as our primary metric. Additionally, we also report the BLEU and COMET scores.
+We also perform statistical significance tests for each metric to ascertain whether the differences are statistically significant.
+
+In order to run our evaluation scripts, you will need to organize the evaluation test sets into the following directory structure:
+
+```bash
+eval_benchmarks
+├── flores
+│ └── eng_Latn-asm_Beng
+│ ├── test.eng_Latn
+│ └── test.asm_Beng
+├── in22-gen
+├── in22-conv
+├── ntrex
+└── ...
+```
+
+To compute the BLEU and chrF++ scores for a prediction file, you can use the following command:
+
+```bash
+bash compute_metrics.sh <pred_fname> <ref_fname> <tgt_lang>
+```
+
+- `pred_fname`: path to the model translations
+- `ref_fname`: path to the reference translations
+- `tgt_lang`: target language
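+
+The same metrics can also be computed from Python with the sacrebleu API (a sketch assuming sacrebleu >= 2.x; file paths are illustrative, and for Indic targets the files should be IndicNLP-tokenized first, mirroring the script):
+
+```python
+# Sketch: corpus-level BLEU and chrF++ via the sacrebleu Python API.
+from sacrebleu.metrics import BLEU, CHRF
+
+preds = [line.strip() for line in open("test.hin_Deva.pred.it2")]
+refs = [line.strip() for line in open("test.hin_Deva")]
+
+bleu = BLEU(tokenize="none")  # both files pre-tokenized with IndicNLP
+chrfpp = CHRF(word_order=2)   # word_order=2 corresponds to chrF++
+print(bleu.corpus_score(preds, [refs]))
+print(chrfpp.corpus_score(preds, [refs]))
+```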
+
+In order to automate the inference over the individual test sets for En-Indic, you can use the following command:
+
+```bash
+bash eval.sh <devtest_data_dir> <ckpt_dir> <system>
+```
+
+- `<devtest_data_dir>`: path to the evaluation set with language pair subdirectories (for example, the flores directory in the above tree structure)
+- `<ckpt_dir>`: path to the fairseq model checkpoint directory
+- `<system>`: system name suffix to store the predictions in the format `test.{lang}.pred.{system}`
+
+In case of Indic-En evaluation, please use the following command:
+
+```bash
+bash eval_rev.sh <devtest_data_dir> <ckpt_dir> <system>
+```
+
+- `<devtest_data_dir>`: path to the evaluation set with language pair subdirectories (for example, the flores directory in the above tree structure)
+- `<ckpt_dir>`: path to the fairseq model checkpoint directory
+- `<system>`: system name suffix to store the predictions in the format `test.{lang}.pred.{system}`
+
+**_Note: You don’t need to reverse the test set directions for each language pair._**
+
+In case of Indic-Indic evaluation, please use the following command:
+
+```bash
+bash pivot_eval.sh <devtest_data_dir> <pivot_lang> <src2pivot_ckpt_dir> <pivot2tgt_ckpt_dir> <system>
+```
+
+- `<devtest_data_dir>`: path to the evaluation set with language pair subdirectories (for example, the flores directory in the above tree structure)
+- `<pivot_lang>`: pivot language (default should be `eng_Latn`)
+- `<src2pivot_ckpt_dir>`: path to the fairseq Indic-En model checkpoint directory
+- `<pivot2tgt_ckpt_dir>`: path to the fairseq En-Indic model checkpoint directory
+- `<system>`: system name suffix to store the predictions in the format `test.{lang}.pred.{system}`
+
+In order to perform significance testing for BLEU and chrF++ metrics after you have the predictions for different systems, you can use the following command:
+
+```bash
+bash compute_metrics_significance.sh <devtest_data_dir>
+```
+
+- `<devtest_data_dir>`: path to the evaluation set with language pair subdirectories (for example, the flores directory in the above tree structure)
+
+Similarly, to compute the COMET scores and perform significance testing on predictions of different systems, you can use the following command.
+
+```bash
+bash compute_comet_score.sh <devtest_data_dir>
+```
+
+- `<devtest_data_dir>`: path to the evaluation set with language pair subdirectories (for example, the flores directory in the above tree structure)
+
+Please note that since we compute significance tests with the same script and automate everything, it is best to have the predictions of all the systems in place before running it, to avoid repeated runs.
+Also, the systems are defined within the script itself; if you want to try out other systems, make sure to edit the list there.
+
+### Baseline Evaluation
+
+To generate the translation results for baseline models such as M2M-100, mBART, Azure, Google, and NLLB MoE, you can check the scripts provided in the `baseline_eval` directory of this repository. For NLLB distilled, you can either modify the NLLB MoE evaluation script or use this [repository](https://github.com/pluiez/NLLB-inference). Similarly, for IndicTrans inference, please refer to this [repository](https://github.com/ai4bharat/IndicTrans).
+
+You can download the translation outputs released as a part of this work from the [following section](#download-models-and-other-artifacts).
+
+## LICENSE
+
+The following table lists the licenses associated with the different artifacts released as a part of this work:
+
+| Artifact | LICENSE |
+| ----------------------------------------------------- | --------------------------------------------------------------------- |
+| Existing Mined Corpora (NLLB & Samanantar) | [CC0](https://creativecommons.org/share-your-work/public-domain/cc0/) |
+| Existing Seed Corpora (NLLB-Seed, ILCI, MASSIVE) | [CC0](https://creativecommons.org/share-your-work/public-domain/cc0/) |
+| Newly Added Mined Corpora (Samanantar++ & Comparable) | [CC0](https://creativecommons.org/share-your-work/public-domain/cc0/) |
+| Newly Added Seed Corpora (BPCC-H-Wiki & BPCC-H-Daily) | [CC-BY-4.0](https://creativecommons.org/licenses/by/4.0/) |
+| Newly Created IN22 test set (IN22-Gen & IN22-Conv)    | [CC-BY-4.0](https://creativecommons.org/licenses/by/4.0/)              |
+| Back-translation data (BPCC-BT) | [CC0](https://creativecommons.org/share-your-work/public-domain/cc0/) |
+| Model checkpoints | [MIT](https://github.com/ai4bharat/IndicTrans2/blob/main/LICENSE) |
+
+The mined corpora collection (BPCC-Mined), existing seed corpora (NLLB-Seed, ILCI, MASSIVE), and back-translation data (BPCC-BT) are released under the following licensing scheme:
+
+- We do not own any of the text from which this data has been extracted.
+- We license the actual packaging of this data under the Creative Commons [CC0 license (“no rights reserved”)](https://creativecommons.org/share-your-work/public-domain/cc0/).
+- To the extent possible under law, [AI4Bharat](https://ai4bharat.iitm.ac.in/) has waived all copyright and related or neighboring rights to BPCC-Mined, existing seed corpora (NLLB-Seed, ILCI, MASSIVE) and BPCC-BT.
+
+## Citation
+
+```bibtex
+@article{gala2023indictrans,
+title={IndicTrans2: Towards High-Quality and Accessible Machine Translation Models for all 22 Scheduled Indian Languages},
+author={Jay Gala and Pranjal A Chitale and A K Raghavan and Varun Gumma and Sumanth Doddapaneni and Aswanth Kumar M and Janki Atul Nawale and Anupama Sujatha and Ratish Puduppully and Vivek Raghavan and Pratyush Kumar and Mitesh M Khapra and Raj Dabre and Anoop Kunchukuttan},
+journal={Transactions on Machine Learning Research},
+issn={2835-8856},
+year={2023},
+url={https://openreview.net/forum?id=vfT4YuzAYA},
+note={}
+}
+```
diff --git a/IndicTrans2/apply_sentence_piece.sh b/IndicTrans2/apply_sentence_piece.sh
new file mode 100644
index 0000000000000000000000000000000000000000..17bb48b604f868488fd671bc5d142bfea313ea7e
--- /dev/null
+++ b/IndicTrans2/apply_sentence_piece.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# This script tokenizes the preprocessed train and dev set using the trained spm models.
+
+
+echo `date`
+exp_dir=$1 # path to the experiment directory
+data_dir=$2 # path to the data directory where all lang pairs are concatenated
+bpe_dir=$3 # path to the tokenized data directory
+src_lang=$4 # source language
+tgt_lang=$5 # target language
+split=$6 # name of the split
+parallel_installed=${7:-false} # If GNU Parallel is installed or not
+
+in_split_dir=$data_dir/$split
+out_split_dir=$bpe_dir/$split
+
+echo "Apply Sentence Piece tokenization to SRC corpus"
+# for very large datasets, it is recommended to use gnu-parallel to speed up applying bpe
+
+if $parallel_installed; then
+ parallel --pipe --keep-order \
+ spm_encode --model=$exp_dir/vocab/model.SRC \
+ --output_format=piece \
+ < $in_split_dir.$src_lang \
+ > $out_split_dir.$src_lang
+else
+ spm_encode --model=$exp_dir/vocab/model.SRC \
+ --output_format=piece \
+ < $in_split_dir.$src_lang \
+ > $out_split_dir.$src_lang
+fi
+
+echo "Apply Sentence Piece tokenization to TGT corpus"
+# for very large datasets, it is recommended to use gnu-parallel to speed up applying bpe
+
+if $parallel_installed; then
+ parallel --pipe --keep-order \
+ spm_encode --model=$exp_dir/vocab/model.TGT \
+ --output_format=piece \
+ < $in_split_dir.$tgt_lang \
+ > $out_split_dir.$tgt_lang
+else
+ spm_encode --model=$exp_dir/vocab/model.TGT \
+ --output_format=piece \
+ < $in_split_dir.$tgt_lang \
+ > $out_split_dir.$tgt_lang
+fi
\ No newline at end of file
diff --git a/IndicTrans2/baseline_eval/azure_translate.py b/IndicTrans2/baseline_eval/azure_translate.py
new file mode 100644
index 0000000000000000000000000000000000000000..37c4c111f43945c7747353058b75ea4162c47c52
--- /dev/null
+++ b/IndicTrans2/baseline_eval/azure_translate.py
@@ -0,0 +1,183 @@
+import os
+import sys
+import glob
+import requests
+from urllib.parse import urlencode
+from dotenv import dotenv_values
+import traceback
+import time
+
+flores_to_iso = {
+ "asm_Beng": "as",
+ "ben_Beng": "bn",
+ "brx_Deva": "brx",
+ "doi_Deva": "doi",
+ "eng_Latn": "en",
+ "gom_Deva": "gom",
+ "guj_Gujr": "gu",
+ "hin_Deva": "hi",
+ "kan_Knda": "kn",
+ "kas_Arab": "ks",
+ "kas_Deva": "ks_Deva",
+ "mai_Deva": "mai",
+ "mal_Mlym": "ml",
+ "mar_Deva": "mr",
+ "mni_Beng": "mni_Beng",
+ "mni_Mtei": "mni",
+ "npi_Deva": "ne",
+ "ory_Orya": "or",
+ "pan_Guru": "pa",
+ "san_Deva": "sa",
+ "sat_Olck": "sat",
+ "snd_Arab": "sd",
+ "snd_Deva": "sd_Deva",
+ "tam_Taml": "ta",
+ "tel_Telu": "te",
+ "urd_Arab": "ur",
+}
+
+
+class AzureTranslator:
+ def __init__(
+ self,
+ subscription_key: str,
+ region: str,
+ endpoint: str = "https://api.cognitive.microsofttranslator.com",
+ ) -> None:
+ self.http_headers = {
+ "Ocp-Apim-Subscription-Key": subscription_key,
+ "Ocp-Apim-Subscription-Region": region,
+ }
+ self.translate_endpoint = endpoint + "/translate?api-version=3.0&"
+ self.languages_endpoint = endpoint + "/languages?api-version=3.0"
+
+ self.supported_languages = self.get_supported_languages()
+
+ def get_supported_languages(self) -> dict:
+ return requests.get(self.languages_endpoint).json()["translation"]
+
+ def batch_translate(self, texts: list, src_lang: str, tgt_lang: str) -> list:
+ if not texts:
+ return texts
+
+ src_lang = flores_to_iso[src_lang]
+ tgt_lang = flores_to_iso[tgt_lang]
+
+ if src_lang not in self.supported_languages:
+ raise NotImplementedError(
+ f"Source language code: `{src_lang}` not supported!"
+ )
+
+ if tgt_lang not in self.supported_languages:
+ raise NotImplementedError(
+ f"Target language code: `{tgt_lang}` not supported!"
+ )
+
+ body = [{"text": text} for text in texts]
+ query_string = urlencode(
+ {
+ "from": src_lang,
+ "to": tgt_lang,
+ }
+ )
+
+ try:
+ response = requests.post(
+ self.translate_endpoint + query_string,
+ headers=self.http_headers,
+ json=body,
+ )
+        except Exception:
+ traceback.print_exc()
+ return None
+
+ try:
+ response = response.json()
+        except Exception:
+ traceback.print_exc()
+ print("Response:", response.text)
+ return None
+
+ return [payload["translations"][0]["text"] for payload in response]
+
+ def text_translate(self, text: str, src_lang: str, tgt_lang: str) -> str:
+ return self.batch_translate([text], src_lang, tgt_lang)[0]
+
+
+if __name__ == "__main__":
+ root_dir = sys.argv[1]
+
+ # Expects a .env file containing the API credentials.
+ config = dotenv_values(os.path.join(os.path.dirname(__file__), ".env"))
+
+ t = AzureTranslator(
+ config["AZURE_TRANSLATOR_TEXT_SUBSCRIPTION_KEY"],
+ config["AZURE_TRANSLATOR_TEXT_REGION"],
+ config["AZURE_TRANSLATOR_TEXT_ENDPOINT"],
+ )
+
+ pairs = sorted(glob.glob(os.path.join(root_dir, "*")))
+
+    for pair in pairs:
+ basename = os.path.basename(pair)
+
+ print(pair)
+
+ src_lang, tgt_lang = basename.split("-")
+
+ print(f"{src_lang} - {tgt_lang}")
+
+ # source to target translations
+ src_infname = os.path.join(pair, f"test.{src_lang}")
+ tgt_outfname = os.path.join(pair, f"test.{tgt_lang}.pred.azure")
+ if not os.path.exists(src_infname):
+ continue
+
+ src_sents = [
+ sent.replace("\n", "").strip()
+ for sent in open(src_infname, "r").read().split("\n")
+ if sent
+ ]
+
+ if not os.path.exists(tgt_outfname):
+ try:
+ translations = []
+ for i in range(0, len(src_sents), 128):
+ start, end = i, int(min(i + 128, len(src_sents)))
+ translations.extend(
+ t.batch_translate(src_sents[start:end], src_lang, tgt_lang)
+ )
+ with open(tgt_outfname, "w") as f:
+ f.write("\n".join(translations))
+
+ time.sleep(10)
+ except Exception as e:
+ print(e)
+ continue
+
+ # target to source translations
+ tgt_infname = os.path.join(pair, f"test.{tgt_lang}")
+ src_outfname = os.path.join(pair, f"test.{src_lang}.pred.azure")
+ if not os.path.exists(tgt_infname):
+ continue
+
+ tgt_sents = [
+ sent.replace("\n", "").strip()
+ for sent in open(tgt_infname, "r").read().split("\n")
+ if sent
+ ]
+
+ if not os.path.exists(src_outfname):
+ try:
+ translations = []
+ for i in range(0, len(tgt_sents), 128):
+ start, end = i, int(min(i + 128, len(tgt_sents)))
+ translations.extend(
+ t.batch_translate(tgt_sents[start:end], tgt_lang, src_lang)
+ )
+ with open(src_outfname, "w") as f:
+ f.write("\n".join(translations))
+            except Exception as e:
+                print(e)
+                continue
+
+ time.sleep(10)
diff --git a/IndicTrans2/baseline_eval/google_translate.py b/IndicTrans2/baseline_eval/google_translate.py
new file mode 100644
index 0000000000000000000000000000000000000000..761c918f978d777bd77d94aa629dfea8a1eca5dd
--- /dev/null
+++ b/IndicTrans2/baseline_eval/google_translate.py
@@ -0,0 +1,129 @@
+import os
+import sys
+import glob
+from tqdm import tqdm
+from google.cloud import translate
+
+# Expects a json file containing the API credentials.
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.join(
+ os.path.dirname(__file__), r"api_key.json"
+)
+
+flores_to_iso = {
+ "asm_Beng": "as",
+ "ben_Beng": "bn",
+ "doi_Deva": "doi",
+ "eng_Latn": "en",
+ "gom_Deva": "gom",
+ "guj_Gujr": "gu",
+ "hin_Deva": "hi",
+ "kan_Knda": "kn",
+ "mai_Deva": "mai",
+ "mal_Mlym": "ml",
+ "mar_Deva": "mr",
+ "mni_Mtei": "mni_Mtei",
+ "npi_Deva": "ne",
+ "ory_Orya": "or",
+ "pan_Guru": "pa",
+ "san_Deva": "sa",
+ "sat_Olck": "sat",
+ "snd_Arab": "sd",
+ "tam_Taml": "ta",
+ "tel_Telu": "te",
+ "urd_Arab": "ur",
+}
+
+
+# Copy the project id from the json file containing API credentials
+def translate_text(text, src_lang, tgt_lang, project_id="project_id"):
+
+ src_lang = flores_to_iso[src_lang]
+ tgt_lang = flores_to_iso[tgt_lang]
+
+ if src_lang == "mni_Mtei":
+ src_lang = "mni-Mtei"
+
+ if tgt_lang == "mni_Mtei":
+ tgt_lang = "mni-Mtei"
+
+ client = translate.TranslationServiceClient()
+
+ location = "global"
+
+ parent = f"projects/{project_id}/locations/{location}"
+
+ response = client.translate_text(
+ request={
+ "parent": parent,
+ "contents": [text],
+ "mime_type": "text/plain", # mime types: text/plain, text/html
+ "source_language_code": src_lang,
+ "target_language_code": tgt_lang,
+ }
+ )
+
+ translated_text = ""
+ for translation in response.translations:
+ translated_text += translation.translated_text
+
+ return translated_text
+
+
+if __name__ == "__main__":
+ root_dir = sys.argv[1]
+
+ pairs = sorted(glob.glob(os.path.join(root_dir, "*")))
+
+ for pair in pairs:
+
+ print(pair)
+
+ basename = os.path.basename(pair)
+
+ src_lang, tgt_lang = basename.split("-")
+ if src_lang not in flores_to_iso.keys() or tgt_lang not in flores_to_iso.keys():
+ continue
+
+ if src_lang == "eng_Latn":
+ lang = tgt_lang
+ else:
+ lang = src_lang
+
+ lang = flores_to_iso[lang]
+
+        # check membership against the list of codes (not the raw string)
+        # to avoid accidental substring matches
+        if lang not in "as bn doi gom gu hi kn mai ml mni_Mtei mr ne or pa sa sd ta te ur".split():
+ continue
+
+ print(f"{src_lang} - {tgt_lang}")
+
+ # source to target translations
+
+ src_infname = os.path.join(pair, f"test.{src_lang}")
+ tgt_outfname = os.path.join(pair, f"test.{tgt_lang}.pred.google")
+ if os.path.exists(src_infname) and not os.path.exists(tgt_outfname):
+ src_sents = [
+ sent.replace("\n", "").strip()
+ for sent in open(src_infname, "r").read().split("\n")
+ if sent
+ ]
+ translations = [
+ translate_text(text, src_lang, tgt_lang).strip() for text in tqdm(src_sents)
+ ]
+ with open(tgt_outfname, "w") as f:
+ f.write("\n".join(translations))
+
+ # # target to source translations
+ tgt_infname = os.path.join(pair, f"test.{tgt_lang}")
+ src_outfname = os.path.join(pair, f"test.{src_lang}.pred.google")
+ if os.path.exists(tgt_infname) and not os.path.exists(src_outfname):
+ tgt_sents = [
+ sent.replace("\n", "").strip()
+ for sent in open(tgt_infname, "r").read().split("\n")
+ if sent
+ ]
+ translations = [
+ translate_text(text, tgt_lang, src_lang).strip() for text in tqdm(tgt_sents)
+ ]
+
+ with open(src_outfname, "w") as f:
+ f.write("\n".join(translations))
diff --git a/IndicTrans2/baseline_eval/m2m100_inference.py b/IndicTrans2/baseline_eval/m2m100_inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..e55f7686c5f759067bf86cd67e6597168cbc6a13
--- /dev/null
+++ b/IndicTrans2/baseline_eval/m2m100_inference.py
@@ -0,0 +1,148 @@
+import os
+import re
+import sys
+from tqdm import tqdm
+import torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+
+# dictionary mapping flores codes to M2M-100 supported codes
+langs_supported = {
+ "eng_Latn": "en",
+ "ben_Beng": "bn",
+ "guj_Gujr": "gu",
+ "hin_Deva": "hi",
+ "kan_Knda": "kn",
+ "mal_Mlym": "ml",
+ "mar_Deva": "mr",
+ "npi_Deva": "ne",
+ "ory_Orya": "or",
+ "pan_Guru": "pa",
+ "snd_Arab": "sd",
+ "tam_Taml": "ta",
+ "urd_Arab": "ur",
+}
+
+
+def predict(batch, tokenizer, model, bos_token_id):
+ encoded_batch = tokenizer(batch, padding=True, return_tensors="pt").to(model.device)
+ generated_tokens = model.generate(
+ **encoded_batch,
+ num_beams=5,
+ max_length=256,
+ min_length=0,
+ forced_bos_token_id=bos_token_id,
+ )
+ hypothesis = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
+ return hypothesis
+
+
+def main(devtest_data_dir, batch_size):
+ # load the pre-trained M2M-100 tokenizer and model
+ model_name = "facebook/m2m100-12B-last-ckpt"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+ model.eval()
+
+ # iterate over a list of language pairs from `devtest_data_dir`
+ for pair in sorted(os.listdir(devtest_data_dir)):
+ if "-" not in pair:
+ continue
+
+ src_lang, tgt_lang = pair.split("-")
+
+ # check if the source and target languages are supported
+ if (
+ src_lang not in langs_supported.keys()
+ or tgt_lang not in langs_supported.keys()
+ ):
+ print(f"Skipping {src_lang}-{tgt_lang} ...")
+ continue
+
+ # -------------------------------------------------------------------
+ # source to target evaluation
+ # -------------------------------------------------------------------
+ print(f"Evaluating {src_lang}-{tgt_lang} ...")
+
+ infname = os.path.join(devtest_data_dir, pair, f"test.{src_lang}")
+ outfname = os.path.join(devtest_data_dir, pair, f"test.{tgt_lang}.pred.m2m100")
+
+ with open(infname, "r") as f:
+ src_sents = f.read().split("\n")
+
+ add_new_line = False
+ if src_sents[-1] == "":
+ add_new_line = True
+ src_sents = src_sents[:-1]
+
+ # set the source language for tokenization
+ tokenizer.src_lang = langs_supported[src_lang]
+
+ # process sentences in batches and generate predictions
+ hypothesis = []
+ for i in tqdm(range(0, len(src_sents), batch_size)):
+ start, end = i, int(min(len(src_sents), i + batch_size))
+ batch = src_sents[start:end]
+ bos_token_id = tokenizer.lang_code_to_id[langs_supported[tgt_lang]]
+ hypothesis += predict(batch, tokenizer, model, bos_token_id)
+
+ assert len(hypothesis) == len(src_sents)
+
+        hypothesis = [
+            re.sub(r"\s+", " ", x.replace("\n", " ").replace("\t", " ")).strip()
+            for x in hypothesis
+        ]
+        if add_new_line:
+            hypothesis.append("")  # restore the trailing newline of the input file
+
+ with open(outfname, "w") as f:
+ f.write("\n".join(hypothesis))
+
+ # -------------------------------------------------------------------
+ # target to source evaluation
+ # -------------------------------------------------------------------
+ infname = os.path.join(devtest_data_dir, pair, f"test.{tgt_lang}")
+ outfname = os.path.join(devtest_data_dir, pair, f"test.{src_lang}.pred.m2m100")
+
+ with open(infname, "r") as f:
+ src_sents = f.read().split("\n")
+
+ add_new_line = False
+ if src_sents[-1] == "":
+ add_new_line = True
+ src_sents = src_sents[:-1]
+
+ # set the source language for tokenization
+ tokenizer.src_lang = langs_supported[tgt_lang]
+
+ # process sentences in batches and generate predictions
+ hypothesis = []
+ for i in tqdm(range(0, len(src_sents), batch_size)):
+ start, end = i, int(min(len(src_sents), i + batch_size))
+ batch = src_sents[start:end]
+ bos_token_id = tokenizer.lang_code_to_id[langs_supported[src_lang]]
+ hypothesis += predict(batch, tokenizer, model, bos_token_id)
+
+ assert len(hypothesis) == len(src_sents)
+
+        hypothesis = [
+            re.sub(r"\s+", " ", x.replace("\n", " ").replace("\t", " ")).strip()
+            for x in hypothesis
+        ]
+        if add_new_line:
+            hypothesis.append("")  # restore the trailing newline of the input file
+
+ with open(outfname, "w") as f:
+ f.write("\n".join(hypothesis))
+
+
+if __name__ == "__main__":
+ # expects En-X subdirectories pairs within the devtest data directory
+ devtest_data_dir = sys.argv[1]
+ batch_size = int(sys.argv[2])
+
+ if not torch.cuda.is_available():
+ print("No GPU available")
+ sys.exit(1)
+
+ main(devtest_data_dir, batch_size)
diff --git a/IndicTrans2/baseline_eval/mbart_inference.py b/IndicTrans2/baseline_eval/mbart_inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad4ff2b171c40d86b9dd2ddc38d9d6503e9b0ed6
--- /dev/null
+++ b/IndicTrans2/baseline_eval/mbart_inference.py
@@ -0,0 +1,159 @@
+import os
+import re
+import sys
+from tqdm import tqdm
+import torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+
+# dictionary mapping flores codes to mBART supported codes
+langs_supported = {
+ "eng_Latn": "en_XX",
+ "guj_Gujr": "gu_IN",
+ "hin_Deva": "hi_IN",
+ "npi_Deva": "ne_NP",
+ "ben_Beng": "bn_IN",
+ "mal_Mlym": "ml_IN",
+ "mar_Deva": "mr_IN",
+ "tam_Taml": "ta_IN",
+ "tel_Telu": "te_IN",
+ "urd_Arab": "ur_PK",
+}
+
+
+def predict(batch, tokenizer, model, bos_token_id):
+ encoded_batch = tokenizer(batch, padding=True, return_tensors="pt").to(model.device)
+ generated_tokens = model.generate(
+ **encoded_batch,
+ num_beams=5,
+ max_length=256,
+ min_length=0,
+ forced_bos_token_id=bos_token_id,
+ )
+ hypothesis = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
+ return hypothesis
+
+
+def main(devtest_data_dir, batch_size):
+ # load the pre-trained mBART tokenizers and models for English-XX and XX-English translation
+ enxx_model_name = "facebook/mbart-large-50-one-to-many-mmt"
+ xxen_model_name = "facebook/mbart-large-50-many-to-one-mmt"
+ tokenizers = {
+ "enxx": AutoTokenizer.from_pretrained(enxx_model_name),
+ "xxen": AutoTokenizer.from_pretrained(xxen_model_name),
+ }
+ models = {
+ "enxx": AutoModelForSeq2SeqLM.from_pretrained(enxx_model_name).cuda(),
+ "xxen": AutoModelForSeq2SeqLM.from_pretrained(xxen_model_name).cuda(),
+ }
+
+ # set the models to evaluation mode
+ for model_name in models:
+ models[model_name].eval()
+
+ # iterate over a list of language pairs from `devtest_data_dir`
+ for pair in sorted(os.listdir(devtest_data_dir)):
+ if "-" not in pair:
+ continue
+
+ src_lang, tgt_lang = pair.split("-")
+
+ # check if the source and target languages are supported
+ if (
+ src_lang not in langs_supported.keys()
+ or tgt_lang not in langs_supported.keys()
+ ):
+ print(f"Skipping {src_lang}-{tgt_lang} ...")
+ continue
+
+ # -------------------------------------------------------------------
+ # source to target evaluation
+ # -------------------------------------------------------------------
+ print(f"Evaluating {src_lang}-{tgt_lang} ...")
+
+ infname = os.path.join(devtest_data_dir, pair, f"test.{src_lang}")
+ outfname = os.path.join(devtest_data_dir, pair, f"test.{tgt_lang}.pred.mbart50")
+
+ with open(infname, "r") as f:
+ src_sents = f.read().split("\n")
+
+ add_new_line = False
+ if src_sents[-1] == "":
+ add_new_line = True
+ src_sents = src_sents[:-1]
+
+ # set the source language for tokenization
+ tokenizers["enxx"].src_lang = langs_supported[src_lang]
+
+ # process sentences in batches and generate predictions
+ hypothesis = []
+ for i in tqdm(range(0, len(src_sents), batch_size)):
+ start, end = i, int(min(len(src_sents), i + batch_size))
+ batch = src_sents[start:end]
+ bos_token_id = tokenizers["enxx"].lang_code_to_id[langs_supported[tgt_lang]]
+ hypothesis += predict(
+ batch, tokenizers["enxx"], models["enxx"], bos_token_id
+ )
+
+ assert len(hypothesis) == len(src_sents)
+
+        hypothesis = [
+            re.sub(r"\s+", " ", x.replace("\n", " ").replace("\t", " ")).strip()
+            for x in hypothesis
+        ]
+        if add_new_line:
+            hypothesis.append("")  # restore the trailing newline of the input file
+
+ with open(outfname, "w") as f:
+ f.write("\n".join(hypothesis))
+
+ # -------------------------------------------------------------------
+ # target to source evaluation
+ # -------------------------------------------------------------------
+ infname = os.path.join(devtest_data_dir, pair, f"test.{tgt_lang}")
+ outfname = os.path.join(devtest_data_dir, pair, f"test.{src_lang}.pred.mbart50")
+
+ with open(infname, "r") as f:
+ src_sents = f.read().split("\n")
+
+ add_new_line = False
+ if src_sents[-1] == "":
+ add_new_line = True
+ src_sents = src_sents[:-1]
+
+ # set the source language for tokenization
+ tokenizers["xxen"].src_lang = langs_supported[tgt_lang]
+
+ # process sentences in batches and generate predictions
+ hypothesis = []
+ for i in tqdm(range(0, len(src_sents), batch_size)):
+ start, end = i, int(min(len(src_sents), i + batch_size))
+ batch = src_sents[start:end]
+ bos_token_id = tokenizers["xxen"].lang_code_to_id[langs_supported[src_lang]]
+ hypothesis += predict(
+ batch, tokenizers["xxen"], models["xxen"], bos_token_id
+ )
+
+ assert len(hypothesis) == len(src_sents)
+
+        hypothesis = [
+            re.sub(r"\s+", " ", x.replace("\n", " ").replace("\t", " ")).strip()
+            for x in hypothesis
+        ]
+        if add_new_line:
+            hypothesis.append("")  # restore the trailing newline of the input file
+
+ with open(outfname, "w") as f:
+ f.write("\n".join(hypothesis))
+
+
+if __name__ == "__main__":
+ # expects En-X subdirectories pairs within the devtest data directory
+ devtest_data_dir = sys.argv[1]
+ batch_size = int(sys.argv[2])
+
+ if not torch.cuda.is_available():
+ print("No GPU available")
+ sys.exit(1)
+
+ main(devtest_data_dir, batch_size)
diff --git a/IndicTrans2/baseline_eval/nllb_moe_cpu_inference.py b/IndicTrans2/baseline_eval/nllb_moe_cpu_inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..f477abb13174f1fca232e90012fa879a8414bc8b
--- /dev/null
+++ b/IndicTrans2/baseline_eval/nllb_moe_cpu_inference.py
@@ -0,0 +1,157 @@
+import os
+import re
+import sys
+from tqdm import tqdm
+import torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+langs_supported = [
+ "asm_Beng",
+ "ben_Beng",
+ "guj_Gujr",
+ "eng_Latn",
+ "hin_Deva",
+ "kas_Deva",
+ "kas_Arab",
+ "kan_Knda",
+ "mal_Mlym",
+ "mai_Deva",
+ "mar_Deva",
+ "mni_Beng",
+ "npi_Deva",
+ "ory_Orya",
+ "pan_Guru",
+ "san_Deva",
+ "snd_Arab",
+ "sat_Olck",
+ "tam_Taml",
+ "tel_Telu",
+ "urd_Arab",
+]
+
+
+def predict(batch, tokenizer, model, bos_token_id):
+ encoded_batch = tokenizer(batch, padding=True, return_tensors="pt").to(model.device)
+ generated_tokens = model.generate(
+ **encoded_batch,
+ num_beams=5,
+ max_length=256,
+ min_length=0,
+ forced_bos_token_id=bos_token_id,
+ )
+ hypothesis = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
+ return hypothesis
+
+
+def main(devtest_data_dir, batch_size):
+ # load the pre-trained NLLB tokenizer and model
+ model_name = "facebook/nllb-moe-54b"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+ model.eval()
+
+ # iterate over a list of language pairs from `devtest_data_dir`
+ for pair in sorted(os.listdir(devtest_data_dir)):
+ if "-" not in pair:
+ continue
+
+ src_lang, tgt_lang = pair.split("-")
+
+ # check if the source and target languages are supported
+        if (
+            src_lang not in langs_supported
+            or tgt_lang not in langs_supported
+        ):
+ print(f"Skipping {src_lang}-{tgt_lang} ...")
+ continue
+
+ # -------------------------------------------------------------------
+ # source to target evaluation
+ # -------------------------------------------------------------------
+ print(f"Evaluating {src_lang}-{tgt_lang} ...")
+
+ infname = os.path.join(devtest_data_dir, pair, f"test.{src_lang}")
+ outfname = os.path.join(
+ devtest_data_dir, pair, f"test.{tgt_lang}.pred.nllb_moe"
+ )
+
+ with open(infname, "r") as f:
+ src_sents = f.read().split("\n")
+
+ add_new_line = False
+ if src_sents[-1] == "":
+ add_new_line = True
+ src_sents = src_sents[:-1]
+
+ # set the source language for tokenization
+ tokenizer.src_lang = src_lang
+
+ # process sentences in batches and generate predictions
+ hypothesis = []
+ for i in tqdm(range(0, len(src_sents), batch_size)):
+ start, end = i, int(min(len(src_sents), i + batch_size))
+ batch = src_sents[start:end]
+ if tgt_lang == "sat_Olck":
+ bos_token_id = tokenizer.lang_code_to_id["sat_Beng"]
+ else:
+ bos_token_id = tokenizer.lang_code_to_id[tgt_lang]
+ hypothesis += predict(batch, tokenizer, model, bos_token_id)
+
+ assert len(hypothesis) == len(src_sents)
+
+        hypothesis = [
+            re.sub(r"\s+", " ", x.replace("\n", " ").replace("\t", " ")).strip()
+            for x in hypothesis
+        ]
+        if add_new_line:
+            hypothesis.append("")  # restore the trailing newline of the input file
+
+ with open(outfname, "w") as f:
+ f.write("\n".join(hypothesis))
+
+ # -------------------------------------------------------------------
+ # target to source evaluation
+ # -------------------------------------------------------------------
+ infname = os.path.join(devtest_data_dir, pair, f"test.{tgt_lang}")
+ outfname = os.path.join(
+ devtest_data_dir, pair, f"test.{src_lang}.pred.nllb_moe"
+ )
+
+ with open(infname, "r") as f:
+ src_sents = f.read().split("\n")
+
+ add_new_line = False
+ if src_sents[-1] == "":
+ add_new_line = True
+ src_sents = src_sents[:-1]
+
+ # set the source language for tokenization
+ tokenizer.src_lang = "sat_Beng" if tgt_lang == "sat_Olck" else tgt_lang
+
+ # process sentences in batches and generate predictions
+ hypothesis = []
+ for i in tqdm(range(0, len(src_sents), batch_size)):
+ start, end = i, int(min(len(src_sents), i + batch_size))
+ batch = src_sents[start:end]
+            # langs_supported is a list here, so index lang_code_to_id directly;
+            # mirror the forward direction's handling of Santali
+            bos_token_id = tokenizer.lang_code_to_id[
+                "sat_Beng" if src_lang == "sat_Olck" else src_lang
+            ]
+ hypothesis += predict(batch, tokenizer, model, bos_token_id)
+
+ assert len(hypothesis) == len(src_sents)
+
+        hypothesis = [
+            re.sub(r"\s+", " ", x.replace("\n", " ").replace("\t", " ")).strip()
+            for x in hypothesis
+        ]
+        if add_new_line:
+            hypothesis.append("")  # restore the trailing newline of the input file
+
+ with open(outfname, "w") as f:
+ f.write("\n".join(hypothesis))
+
+
+if __name__ == "__main__":
+ # expects En-X subdirectories pairs within the devtest data directory
+ devtest_data_dir = sys.argv[1]
+ batch_size = int(sys.argv[2])
+
+ main(devtest_data_dir, batch_size)
diff --git a/IndicTrans2/compute_comet_score.sh b/IndicTrans2/compute_comet_score.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b59f1301dea1faf0029fd041e06b114fd8a95f09
--- /dev/null
+++ b/IndicTrans2/compute_comet_score.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+# This script computes COMET metrics and also performs significance testing on the evaluation set
+# where each subdirectory contains En-X pair
+
+
+echo `date`
+devtest_data_dir=$1 # path to the evaluation directory
+model_name=${2:-"Unbabel/wmt22-comet-da"} # name of the model checkpoint
+
+# predefined list of languages supported by COMET
+langs=(asm_Beng ben_Beng guj_Gujr hin_Deva kan_Knda mal_Mlym mar_Deva ory_Orya pan_Guru tam_Taml tel_Telu urd_Arab)
+
+# we predefine a set of systems which we consider for evaluation
+# feel free to change the below line in case you want to add or remove any system
+system=(google azure nllb mbart50 m2m100 it1 it2)
+
+
+# iterate over the list of predefined languages
+for lang in "${langs[@]}"; do
+
+ mkdir -p "$devtest_data_dir/eng_Latn-$lang/comet"
+
+ # --------------------------------------------------------------
+ # COMET score computation
+ # --------------------------------------------------------------
+
+ # iterate over the list of predefined systems
+ for sys in "${system[@]}"; do
+
+ echo "${sys}"
+
+ # en - indic direction
+ if [ -f "$devtest_data_dir/eng_Latn-$lang/test.$lang.pred.$sys" ]; then
+ echo "eng_Latn-${lang}"
+
+ src_fname=$devtest_data_dir/eng_Latn-$lang/test.eng_Latn
+ pred_fname=$devtest_data_dir/eng_Latn-$lang/test.$lang.pred.$sys
+ ref_fname=$devtest_data_dir/eng_Latn-$lang/test.$lang
+ out_fname=$devtest_data_dir/eng_Latn-$lang/comet/eng_Latn_${lang}_${sys}_comet.txt
+
+ # Compute COMET scores using `comet-score`
+ comet-score -s $src_fname -t $pred_fname -r $ref_fname --gpus 1 --model $model_name --quiet --only_system > $out_fname
+ fi
+
+ # indic - en direction
+ if [ -f "$devtest_data_dir/eng_Latn-$lang/test.eng_Latn.pred.$sys" ]; then
+ echo "${lang}-eng_Latn"
+
+ src_fname=$devtest_data_dir/eng_Latn-$lang/test.$lang
+ pred_fname=$devtest_data_dir/eng_Latn-$lang/test.eng_Latn.pred.$sys
+ ref_fname=$devtest_data_dir/eng_Latn-$lang/test.eng_Latn
+ out_fname=$devtest_data_dir/eng_Latn-$lang/comet/${lang}_eng_Latn_${sys}_comet.txt
+
+ # Compute COMET scores using `comet-score`
+ comet-score -s $src_fname -t $pred_fname -r $ref_fname --gpus 1 --model $model_name --quiet --only_system > $out_fname
+ fi
+
+ done
+
+ # --------------------------------------------------------------
+ # COMET significance testing
+ # --------------------------------------------------------------
+
+ # en - indic direction
+ src_fname=$devtest_data_dir/eng_Latn-$lang/test.eng_Latn
+ pred_fname=$devtest_data_dir/eng_Latn-$lang/test.$lang.pred.*
+ ref_fname=$devtest_data_dir/eng_Latn-$lang/test.$lang
+ out_fname=$devtest_data_dir/eng_Latn-$lang/comet/eng_Latn_${lang}_comet_stat.txt
+
+ # Compute COMET significance scores using `comet-compare`
+ comet-compare -s $src_fname -t $pred_fname -r $ref_fname > $out_fname
+
+
+ # indic-en direction
+ src_fname=$devtest_data_dir/eng_Latn-$lang/test.$lang
+ pred_fname=$devtest_data_dir/eng_Latn-$lang/test.eng_Latn.pred.*
+ ref_fname=$devtest_data_dir/eng_Latn-$lang/test.eng_Latn
+ out_fname=$devtest_data_dir/eng_Latn-$lang/comet/${lang}_eng_Latn_comet_stat.txt
+
+ # Compute COMET significance scores using `comet-compare`
+ comet-compare -s $src_fname -t $pred_fname -r $ref_fname > $out_fname
+
+done
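+
+# example usage (path is a placeholder):
+# bash compute_comet_score.sh <devtest_data_dir> Unbabel/wmt22-comet-da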
diff --git a/IndicTrans2/compute_metrics.sh b/IndicTrans2/compute_metrics.sh
new file mode 100644
index 0000000000000000000000000000000000000000..9c0c053d468fece364cda3120297bfd078891949
--- /dev/null
+++ b/IndicTrans2/compute_metrics.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+# This script computes evaluation metrics such as BLEU, chrF, and chrF++ from the
+# detokenized predictions of the translation systems using sacrebleu (version 2.3.1).
+# If the target language is:
+# English: directly use the internally supported Moses tokenizer (`mteval-v13a`)
+# Indic: use IndicNLP tokenizers and skip the tokenization step in sacrebleu.
+
+
+echo `date`
+pred_fname=$1 # path to the prediction file
+ref_fname=$2 # path to the reference file
+tgt_lang=$3 # target language
+
+
+if [ $tgt_lang == 'eng_Latn' ]; then
+ # directly tokenize the prediction and reference files using sacrebleu and compute the metric
+ sacrebleu $ref_fname < $pred_fname -m bleu chrf
+ sacrebleu $ref_fname < $pred_fname -m chrf --chrf-word-order 2
+else
+
+ # indicnlp tokenize prediction and reference files before evaluation
+ input_size=`python scripts/preprocess_translate.py $ref_fname $ref_fname.tok $tgt_lang false false`
+ input_size=`python scripts/preprocess_translate.py $pred_fname $pred_fname.tok $tgt_lang false false`
+
+ # since we are tokenizing with indicnlp separately, we are setting tokenize to none here
+ sacrebleu --tokenize none $ref_fname.tok < $pred_fname.tok -m bleu chrf
+ sacrebleu --tokenize none $ref_fname.tok < $pred_fname.tok -m chrf --chrf-word-order 2
+fi
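+
+# example usage (mirrors how eval.sh invokes this script):
+# bash compute_metrics.sh test.hin_Deva.pred.it2 test.hin_Deva hin_Deva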
diff --git a/IndicTrans2/compute_metrics_significance.sh b/IndicTrans2/compute_metrics_significance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..1a8097d96a654449b26860f543c20b256bac7d0b
--- /dev/null
+++ b/IndicTrans2/compute_metrics_significance.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+# This script performs significance testing for metrics such as BLEU and chrF++ using sacrebleu on the evaluation set,
+# where each subdirectory contains an En-X pair
+
+
+echo `date`
+devtest_data_dir=$1 # path to the evaluation directory
+
+# we predefine a set of systems which we consider for evaluation
+# feel free to change the below line in case you want to add or remove any system
+system=(google azure nllb mbart50 m2m100 it1 it2)
+
+
+# get a list of language pairs in the `devtest_data_dir`
+pairs=$(ls -d $devtest_data_dir/eng_Latn-* | sort)
+
+
+# iterate over each language pair
+for pair in ${pairs[@]}; do
+ # extract the source and target languages from the pair name
+ pair=$(basename $pair)
+ src_lang=$(echo "$pair" | cut -d "-" -f 1)
+ tgt_lang=$(echo "$pair" | cut -d "-" -f 2)
+
+ if [[ $src_lang == "eng_Latn" ]]; then
+
+ # ----------------------------------------------------------------------
+ # en - indic direction
+ # ----------------------------------------------------------------------
+ echo "${src_lang} - ${tgt_lang}"
+
+ # find all the prediction files for different systems and tokenize them using IndicNLP
+ pred_fnames=$devtest_data_dir/$pair/test.${tgt_lang}.pred.*
+ ref_fname=$devtest_data_dir/$pair/test.${tgt_lang}
+
+ for pred_fname in $pred_fnames; do
+ input_size=`python scripts/preprocess_translate.py $pred_fname $pred_fname.tok $tgt_lang false false`
+ done
+
+ input_size=`python scripts/preprocess_translate.py $ref_fname $ref_fname.tok $tgt_lang false false`
+
+ ref_fname=$devtest_data_dir/$pair/test.${tgt_lang}.tok
+ it2_fname=$devtest_data_dir/$pair/test.${tgt_lang}.pred.it2.tok
+ sys_fnames=$devtest_data_dir/$pair/test.${tgt_lang}.pred.*.tok
+ bleu_out_fname=$devtest_data_dir/$pair/${src_lang}_${tgt_lang}_bleu_significance.txt
+ chrF_out_fname=$devtest_data_dir/$pair/${src_lang}_${tgt_lang}_chrF++_significance.txt
+
+ sacrebleu --tokenize none $ref_fname -i $it2_fname $sys_fnames --paired-bs -m bleu --format text > $bleu_out_fname
+ sacrebleu --tokenize none $ref_fname -i $it2_fname $sys_fnames --paired-bs -m chrf --chrf-word-order 2 --format text > $chrF_out_fname
+
+ # ----------------------------------------------------------------------
+ # indic - en direction
+ # ----------------------------------------------------------------------
+ echo "${tgt_lang} - ${src_lang}"
+
+ ref_fname=$devtest_data_dir/$pair/test.${src_lang}
+ it2_fname=$devtest_data_dir/$pair/test.${src_lang}.pred.it2
+ sys_fnames=$devtest_data_dir/$pair/test.${src_lang}.pred.*
+ bleu_out_fname=$devtest_data_dir/$pair/${tgt_lang}_${src_lang}_bleu_significance.txt
+ chrF_out_fname=$devtest_data_dir/$pair/${tgt_lang}_${src_lang}_chrF++_significance.txt
+
+ sacrebleu --tokenize none $ref_fname -i $it2_fname $sys_fnames --paired-bs -m bleu --format text > $bleu_out_fname
+ sacrebleu --tokenize none $ref_fname -i $it2_fname $sys_fnames --paired-bs -m chrf --chrf-word-order 2 --format text > $chrF_out_fname
+
+ fi
+
+done
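+
+# example usage (path is a placeholder):
+# bash compute_metrics_significance.sh <devtest_data_dir>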
diff --git a/IndicTrans2/eval.sh b/IndicTrans2/eval.sh
new file mode 100644
index 0000000000000000000000000000000000000000..71d0f59faf1a8c1948e7e1d7f7bea267802ba2de
--- /dev/null
+++ b/IndicTrans2/eval.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# This script evaluates the performance of a machine translation system
+# on an evaluation set in the forward direction. For example, if the evaluation set
+# consists of language pairs such as En-X, where En represents the English language
+# and X represents the target Indic language, then this script evaluates the translation
+# system in the English (En) to target Indic language (X) direction.
+
+
+echo `date`
+devtest_data_dir=$1 # path to the evaluation directory
+ckpt_dir=$2 # path to the checkpoint directory
+system=${3:-"it2"} # name of the machine translation system
+
+
+# get a list of language pairs in the `devtest_data_dir`
+pairs=$(ls -d $devtest_data_dir/* | sort)
+
+
+# iterate over each language pair
+for pair in ${pairs[@]}; do
+ # extract the source and target languages from the pair name
+ pair=$(basename $pair)
+ src_lang=$(echo "$pair" | cut -d "-" -f 1)
+ tgt_lang=$(echo "$pair" | cut -d "-" -f 2)
+
+ src_fname=$devtest_data_dir/$src_lang-$tgt_lang/test.$src_lang
+ tgt_fname=$devtest_data_dir/$src_lang-$tgt_lang/test.$tgt_lang
+
+ # check if the source and target files exist
+ if [ -f "$src_fname" ] && [ -f "$tgt_fname" ]; then
+ echo "Evaluating $src_lang-$tgt_lang ..."
+ else
+ echo "Skipping $src_lang-$tgt_lang ..."
+ continue
+ fi
+
+ # generate translations if the system name contains "it2"
+ if [[ $system == *"it2"* ]]; then
+ echo "Generating Translations"
+ bash joint_translate.sh $src_fname $tgt_fname.pred.$system $src_lang $tgt_lang $ckpt_dir
+ fi
+
+ # compute automatic string-based metrics if the prediction exists for the system
+ if [[ -f "${tgt_fname}.pred.${system}" ]]; then
+ echo "Computing Metrics"
+ bash compute_metrics.sh $tgt_fname.pred.$system $tgt_fname $tgt_lang > $devtest_data_dir/$src_lang-$tgt_lang/${src_lang}_${tgt_lang}_${system}_scores.txt
+ fi
+
+ # remove the intermediate files
+ rm -rf $tgt_fname.pred.$system.*
+ rm -rf $devtest_data_dir/$src_lang-$tgt_lang/*.tok
+
+done
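+
+# example usage (paths are placeholders):
+# bash eval.sh <devtest_data_dir> <ckpt_dir> it2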
diff --git a/IndicTrans2/eval_rev.sh b/IndicTrans2/eval_rev.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2e795a03a7ce6496288de09acd6d61808bcb0f7e
--- /dev/null
+++ b/IndicTrans2/eval_rev.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# This script evaluates the performance of a machine translation system
+# on an evaluation set in the reverse direction. For example, if the evaluation set
+# consists of language pairs such as En-X, where En represents the English language
+# and X represents the target Indic language, then this script evaluates the translation
+# system in the target Indic language (X) to English (En) direction.
+
+
+echo `date`
+devtest_data_dir=$1 # path to the evaluation directory
+ckpt_dir=$2 # path to the checkpoint directory
+system=${3:-"it2"} # name of the machine translation system
+
+
+# get a list of language pairs in the `devtest_data_dir`
+pairs=$(ls -d $devtest_data_dir/* | sort)
+
+
+# iterate over each language pair
+for pair in ${pairs[@]}; do
+ # extract the source and target languages from the pair name
+ pair=$(basename $pair)
+ src_lang=$(echo "$pair" | cut -d "-" -f 1)
+ tgt_lang=$(echo "$pair" | cut -d "-" -f 2)
+
+ src_fname=$devtest_data_dir/$src_lang-$tgt_lang/test.$tgt_lang
+ tgt_fname=$devtest_data_dir/$src_lang-$tgt_lang/test.$src_lang
+
+ # check if the source and target files exist
+ # in this case, we use the actual target file as the source and vice-versa
+ if [ -f "$src_fname" ] && [ -f "$tgt_fname" ]; then
+ echo "Evaluating $src_lang-$tgt_lang ..."
+ else
+ echo "Skipping $src_lang-$tgt_lang ..."
+ continue
+ fi
+
+ # generate translations if the system name contains "it2"
+ if [[ $system == *"it2"* ]]; then
+ echo "Generating Translations"
+ bash joint_translate.sh $src_fname $tgt_fname.pred.$system $tgt_lang $src_lang $ckpt_dir
+ fi
+
+ # compute automatic string-based metrics if the prediction exists for the system
+ if [[ -f "${tgt_fname}.pred.${system}" ]]; then
+ echo "Computing Metrics"
+ bash compute_metrics.sh $tgt_fname.pred.$system $tgt_fname $src_lang > $devtest_data_dir/$src_lang-$tgt_lang/${tgt_lang}_${src_lang}_${system}_scores.txt
+ fi
+
+ # remove the intermediate files
+ rm -rf $tgt_fname.pred.$system.*
+ rm -rf $devtest_data_dir/$src_lang-$tgt_lang/*.tok
+
+done
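+
+# example usage (paths are placeholders):
+# bash eval_rev.sh <devtest_data_dir> <ckpt_dir> it2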
diff --git a/IndicTrans2/finetune.sh b/IndicTrans2/finetune.sh
new file mode 100644
index 0000000000000000000000000000000000000000..633108115a00e1a82515c22340c1b54e3b00c1a3
--- /dev/null
+++ b/IndicTrans2/finetune.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# This script finetunes the pretrained translation model on the binarized data using fairseq.
+
+
+echo `date`
+exp_dir=$1 # path of the experiment directory
+model_arch=${2:-"transformer_18_18"} # model architecture (defaults to `transformer_18_18`)
+pretrained_ckpt=$3 # path to the pretrained checkpoint `.pt` file
+
+
+fairseq-train $exp_dir/final_bin \
+--max-source-positions=256 \
+--max-target-positions=256 \
+--source-lang=SRC \
+--target-lang=TGT \
+--max-update=1000000 \
+--save-interval-updates=1000 \
+--arch=$model_arch \
+--activation-fn gelu \
+--criterion=label_smoothed_cross_entropy \
+--label-smoothing=0.1 \
+--optimizer adam \
+--adam-betas "(0.9, 0.98)" \
+--lr-scheduler=inverse_sqrt \
+--clip-norm 1.0 \
+--warmup-init-lr 1e-07 \
+--lr 3e-5 \
+--warmup-updates 2000 \
+--dropout 0.2 \
+--save-dir $exp_dir/model \
+--keep-last-epochs 5 \
+--keep-interval-updates 3 \
+--patience 10 \
+--skip-invalid-size-inputs-valid-test \
+--fp16 \
+--user-dir model_configs \
+--update-freq=4 \
+--distributed-world-size 8 \
+--num-workers 24 \
+--max-tokens 1024 \
+--eval-bleu \
+--eval-bleu-args "{\"beam\": 1, \"lenpen\": 1.0, \"max_len_a\": 1.2, \"max_len_b\": 10}" \
+--eval-bleu-detok moses \
+--eval-bleu-remove-bpe sentencepiece \
+--eval-bleu-print-samples \
+--best-checkpoint-metric bleu \
+--maximize-best-checkpoint-metric \
+--restore-file $pretrained_ckpt \
+--reset-lr-scheduler \
+--reset-meters \
+--reset-dataloader \
+--reset-optimizer \
+--task translation
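+
+# example usage (paths are placeholders):
+# bash finetune.sh <exp_dir> transformer_18_18 <pretrained_ckpt.pt>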
diff --git a/IndicTrans2/huggingface_interface/.gitignore b/IndicTrans2/huggingface_interface/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b8676cdaa7fdc63b08e9f9bcfad521784f830715
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/.gitignore
@@ -0,0 +1 @@
+IndicTransTokenizer
\ No newline at end of file
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/.gitignore b/IndicTrans2/huggingface_interface/IndicTransToolkit/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..d8cfb021707ecf8be6bf803b621467ec0f01e1b6
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/.gitignore
@@ -0,0 +1,4 @@
+dist/
+build/
+*.egg-info/
+*/*/__pycache__/
\ No newline at end of file
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/CHANGELOG.md b/IndicTrans2/huggingface_interface/IndicTransToolkit/CHANGELOG.md
new file mode 100644
index 0000000000000000000000000000000000000000..66d30ab1ae4a572c61be4a02115d4034f0c0f5e4
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/CHANGELOG.md
@@ -0,0 +1,16 @@
+# Changelog
+
+# 📢 Release v1.0.3
+- 🚨 The `IndicProcessor` class has been re-written in [Cython](https://github.com/cython/cython) for a faster implementation. This gives us at least `+10 lines/s`.
+- A new `visualize` argument has been added to `preprocess_batch` to track the processing with a `tqdm` bar.
+
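+For example, reusing the `ip` and `sentences` objects from the README snippet:
+
+```python
+batch = ip.preprocess_batch(sentences, src_lang="eng_Latn", tgt_lang="hin_Deva", visualize=True) # prints a tqdm progress bar
+```
+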
+# 📢 Release v1.0.2
+- The repository has been renamed to `IndicTransToolkit`.
+- 🚨 The custom tokenizer is now **removed** from the repository. Please revert to a previous commit ([v1.0.1](https://github.com/VarunGumma/IndicTransToolkit/tree/0e68fb5872f4d821578a5252f90ad43c9649370f)) to use it **(strongly discouraged)**. The official _(and only tokenizer)_ is available on HF along with the models.
+
+# 📢 Release v1.0.0
+- The [PreTrainedTokenizer](https://huggingface.co/docs/transformers/main_classes/tokenizer) for IndicTrans2 is now available on HF 🎉🎉 Note that you still need the `IndicProcessor` to pre-process the sentences before tokenization.
+- 🚨 **In favor of the standard PreTrainedTokenizer, we have deprecated the custom tokenizer. This custom tokenizer will remain available here for backward compatibility, but no further updates/bug-fixes will be provided.**
+- The `indic_evaluate` function is now consolidated into a concrete `IndicEvaluator` class.
+- The data collation function for training is consolidated into a concrete `IndicDataCollator` class.
+- A simple batching method is now available in the `IndicProcessor`.
\ No newline at end of file
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/PKG-INFO b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/PKG-INFO
new file mode 100644
index 0000000000000000000000000000000000000000..77bc04551c120aeab5efaa66edd6cbb909897d46
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/PKG-INFO
@@ -0,0 +1,130 @@
+Metadata-Version: 2.2
+Name: IndicTransToolkit
+Version: 1.0.3
+Summary: A simple, consistent, and extendable module for IndicTrans2 tokenizer compatible with HuggingFace models
+Home-page: https://github.com/VarunGumma/IndicTransToolkit
+Author: Varun Gumma
+Author-email: varun230999@gmail.com
+License: MIT
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: setuptools>=68.2.2
+Requires-Dist: torch
+Requires-Dist: cython
+Requires-Dist: sacremoses
+Requires-Dist: sentencepiece
+Requires-Dist: transformers
+Requires-Dist: sacrebleu
+Requires-Dist: indic-nlp-library-IT2@ git+https://github.com/VarunGumma/indic_nlp_library.git
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+
+# IndicTransToolkit
+
+## About
+The goal of this repository is to provide a simple, modular, and extendable toolkit for [IndicTrans2](https://github.com/AI4Bharat/IndicTrans2) that is compatible with the released HuggingFace models. Please refer to the `CHANGELOG.md` for the latest developments.
+
+## Pre-requisites
+ - `Python 3.8+`
+ - [Indic NLP Library](https://github.com/VarunGumma/indic_nlp_library)
+ - Other requirements as listed in `requirements.txt`
+
+## Configuration
+ - Editable installation (Note: this may take a while):
+```bash
+git clone https://github.com/VarunGumma/IndicTransToolkit
+cd IndicTransToolkit
+
+pip install --editable . --use-pep517 # required for pip >= 25.0
+
+# in case it fails, try:
+# pip install --editable . --use-pep517 --config-settings editable_mode=compat
+```
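+
+A quick way to sanity-check the installation (a minimal check; assumes the editable install above succeeded):
+```python
+from IndicTransToolkit import IndicProcessor # should import without errors
+```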
+
+## Examples
+For the training use case, please refer [here](https://github.com/AI4Bharat/IndicTrans2/tree/main/huggingface_interface).
+
+### PreTrainedTokenizer
+```python
+import torch
+from IndicTransToolkit.processor import IndicProcessor # NOW IMPLEMENTED IN CYTHON !!
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+
+ip = IndicProcessor(inference=True)
+tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-en-indic-dist-200M", trust_remote_code=True)
+model = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-en-indic-dist-200M", trust_remote_code=True)
+
+sentences = [
+ "This is a test sentence.",
+ "This is another longer different test sentence.",
+ "Please send an SMS to 9876543210 and an email on newemail123@xyz.com by 15th October, 2023.",
+]
+
+batch = ip.preprocess_batch(sentences, src_lang="eng_Latn", tgt_lang="hin_Deva", visualize=False) # set it to visualize=True to print a progress bar
+batch = tokenizer(batch, padding="longest", truncation=True, max_length=256, return_tensors="pt")
+
+with torch.inference_mode():
+ outputs = model.generate(**batch, num_beams=5, num_return_sequences=1, max_length=256)
+
+with tokenizer.as_target_tokenizer():
+ # This scoping is absolutely necessary, as it will instruct the tokenizer to tokenize using the target vocabulary.
+ # Failure to use this scoping will result in gibberish/unexpected predictions as the output will be de-tokenized with the source vocabulary instead.
+ outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+
+outputs = ip.postprocess_batch(outputs, lang="hin_Deva")
+print(outputs)
+
+>>> ['यह एक परीक्षण वाक्य है।', 'यह एक और लंबा अलग परीक्षण वाक्य है।', 'कृपया 9876543210 पर एक एस. एम. एस. भेजें और 15 अक्टूबर, 2023 तक newemail123@xyz.com पर एक ईमेल भेजें।']
+```
+
+### Evaluation
+- `IndicEvaluator` is a Python implementation of [compute_metrics.sh](https://github.com/AI4Bharat/IndicTrans2/blob/main/compute_metrics.sh).
+- We have found that this Python implementation gives slightly lower scores than the original `compute_metrics.sh`. Please use this function cautiously, and feel free to raise a PR if you find the bug/fix.
+```python
+from IndicTransToolkit import IndicEvaluator
+
+# this method returns a dictionary with BLEU and ChrF2++ scores with appropriate signatures
+evaluator = IndicEvaluator()
+scores = evaluator.evaluate(tgt_lang=tgt_lang, preds=pred_file, refs=ref_file)
+
+# alternatively, you can pass the list of predictions and references instead of files
+# scores = evaluator.evaluate(tgt_lang=tgt_lang, preds=preds, refs=refs)
+```
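+
+The returned dictionary mirrors the structure built in `evaluator.py`; the values below are purely illustrative:
+```python
+{
+ "bleu": {"score": 30.1, "signature": "..."},
+ "chrF2++": {"score": 55.2, "signature": "..."},
+}
+```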
+
+## Authors
+ - Varun Gumma (varun230999@gmail.com)
+ - Jay Gala (jaygala24@gmail.com)
+ - Pranjal Agadh Chitale (pranjalchitale@gmail.com)
+ - Raj Dabre (prajdabre@gmail.com)
+
+
+## Bugs and Contribution
+Since this is a bleeding-edge module, you may encounter broken stuff and import issues once in a while. In case you encounter any bugs or want additional functionalities, please feel free to raise `Issues`/`Pull Requests` or contact the authors.
+
+
+## Citation
+If you use our codebase or models, please cite the following paper:
+```bibtex
+@article{
+ gala2023indictrans,
+ title={IndicTrans2: Towards High-Quality and Accessible Machine Translation Models for all 22 Scheduled Indian Languages},
+ author={Jay Gala and Pranjal A Chitale and A K Raghavan and Varun Gumma and Sumanth Doddapaneni and Aswanth Kumar M and Janki Atul Nawale and Anupama Sujatha and Ratish Puduppully and Vivek Raghavan and Pratyush Kumar and Mitesh M Khapra and Raj Dabre and Anoop Kunchukuttan},
+ journal={Transactions on Machine Learning Research},
+ issn={2835-8856},
+ year={2023},
+ url={https://openreview.net/forum?id=vfT4YuzAYA},
+ note={}
+}
+```
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/SOURCES.txt b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/SOURCES.txt
new file mode 100644
index 0000000000000000000000000000000000000000..88be9e169f1ea2b0582199901d1159428ce6c9b1
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/SOURCES.txt
@@ -0,0 +1,15 @@
+LICENSE
+README.md
+pyproject.toml
+setup.py
+IndicTransToolkit/__init__.py
+IndicTransToolkit/collator.py
+IndicTransToolkit/evaluator.py
+IndicTransToolkit/processor.c
+IndicTransToolkit/version.py
+IndicTransToolkit.egg-info/PKG-INFO
+IndicTransToolkit.egg-info/SOURCES.txt
+IndicTransToolkit.egg-info/dependency_links.txt
+IndicTransToolkit.egg-info/not-zip-safe
+IndicTransToolkit.egg-info/requires.txt
+IndicTransToolkit.egg-info/top_level.txt
\ No newline at end of file
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/dependency_links.txt b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/dependency_links.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/not-zip-safe b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/not-zip-safe
new file mode 100644
index 0000000000000000000000000000000000000000..d3f5a12faa99758192ecc4ed3fc22c9249232e86
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/not-zip-safe
@@ -0,0 +1 @@
+
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/requires.txt b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/requires.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a0b41de3f606df25802a04f434e72cc9b98f3eee
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/requires.txt
@@ -0,0 +1,8 @@
+setuptools>=68.2.2
+torch
+cython
+sacremoses
+sentencepiece
+transformers
+sacrebleu
+indic-nlp-library-IT2@ git+https://github.com/VarunGumma/indic_nlp_library.git
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/top_level.txt b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/top_level.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c33e91aba8100e26cb1db70df9b34419be0d120c
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit.egg-info/top_level.txt
@@ -0,0 +1 @@
+IndicTransToolkit
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__init__.py b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d370140808aab3aeb5990013db0fad93699d117
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__init__.py
@@ -0,0 +1,9 @@
+from .evaluator import IndicEvaluator
+from .collator import IndicDataCollator
+from .processor import IndicProcessor
+
+__all__ = [
+ "IndicEvaluator",
+ "IndicDataCollator",
+ "IndicProcessor",
+]
\ No newline at end of file
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/__init__.cpython-310.pyc b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e291e42aee0f90808e29c9164d6110c14d125dd8
Binary files /dev/null and b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/__init__.cpython-310.pyc differ
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/__init__.cpython-313.pyc b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..08baa6cd6fe4f3b6bd84ec255d0e7f3ef7793886
Binary files /dev/null and b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/__init__.cpython-313.pyc differ
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/collator.cpython-310.pyc b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/collator.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..568522d852c44e82cc851857d7a2156b19985d45
Binary files /dev/null and b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/collator.cpython-310.pyc differ
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/collator.cpython-313.pyc b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/collator.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4c0a817555b4571f5c52148eeaf28cf758f025d4
Binary files /dev/null and b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/collator.cpython-313.pyc differ
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/evaluator.cpython-310.pyc b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/evaluator.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e883f35ab363ee01c10e56391688c366584e9039
Binary files /dev/null and b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/evaluator.cpython-310.pyc differ
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/evaluator.cpython-313.pyc b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/evaluator.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5c844bfe14e8a383f01df69ed104aba20816bd4c
Binary files /dev/null and b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/evaluator.cpython-313.pyc differ
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/processor.cpython-310.pyc b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/processor.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..df0a143c1ea780adb349783e7410783f06a6aee6
Binary files /dev/null and b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/__pycache__/processor.cpython-310.pyc differ
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/collator.py b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/collator.py
new file mode 100644
index 0000000000000000000000000000000000000000..fea421a890cb70e0809fa0f16728b15657d63131
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/collator.py
@@ -0,0 +1,74 @@
+import numpy as np
+from dataclasses import dataclass
+from typing import Any, Optional, Union
+
+from transformers.utils import PaddingStrategy
+from transformers.tokenization_utils import PreTrainedTokenizerBase
+from transformers.data.data_collator import pad_without_fast_tokenizer_warning
+
+
+@dataclass
+class IndicDataCollator:
+ tokenizer: PreTrainedTokenizerBase
+ model: Optional[Any] = None
+ padding: Union[bool, str, PaddingStrategy] = True
+ max_length: Optional[int] = None
+ pad_to_multiple_of: Optional[int] = None
+ label_pad_token_id: int = -100
+ return_tensors: str = "pt"
+
+ def __call__(self, features, return_tensors=None):
+
+ if return_tensors is None:
+ return_tensors = self.return_tensors
+
+ labels = (
+ [feature["labels"] for feature in features]
+ if "labels" in features[0].keys()
+ else None
+ )
+ # We have to pad the labels before calling `tokenizer.pad` as
+ # this method won't pad them and needs them of the same length to return tensors.
+ if labels is not None:
+ max_label_length = max(len(l) for l in labels)
+ if self.pad_to_multiple_of is not None:
+ max_label_length = (
+ (max_label_length + self.pad_to_multiple_of - 1)
+ // self.pad_to_multiple_of
+ * self.pad_to_multiple_of
+ )
+
+ # fairseq by default right-pads the labels for seq2seq tasks
+ for feature in features:
+ remainder = [self.label_pad_token_id] * (
+ max_label_length - len(feature["labels"])
+ )
+ if isinstance(feature["labels"], list):
+ feature["labels"] = feature["labels"] + remainder
+ else:
+ feature["labels"] = np.concatenate(
+ [feature["labels"], remainder]
+ ).astype(np.int64)
+
+ self.tokenizer.padding_side = "left"
+ features = pad_without_fast_tokenizer_warning(
+ self.tokenizer,
+ features,
+ padding=self.padding,
+ max_length=self.max_length,
+ return_tensors=return_tensors,
+ pad_to_multiple_of=self.pad_to_multiple_of,
+ )
+
+ # prepare decoder_input_ids
+ if (
+ labels is not None
+ and self.model is not None
+ and hasattr(self.model, "prepare_decoder_input_ids_from_labels")
+ ):
+ decoder_input_ids = self.model.prepare_decoder_input_ids_from_labels(
+ labels=features["labels"]
+ )
+ features["decoder_input_ids"] = decoder_input_ids
+
+ return features
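+
+# Usage sketch (illustrative, not part of this file's API): pass an instance as a collate_fn, e.g.
+# DataLoader(dataset, batch_size=16, collate_fn=IndicDataCollator(tokenizer=tokenizer, model=model))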
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/evaluator.py b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/evaluator.py
new file mode 100644
index 0000000000000000000000000000000000000000..6860345d4c68f7c1015f6a4a725b53bd846ebc87
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/evaluator.py
@@ -0,0 +1,151 @@
+from typing import List, Union
+from sacrebleu.metrics import CHRF, BLEU
+
+from indicnlp.tokenize import indic_tokenize
+from indicnlp.normalize.indic_normalize import IndicNormalizerFactory
+
+
+class IndicEvaluator:
+ def __init__(self):
+ # === Metrics ===
+ self._chrf2_metric = CHRF(word_order=2)
+ self._bleu_metric_13a = BLEU(tokenize="13a")
+ self._bleu_metric_none = BLEU(tokenize="none")
+
+ # === Normalizer factory and cache ===
+ self._indic_norm_factory = IndicNormalizerFactory()
+ self._normalizer_cache = {} # Cache normalizers by iso_lang
+
+ # === FLORES -> ISO codes ===
+ self._flores_codes = {
+ "asm_Beng": "as",
+ "awa_Deva": "hi",
+ "ben_Beng": "bn",
+ "bho_Deva": "hi",
+ "brx_Deva": "hi",
+ "doi_Deva": "hi",
+ "eng_Latn": "en",
+ "gom_Deva": "kK",
+ "gon_Deva": "hi",
+ "guj_Gujr": "gu",
+ "hin_Deva": "hi",
+ "hne_Deva": "hi",
+ "kan_Knda": "kn",
+ "kas_Arab": "ur",
+ "kas_Deva": "hi",
+ "kha_Latn": "en",
+ "lus_Latn": "en",
+ "mag_Deva": "hi",
+ "mai_Deva": "hi",
+ "mal_Mlym": "ml",
+ "mar_Deva": "mr",
+ "mni_Beng": "bn",
+ "mni_Mtei": "hi",
+ "npi_Deva": "ne",
+ "ory_Orya": "or",
+ "pan_Guru": "pa",
+ "san_Deva": "hi",
+ "sat_Olck": "or",
+ "snd_Arab": "ur",
+ "snd_Deva": "hi",
+ "tam_Taml": "ta",
+ "tel_Telu": "te",
+ "urd_Arab": "ur",
+ "unr_Deva": "hi",
+ }
+
+ def _get_normalizer(self, iso_lang: str):
+ """
+ Return a cached normalizer for a given iso_lang.
+ """
+ if iso_lang not in self._normalizer_cache:
+ self._normalizer_cache[iso_lang] = self._indic_norm_factory.get_normalizer(iso_lang)
+ return self._normalizer_cache[iso_lang]
+
+ def _preprocess(self, sentences: List[str], lang: str) -> List[str]:
+ """
+ Preprocess the sentences using IndicNLP:
+ 1) Normalization (using a cached normalizer),
+ 2) Trivial tokenization.
+ """
+ iso_lang = self._flores_codes.get(lang, "hi")
+ # Fetch from cache to avoid reconstructing the normalizer
+ normalizer = self._get_normalizer(iso_lang)
+
+ # Local references for speed
+ trivial_tokenize = indic_tokenize.trivial_tokenize
+ normalize_fn = normalizer.normalize
+
+ processed_sentences = []
+ for line in sentences:
+ # single .strip() before normalizing
+ line = line.strip()
+ norm_line = normalize_fn(line)
+ tokens = trivial_tokenize(norm_line, iso_lang)
+ processed_sentences.append(" ".join(tokens))
+
+ return processed_sentences
+
+ def evaluate(
+ self,
+ tgt_lang: str,
+ preds: Union[List[str], str],
+ refs: Union[List[str], str],
+ ):
+ """
+ Evaluate BLEU and chrF2++ scores for the given predictions and references.
+ - If preds/refs are strings (filenames), read them from disk.
+ - If they are lists, evaluate them directly.
+ - For non-English languages, applies Indic NLP preprocessing before scoring.
+ """
+ assert preds is not None and refs is not None, "Predictions and References cannot be None"
+
+ # Convert file paths to lists if needed
+ if isinstance(preds, str):
+ with open(preds, "r", encoding="utf-8") as fp:
+ preds = [line.strip() for line in fp]
+ if isinstance(refs, str):
+ with open(refs, "r", encoding="utf-8") as fr:
+ refs = [line.strip() for line in fr]
+
+ assert len(preds) == len(refs), "Number of predictions and references do not match"
+
+ # Local references to metrics for speed
+ bleu_none = self._bleu_metric_none
+ bleu_13a = self._bleu_metric_13a
+ chrf2 = self._chrf2_metric
+
+ scores = {}
+
+ # For English (eng_Latn), skip Indic NLP normalization
+ if tgt_lang != "eng_Latn":
+ preds_ = self._preprocess(preds, tgt_lang)
+ refs_ = self._preprocess(refs, tgt_lang)
+
+ bleu_score = bleu_none.corpus_score(preds_, [refs_])
+ chrf_score = chrf2.corpus_score(preds_, [refs_])
+
+ scores["bleu"] = {
+ "score": round(bleu_score.score, 1),
+ "signature": bleu_none.get_signature().format(),
+ }
+ scores["chrF2++"] = {
+ "score": round(chrf_score.score, 1),
+ "signature": chrf2.get_signature().format(),
+ }
+
+ else:
+ # For English, 13a tokenization is standard
+ bleu_score = bleu_13a.corpus_score(preds, [refs])
+ chrf_score = chrf2.corpus_score(preds, [refs])
+
+ scores["bleu"] = {
+ "score": round(bleu_score.score, 1),
+ "signature": bleu_13a.get_signature().format(),
+ }
+ scores["chrF2++"] = {
+ "score": round(chrf_score.score, 1),
+ "signature": chrf2.get_signature().format(),
+ }
+
+ return scores
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/processor.c b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/processor.c
new file mode 100644
index 0000000000000000000000000000000000000000..f29222baaed4c8f6b9128e750e5154a0c0ff935e
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/processor.c
@@ -0,0 +1,17851 @@
+/* Generated by Cython 3.0.12 */
+
+/* BEGIN: Cython Metadata
+{
+ "distutils": {
+ "name": "IndicTransToolkit.processor",
+ "sources": [
+ "IndicTransToolkit/processor.pyx"
+ ]
+ },
+ "module_name": "IndicTransToolkit.processor"
+}
+END: Cython Metadata */
+
+#ifndef PY_SSIZE_T_CLEAN
+#define PY_SSIZE_T_CLEAN
+#endif /* PY_SSIZE_T_CLEAN */
+#if defined(CYTHON_LIMITED_API) && 0
+ #ifndef Py_LIMITED_API
+ #if CYTHON_LIMITED_API+0 > 0x03030000
+ #define Py_LIMITED_API CYTHON_LIMITED_API
+ #else
+ #define Py_LIMITED_API 0x03030000
+ #endif
+ #endif
+#endif
+
+#include "Python.h"
+#ifndef Py_PYTHON_H
+ #error Python headers needed to compile C extensions, please install development version of Python.
+#elif PY_VERSION_HEX < 0x02070000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000)
+ #error Cython requires Python 2.7+ or Python 3.3+.
+#else
+#if defined(CYTHON_LIMITED_API) && CYTHON_LIMITED_API
+#define __PYX_EXTRA_ABI_MODULE_NAME "limited"
+#else
+#define __PYX_EXTRA_ABI_MODULE_NAME ""
+#endif
+#define CYTHON_ABI "3_0_12" __PYX_EXTRA_ABI_MODULE_NAME
+#define __PYX_ABI_MODULE_NAME "_cython_" CYTHON_ABI
+#define __PYX_TYPE_MODULE_PREFIX __PYX_ABI_MODULE_NAME "."
+#define CYTHON_HEX_VERSION 0x03000CF0
+#define CYTHON_FUTURE_DIVISION 1
+#include <stddef.h>
+#ifndef offsetof
+ #define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
+#endif
+#if !defined(_WIN32) && !defined(WIN32) && !defined(MS_WINDOWS)
+ #ifndef __stdcall
+ #define __stdcall
+ #endif
+ #ifndef __cdecl
+ #define __cdecl
+ #endif
+ #ifndef __fastcall
+ #define __fastcall
+ #endif
+#endif
+#ifndef DL_IMPORT
+ #define DL_IMPORT(t) t
+#endif
+#ifndef DL_EXPORT
+ #define DL_EXPORT(t) t
+#endif
+#define __PYX_COMMA ,
+#ifndef HAVE_LONG_LONG
+ #define HAVE_LONG_LONG
+#endif
+#ifndef PY_LONG_LONG
+ #define PY_LONG_LONG LONG_LONG
+#endif
+#ifndef Py_HUGE_VAL
+ #define Py_HUGE_VAL HUGE_VAL
+#endif
+#define __PYX_LIMITED_VERSION_HEX PY_VERSION_HEX
+#if defined(GRAALVM_PYTHON)
+ /* For very preliminary testing purposes. Most variables are set the same as PyPy.
+ The existence of this section does not imply that anything works or is even tested */
+ #define CYTHON_COMPILING_IN_PYPY 0
+ #define CYTHON_COMPILING_IN_CPYTHON 0
+ #define CYTHON_COMPILING_IN_LIMITED_API 0
+ #define CYTHON_COMPILING_IN_GRAAL 1
+ #define CYTHON_COMPILING_IN_NOGIL 0
+ #undef CYTHON_USE_TYPE_SLOTS
+ #define CYTHON_USE_TYPE_SLOTS 0
+ #undef CYTHON_USE_TYPE_SPECS
+ #define CYTHON_USE_TYPE_SPECS 0
+ #undef CYTHON_USE_PYTYPE_LOOKUP
+ #define CYTHON_USE_PYTYPE_LOOKUP 0
+ #if PY_VERSION_HEX < 0x03050000
+ #undef CYTHON_USE_ASYNC_SLOTS
+ #define CYTHON_USE_ASYNC_SLOTS 0
+ #elif !defined(CYTHON_USE_ASYNC_SLOTS)
+ #define CYTHON_USE_ASYNC_SLOTS 1
+ #endif
+ #undef CYTHON_USE_PYLIST_INTERNALS
+ #define CYTHON_USE_PYLIST_INTERNALS 0
+ #undef CYTHON_USE_UNICODE_INTERNALS
+ #define CYTHON_USE_UNICODE_INTERNALS 0
+ #undef CYTHON_USE_UNICODE_WRITER
+ #define CYTHON_USE_UNICODE_WRITER 0
+ #undef CYTHON_USE_PYLONG_INTERNALS
+ #define CYTHON_USE_PYLONG_INTERNALS 0
+ #undef CYTHON_AVOID_BORROWED_REFS
+ #define CYTHON_AVOID_BORROWED_REFS 1
+ #undef CYTHON_ASSUME_SAFE_MACROS
+ #define CYTHON_ASSUME_SAFE_MACROS 0
+ #undef CYTHON_UNPACK_METHODS
+ #define CYTHON_UNPACK_METHODS 0
+ #undef CYTHON_FAST_THREAD_STATE
+ #define CYTHON_FAST_THREAD_STATE 0
+ #undef CYTHON_FAST_GIL
+ #define CYTHON_FAST_GIL 0
+ #undef CYTHON_METH_FASTCALL
+ #define CYTHON_METH_FASTCALL 0
+ #undef CYTHON_FAST_PYCALL
+ #define CYTHON_FAST_PYCALL 0
+ #ifndef CYTHON_PEP487_INIT_SUBCLASS
+ #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3)
+ #endif
+ #undef CYTHON_PEP489_MULTI_PHASE_INIT
+ #define CYTHON_PEP489_MULTI_PHASE_INIT 1
+ #undef CYTHON_USE_MODULE_STATE
+ #define CYTHON_USE_MODULE_STATE 0
+ #undef CYTHON_USE_TP_FINALIZE
+ #define CYTHON_USE_TP_FINALIZE 0
+ #undef CYTHON_USE_DICT_VERSIONS
+ #define CYTHON_USE_DICT_VERSIONS 0
+ #undef CYTHON_USE_EXC_INFO_STACK
+ #define CYTHON_USE_EXC_INFO_STACK 0
+ #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC
+ #define CYTHON_UPDATE_DESCRIPTOR_DOC 0
+ #endif
+ #undef CYTHON_USE_FREELISTS
+ #define CYTHON_USE_FREELISTS 0
+#elif defined(PYPY_VERSION)
+ #define CYTHON_COMPILING_IN_PYPY 1
+ #define CYTHON_COMPILING_IN_CPYTHON 0
+ #define CYTHON_COMPILING_IN_LIMITED_API 0
+ #define CYTHON_COMPILING_IN_GRAAL 0
+ #define CYTHON_COMPILING_IN_NOGIL 0
+ #undef CYTHON_USE_TYPE_SLOTS
+ #define CYTHON_USE_TYPE_SLOTS 0
+ #ifndef CYTHON_USE_TYPE_SPECS
+ #define CYTHON_USE_TYPE_SPECS 0
+ #endif
+ #undef CYTHON_USE_PYTYPE_LOOKUP
+ #define CYTHON_USE_PYTYPE_LOOKUP 0
+ #if PY_VERSION_HEX < 0x03050000
+ #undef CYTHON_USE_ASYNC_SLOTS
+ #define CYTHON_USE_ASYNC_SLOTS 0
+ #elif !defined(CYTHON_USE_ASYNC_SLOTS)
+ #define CYTHON_USE_ASYNC_SLOTS 1
+ #endif
+ #undef CYTHON_USE_PYLIST_INTERNALS
+ #define CYTHON_USE_PYLIST_INTERNALS 0
+ #undef CYTHON_USE_UNICODE_INTERNALS
+ #define CYTHON_USE_UNICODE_INTERNALS 0
+ #undef CYTHON_USE_UNICODE_WRITER
+ #define CYTHON_USE_UNICODE_WRITER 0
+ #undef CYTHON_USE_PYLONG_INTERNALS
+ #define CYTHON_USE_PYLONG_INTERNALS 0
+ #undef CYTHON_AVOID_BORROWED_REFS
+ #define CYTHON_AVOID_BORROWED_REFS 1
+ #undef CYTHON_ASSUME_SAFE_MACROS
+ #define CYTHON_ASSUME_SAFE_MACROS 0
+ #undef CYTHON_UNPACK_METHODS
+ #define CYTHON_UNPACK_METHODS 0
+ #undef CYTHON_FAST_THREAD_STATE
+ #define CYTHON_FAST_THREAD_STATE 0
+ #undef CYTHON_FAST_GIL
+ #define CYTHON_FAST_GIL 0
+ #undef CYTHON_METH_FASTCALL
+ #define CYTHON_METH_FASTCALL 0
+ #undef CYTHON_FAST_PYCALL
+ #define CYTHON_FAST_PYCALL 0
+ #ifndef CYTHON_PEP487_INIT_SUBCLASS
+ #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3)
+ #endif
+ #if PY_VERSION_HEX < 0x03090000
+ #undef CYTHON_PEP489_MULTI_PHASE_INIT
+ #define CYTHON_PEP489_MULTI_PHASE_INIT 0
+ #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT)
+ #define CYTHON_PEP489_MULTI_PHASE_INIT 1
+ #endif
+ #undef CYTHON_USE_MODULE_STATE
+ #define CYTHON_USE_MODULE_STATE 0
+ #undef CYTHON_USE_TP_FINALIZE
+ #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1 && PYPY_VERSION_NUM >= 0x07030C00)
+ #undef CYTHON_USE_DICT_VERSIONS
+ #define CYTHON_USE_DICT_VERSIONS 0
+ #undef CYTHON_USE_EXC_INFO_STACK
+ #define CYTHON_USE_EXC_INFO_STACK 0
+ #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC
+ #define CYTHON_UPDATE_DESCRIPTOR_DOC 0
+ #endif
+ #undef CYTHON_USE_FREELISTS
+ #define CYTHON_USE_FREELISTS 0
+#elif defined(CYTHON_LIMITED_API)
+ #ifdef Py_LIMITED_API
+ #undef __PYX_LIMITED_VERSION_HEX
+ #define __PYX_LIMITED_VERSION_HEX Py_LIMITED_API
+ #endif
+ #define CYTHON_COMPILING_IN_PYPY 0
+ #define CYTHON_COMPILING_IN_CPYTHON 0
+ #define CYTHON_COMPILING_IN_LIMITED_API 1
+ #define CYTHON_COMPILING_IN_GRAAL 0
+ #define CYTHON_COMPILING_IN_NOGIL 0
+ #undef CYTHON_CLINE_IN_TRACEBACK
+ #define CYTHON_CLINE_IN_TRACEBACK 0
+ #undef CYTHON_USE_TYPE_SLOTS
+ #define CYTHON_USE_TYPE_SLOTS 0
+ #undef CYTHON_USE_TYPE_SPECS
+ #define CYTHON_USE_TYPE_SPECS 1
+ #undef CYTHON_USE_PYTYPE_LOOKUP
+ #define CYTHON_USE_PYTYPE_LOOKUP 0
+ #undef CYTHON_USE_ASYNC_SLOTS
+ #define CYTHON_USE_ASYNC_SLOTS 0
+ #undef CYTHON_USE_PYLIST_INTERNALS
+ #define CYTHON_USE_PYLIST_INTERNALS 0
+ #undef CYTHON_USE_UNICODE_INTERNALS
+ #define CYTHON_USE_UNICODE_INTERNALS 0
+ #ifndef CYTHON_USE_UNICODE_WRITER
+ #define CYTHON_USE_UNICODE_WRITER 0
+ #endif
+ #undef CYTHON_USE_PYLONG_INTERNALS
+ #define CYTHON_USE_PYLONG_INTERNALS 0
+ #ifndef CYTHON_AVOID_BORROWED_REFS
+ #define CYTHON_AVOID_BORROWED_REFS 0
+ #endif
+ #undef CYTHON_ASSUME_SAFE_MACROS
+ #define CYTHON_ASSUME_SAFE_MACROS 0
+ #undef CYTHON_UNPACK_METHODS
+ #define CYTHON_UNPACK_METHODS 0
+ #undef CYTHON_FAST_THREAD_STATE
+ #define CYTHON_FAST_THREAD_STATE 0
+ #undef CYTHON_FAST_GIL
+ #define CYTHON_FAST_GIL 0
+ #undef CYTHON_METH_FASTCALL
+ #define CYTHON_METH_FASTCALL 0
+ #undef CYTHON_FAST_PYCALL
+ #define CYTHON_FAST_PYCALL 0
+ #ifndef CYTHON_PEP487_INIT_SUBCLASS
+ #define CYTHON_PEP487_INIT_SUBCLASS 1
+ #endif
+ #undef CYTHON_PEP489_MULTI_PHASE_INIT
+ #define CYTHON_PEP489_MULTI_PHASE_INIT 0
+ #undef CYTHON_USE_MODULE_STATE
+ #define CYTHON_USE_MODULE_STATE 1
+ #ifndef CYTHON_USE_TP_FINALIZE
+ #define CYTHON_USE_TP_FINALIZE 0
+ #endif
+ #undef CYTHON_USE_DICT_VERSIONS
+ #define CYTHON_USE_DICT_VERSIONS 0
+ #undef CYTHON_USE_EXC_INFO_STACK
+ #define CYTHON_USE_EXC_INFO_STACK 0
+ #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC
+ #define CYTHON_UPDATE_DESCRIPTOR_DOC 0
+ #endif
+ #undef CYTHON_USE_FREELISTS
+ #define CYTHON_USE_FREELISTS 0
+#elif defined(Py_GIL_DISABLED) || defined(Py_NOGIL)
+ #define CYTHON_COMPILING_IN_PYPY 0
+ #define CYTHON_COMPILING_IN_CPYTHON 0
+ #define CYTHON_COMPILING_IN_LIMITED_API 0
+ #define CYTHON_COMPILING_IN_GRAAL 0
+ #define CYTHON_COMPILING_IN_NOGIL 1
+ #ifndef CYTHON_USE_TYPE_SLOTS
+ #define CYTHON_USE_TYPE_SLOTS 1
+ #endif
+ #ifndef CYTHON_USE_TYPE_SPECS
+ #define CYTHON_USE_TYPE_SPECS 0
+ #endif
+ #undef CYTHON_USE_PYTYPE_LOOKUP
+ #define CYTHON_USE_PYTYPE_LOOKUP 0
+ #ifndef CYTHON_USE_ASYNC_SLOTS
+ #define CYTHON_USE_ASYNC_SLOTS 1
+ #endif
+ #ifndef CYTHON_USE_PYLONG_INTERNALS
+ #define CYTHON_USE_PYLONG_INTERNALS 0
+ #endif
+ #undef CYTHON_USE_PYLIST_INTERNALS
+ #define CYTHON_USE_PYLIST_INTERNALS 0
+ #ifndef CYTHON_USE_UNICODE_INTERNALS
+ #define CYTHON_USE_UNICODE_INTERNALS 1
+ #endif
+ #undef CYTHON_USE_UNICODE_WRITER
+ #define CYTHON_USE_UNICODE_WRITER 0
+ #ifndef CYTHON_AVOID_BORROWED_REFS
+ #define CYTHON_AVOID_BORROWED_REFS 0
+ #endif
+ #ifndef CYTHON_ASSUME_SAFE_MACROS
+ #define CYTHON_ASSUME_SAFE_MACROS 1
+ #endif
+ #ifndef CYTHON_UNPACK_METHODS
+ #define CYTHON_UNPACK_METHODS 1
+ #endif
+ #undef CYTHON_FAST_THREAD_STATE
+ #define CYTHON_FAST_THREAD_STATE 0
+ #undef CYTHON_FAST_GIL
+ #define CYTHON_FAST_GIL 0
+ #ifndef CYTHON_METH_FASTCALL
+ #define CYTHON_METH_FASTCALL 1
+ #endif
+ #undef CYTHON_FAST_PYCALL
+ #define CYTHON_FAST_PYCALL 0
+ #ifndef CYTHON_PEP487_INIT_SUBCLASS
+ #define CYTHON_PEP487_INIT_SUBCLASS 1
+ #endif
+ #ifndef CYTHON_PEP489_MULTI_PHASE_INIT
+ #define CYTHON_PEP489_MULTI_PHASE_INIT 1
+ #endif
+ #ifndef CYTHON_USE_MODULE_STATE
+ #define CYTHON_USE_MODULE_STATE 0
+ #endif
+ #ifndef CYTHON_USE_TP_FINALIZE
+ #define CYTHON_USE_TP_FINALIZE 1
+ #endif
+ #undef CYTHON_USE_DICT_VERSIONS
+ #define CYTHON_USE_DICT_VERSIONS 0
+ #undef CYTHON_USE_EXC_INFO_STACK
+ #define CYTHON_USE_EXC_INFO_STACK 0
+ #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC
+ #define CYTHON_UPDATE_DESCRIPTOR_DOC 1
+ #endif
+ #ifndef CYTHON_USE_FREELISTS
+ #define CYTHON_USE_FREELISTS 0
+ #endif
+#else
+ #define CYTHON_COMPILING_IN_PYPY 0
+ #define CYTHON_COMPILING_IN_CPYTHON 1
+ #define CYTHON_COMPILING_IN_LIMITED_API 0
+ #define CYTHON_COMPILING_IN_GRAAL 0
+ #define CYTHON_COMPILING_IN_NOGIL 0
+ #ifndef CYTHON_USE_TYPE_SLOTS
+ #define CYTHON_USE_TYPE_SLOTS 1
+ #endif
+ #ifndef CYTHON_USE_TYPE_SPECS
+ #define CYTHON_USE_TYPE_SPECS 0
+ #endif
+ #ifndef CYTHON_USE_PYTYPE_LOOKUP
+ #define CYTHON_USE_PYTYPE_LOOKUP 1
+ #endif
+ #if PY_MAJOR_VERSION < 3
+ #undef CYTHON_USE_ASYNC_SLOTS
+ #define CYTHON_USE_ASYNC_SLOTS 0
+ #elif !defined(CYTHON_USE_ASYNC_SLOTS)
+ #define CYTHON_USE_ASYNC_SLOTS 1
+ #endif
+ #ifndef CYTHON_USE_PYLONG_INTERNALS
+ #define CYTHON_USE_PYLONG_INTERNALS 1
+ #endif
+ #ifndef CYTHON_USE_PYLIST_INTERNALS
+ #define CYTHON_USE_PYLIST_INTERNALS 1
+ #endif
+ #ifndef CYTHON_USE_UNICODE_INTERNALS
+ #define CYTHON_USE_UNICODE_INTERNALS 1
+ #endif
+ #if PY_VERSION_HEX < 0x030300F0 || PY_VERSION_HEX >= 0x030B00A2
+ #undef CYTHON_USE_UNICODE_WRITER
+ #define CYTHON_USE_UNICODE_WRITER 0
+ #elif !defined(CYTHON_USE_UNICODE_WRITER)
+ #define CYTHON_USE_UNICODE_WRITER 1
+ #endif
+ #ifndef CYTHON_AVOID_BORROWED_REFS
+ #define CYTHON_AVOID_BORROWED_REFS 0
+ #endif
+ #ifndef CYTHON_ASSUME_SAFE_MACROS
+ #define CYTHON_ASSUME_SAFE_MACROS 1
+ #endif
+ #ifndef CYTHON_UNPACK_METHODS
+ #define CYTHON_UNPACK_METHODS 1
+ #endif
+ #ifndef CYTHON_FAST_THREAD_STATE
+ #define CYTHON_FAST_THREAD_STATE 1
+ #endif
+ #ifndef CYTHON_FAST_GIL
+ #define CYTHON_FAST_GIL (PY_MAJOR_VERSION < 3 || PY_VERSION_HEX >= 0x03060000 && PY_VERSION_HEX < 0x030C00A6)
+ #endif
+ #ifndef CYTHON_METH_FASTCALL
+ #define CYTHON_METH_FASTCALL (PY_VERSION_HEX >= 0x030700A1)
+ #endif
+ #ifndef CYTHON_FAST_PYCALL
+ #define CYTHON_FAST_PYCALL 1
+ #endif
+ #ifndef CYTHON_PEP487_INIT_SUBCLASS
+ #define CYTHON_PEP487_INIT_SUBCLASS 1
+ #endif
+ #if PY_VERSION_HEX < 0x03050000
+ #undef CYTHON_PEP489_MULTI_PHASE_INIT
+ #define CYTHON_PEP489_MULTI_PHASE_INIT 0
+ #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT)
+ #define CYTHON_PEP489_MULTI_PHASE_INIT 1
+ #endif
+ #ifndef CYTHON_USE_MODULE_STATE
+ #define CYTHON_USE_MODULE_STATE 0
+ #endif
+ #if PY_VERSION_HEX < 0x030400a1
+ #undef CYTHON_USE_TP_FINALIZE
+ #define CYTHON_USE_TP_FINALIZE 0
+ #elif !defined(CYTHON_USE_TP_FINALIZE)
+ #define CYTHON_USE_TP_FINALIZE 1
+ #endif
+ #if PY_VERSION_HEX < 0x030600B1
+ #undef CYTHON_USE_DICT_VERSIONS
+ #define CYTHON_USE_DICT_VERSIONS 0
+ #elif !defined(CYTHON_USE_DICT_VERSIONS)
+ #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX < 0x030C00A5)
+ #endif
+ #if PY_VERSION_HEX < 0x030700A3
+ #undef CYTHON_USE_EXC_INFO_STACK
+ #define CYTHON_USE_EXC_INFO_STACK 0
+ #elif !defined(CYTHON_USE_EXC_INFO_STACK)
+ #define CYTHON_USE_EXC_INFO_STACK 1
+ #endif
+ #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC
+ #define CYTHON_UPDATE_DESCRIPTOR_DOC 1
+ #endif
+ #ifndef CYTHON_USE_FREELISTS
+ #define CYTHON_USE_FREELISTS 1
+ #endif
+#endif
+#if !defined(CYTHON_FAST_PYCCALL)
+#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1)
+#endif
+#if !defined(CYTHON_VECTORCALL)
+#define CYTHON_VECTORCALL (CYTHON_FAST_PYCCALL && PY_VERSION_HEX >= 0x030800B1)
+#endif
+#define CYTHON_BACKPORT_VECTORCALL (CYTHON_METH_FASTCALL && PY_VERSION_HEX < 0x030800B1)
+#if CYTHON_USE_PYLONG_INTERNALS
+ #if PY_MAJOR_VERSION < 3
+ #include "longintrepr.h"
+ #endif
+ #undef SHIFT
+ #undef BASE
+ #undef MASK
+ #ifdef SIZEOF_VOID_P
+ enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) };
+ #endif
+#endif
+#ifndef __has_attribute
+ #define __has_attribute(x) 0
+#endif
+#ifndef __has_cpp_attribute
+ #define __has_cpp_attribute(x) 0
+#endif
+#ifndef CYTHON_RESTRICT
+ #if defined(__GNUC__)
+ #define CYTHON_RESTRICT __restrict__
+ #elif defined(_MSC_VER) && _MSC_VER >= 1400
+ #define CYTHON_RESTRICT __restrict
+ #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ #define CYTHON_RESTRICT restrict
+ #else
+ #define CYTHON_RESTRICT
+ #endif
+#endif
+#ifndef CYTHON_UNUSED
+ #if defined(__cplusplus)
+ /* for clang __has_cpp_attribute(maybe_unused) is true even before C++17
+ * but leads to warnings with -pedantic, since it is a C++17 feature */
+ #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L)
+ #if __has_cpp_attribute(maybe_unused)
+ #define CYTHON_UNUSED [[maybe_unused]]
+ #endif
+ #endif
+ #endif
+#endif
+#ifndef CYTHON_UNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define CYTHON_UNUSED __attribute__ ((__unused__))
+# else
+# define CYTHON_UNUSED
+# endif
+# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
+# define CYTHON_UNUSED __attribute__ ((__unused__))
+# else
+# define CYTHON_UNUSED
+# endif
+#endif
+#ifndef CYTHON_UNUSED_VAR
+# if defined(__cplusplus)
+ template<class T> void CYTHON_UNUSED_VAR( const T& ) { }
+# else
+# define CYTHON_UNUSED_VAR(x) (void)(x)
+# endif
+#endif
+#ifndef CYTHON_MAYBE_UNUSED_VAR
+ #define CYTHON_MAYBE_UNUSED_VAR(x) CYTHON_UNUSED_VAR(x)
+#endif
+#ifndef CYTHON_NCP_UNUSED
+# if CYTHON_COMPILING_IN_CPYTHON
+# define CYTHON_NCP_UNUSED
+# else
+# define CYTHON_NCP_UNUSED CYTHON_UNUSED
+# endif
+#endif
+#ifndef CYTHON_USE_CPP_STD_MOVE
+ #if defined(__cplusplus) && (\
+ __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1600))
+ #define CYTHON_USE_CPP_STD_MOVE 1
+ #else
+ #define CYTHON_USE_CPP_STD_MOVE 0
+ #endif
+#endif
+#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None)
+#ifdef _MSC_VER
+ #ifndef _MSC_STDINT_H_
+ #if _MSC_VER < 1300
+ typedef unsigned char uint8_t;
+ typedef unsigned short uint16_t;
+ typedef unsigned int uint32_t;
+ #else
+ typedef unsigned __int8 uint8_t;
+ typedef unsigned __int16 uint16_t;
+ typedef unsigned __int32 uint32_t;
+ #endif
+ #endif
+ #if _MSC_VER < 1300
+ #ifdef _WIN64
+ typedef unsigned long long __pyx_uintptr_t;
+ #else
+ typedef unsigned int __pyx_uintptr_t;
+ #endif
+ #else
+ #ifdef _WIN64
+ typedef unsigned __int64 __pyx_uintptr_t;
+ #else
+ typedef unsigned __int32 __pyx_uintptr_t;
+ #endif
+ #endif
+#else
+ #include <stdint.h>
+ typedef uintptr_t __pyx_uintptr_t;
+#endif
+#ifndef CYTHON_FALLTHROUGH
+ #if defined(__cplusplus)
+ /* for clang __has_cpp_attribute(fallthrough) is true even before C++17
+ * but leads to warnings with -pedantic, since it is a C++17 feature */
+ #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L)
+ #if __has_cpp_attribute(fallthrough)
+ #define CYTHON_FALLTHROUGH [[fallthrough]]
+ #endif
+ #endif
+ #ifndef CYTHON_FALLTHROUGH
+ #if __has_cpp_attribute(clang::fallthrough)
+ #define CYTHON_FALLTHROUGH [[clang::fallthrough]]
+ #elif __has_cpp_attribute(gnu::fallthrough)
+ #define CYTHON_FALLTHROUGH [[gnu::fallthrough]]
+ #endif
+ #endif
+ #endif
+ #ifndef CYTHON_FALLTHROUGH
+ #if __has_attribute(fallthrough)
+ #define CYTHON_FALLTHROUGH __attribute__((fallthrough))
+ #else
+ #define CYTHON_FALLTHROUGH
+ #endif
+ #endif
+ #if defined(__clang__) && defined(__apple_build_version__)
+ #if __apple_build_version__ < 7000000
+ #undef CYTHON_FALLTHROUGH
+ #define CYTHON_FALLTHROUGH
+ #endif
+ #endif
+#endif
+#ifdef __cplusplus
+ template<typename T>
+ struct __PYX_IS_UNSIGNED_IMPL {static const bool value = T(0) < T(-1);};
+ #define __PYX_IS_UNSIGNED(type) (__PYX_IS_UNSIGNED_IMPL<type>::value)
+#else
+ #define __PYX_IS_UNSIGNED(type) (((type)-1) > 0)
+#endif
+#if CYTHON_COMPILING_IN_PYPY == 1
+ #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x030A0000)
+#else
+ #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000)
+#endif
+#define __PYX_REINTERPRET_FUNCION(func_pointer, other_pointer) ((func_pointer)(void(*)(void))(other_pointer))
+
+#ifndef CYTHON_INLINE
+ #if defined(__clang__)
+ #define CYTHON_INLINE __inline__ __attribute__ ((__unused__))
+ #elif defined(__GNUC__)
+ #define CYTHON_INLINE __inline__
+ #elif defined(_MSC_VER)
+ #define CYTHON_INLINE __inline
+ #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ #define CYTHON_INLINE inline
+ #else
+ #define CYTHON_INLINE
+ #endif
+#endif
+
+#define __PYX_BUILD_PY_SSIZE_T "n"
+#define CYTHON_FORMAT_SSIZE_T "z"
+#if PY_MAJOR_VERSION < 3
+ #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
+ #define __Pyx_DefaultClassType PyClass_Type
+ #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
+ PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+#else
+ #define __Pyx_BUILTIN_MODULE_NAME "builtins"
+ #define __Pyx_DefaultClassType PyType_Type
+#if CYTHON_COMPILING_IN_LIMITED_API
+ static CYTHON_INLINE PyObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f,
+ PyObject *code, PyObject *c, PyObject* n, PyObject *v,
+ PyObject *fv, PyObject *cell, PyObject* fn,
+ PyObject *name, int fline, PyObject *lnos) {
+ PyObject *exception_table = NULL;
+ PyObject *types_module=NULL, *code_type=NULL, *result=NULL;
+ #if __PYX_LIMITED_VERSION_HEX < 0x030B0000
+ PyObject *version_info;
+ PyObject *py_minor_version = NULL;
+ #endif
+ long minor_version = 0;
+ PyObject *type, *value, *traceback;
+ PyErr_Fetch(&type, &value, &traceback);
+ #if __PYX_LIMITED_VERSION_HEX >= 0x030B0000
+ minor_version = 11;
+ #else
+ if (!(version_info = PySys_GetObject("version_info"))) goto end;
+ if (!(py_minor_version = PySequence_GetItem(version_info, 1))) goto end;
+ minor_version = PyLong_AsLong(py_minor_version);
+ Py_DECREF(py_minor_version);
+ if (minor_version == -1 && PyErr_Occurred()) goto end;
+ #endif
+ if (!(types_module = PyImport_ImportModule("types"))) goto end;
+ if (!(code_type = PyObject_GetAttrString(types_module, "CodeType"))) goto end;
+ if (minor_version <= 7) {
+ (void)p;
+ result = PyObject_CallFunction(code_type, "iiiiiOOOOOOiOO", a, k, l, s, f, code,
+ c, n, v, fn, name, fline, lnos, fv, cell);
+ } else if (minor_version <= 10) {
+ result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOiOO", a,p, k, l, s, f, code,
+ c, n, v, fn, name, fline, lnos, fv, cell);
+ } else {
+ if (!(exception_table = PyBytes_FromStringAndSize(NULL, 0))) goto end;
+ result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOOiOO", a,p, k, l, s, f, code,
+ c, n, v, fn, name, name, fline, lnos, exception_table, fv, cell);
+ }
+ end:
+ Py_XDECREF(code_type);
+ Py_XDECREF(exception_table);
+ Py_XDECREF(types_module);
+ if (type) {
+ PyErr_Restore(type, value, traceback);
+ }
+ return result;
+ }
+ #ifndef CO_OPTIMIZED
+ #define CO_OPTIMIZED 0x0001
+ #endif
+ #ifndef CO_NEWLOCALS
+ #define CO_NEWLOCALS 0x0002
+ #endif
+ #ifndef CO_VARARGS
+ #define CO_VARARGS 0x0004
+ #endif
+ #ifndef CO_VARKEYWORDS
+ #define CO_VARKEYWORDS 0x0008
+ #endif
+ #ifndef CO_ASYNC_GENERATOR
+ #define CO_ASYNC_GENERATOR 0x0200
+ #endif
+ #ifndef CO_GENERATOR
+ #define CO_GENERATOR 0x0020
+ #endif
+ #ifndef CO_COROUTINE
+ #define CO_COROUTINE 0x0080
+ #endif
+#elif PY_VERSION_HEX >= 0x030B0000
+ static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f,
+ PyObject *code, PyObject *c, PyObject* n, PyObject *v,
+ PyObject *fv, PyObject *cell, PyObject* fn,
+ PyObject *name, int fline, PyObject *lnos) {
+ PyCodeObject *result;
+ PyObject *empty_bytes = PyBytes_FromStringAndSize("", 0);
+ if (!empty_bytes) return NULL;
+ result =
+ #if PY_VERSION_HEX >= 0x030C0000
+ PyUnstable_Code_NewWithPosOnlyArgs
+ #else
+ PyCode_NewWithPosOnlyArgs
+ #endif
+ (a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, name, fline, lnos, empty_bytes);
+ Py_DECREF(empty_bytes);
+ return result;
+ }
+#elif PY_VERSION_HEX >= 0x030800B2 && !CYTHON_COMPILING_IN_PYPY
+ #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
+ PyCode_NewWithPosOnlyArgs(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+#else
+ #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
+ PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+#endif
+#endif
+#if PY_VERSION_HEX >= 0x030900A4 || defined(Py_IS_TYPE)
+ #define __Pyx_IS_TYPE(ob, type) Py_IS_TYPE(ob, type)
+#else
+ #define __Pyx_IS_TYPE(ob, type) (((const PyObject*)ob)->ob_type == (type))
+#endif
+#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_Is)
+ #define __Pyx_Py_Is(x, y) Py_Is(x, y)
+#else
+ #define __Pyx_Py_Is(x, y) ((x) == (y))
+#endif
+#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsNone)
+ #define __Pyx_Py_IsNone(ob) Py_IsNone(ob)
+#else
+ #define __Pyx_Py_IsNone(ob) __Pyx_Py_Is((ob), Py_None)
+#endif
+#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsTrue)
+ #define __Pyx_Py_IsTrue(ob) Py_IsTrue(ob)
+#else
+ #define __Pyx_Py_IsTrue(ob) __Pyx_Py_Is((ob), Py_True)
+#endif
+#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsFalse)
+ #define __Pyx_Py_IsFalse(ob) Py_IsFalse(ob)
+#else
+ #define __Pyx_Py_IsFalse(ob) __Pyx_Py_Is((ob), Py_False)
+#endif
+#define __Pyx_NoneAsNull(obj) (__Pyx_Py_IsNone(obj) ? NULL : (obj))
+#if PY_VERSION_HEX >= 0x030900F0 && !CYTHON_COMPILING_IN_PYPY
+ #define __Pyx_PyObject_GC_IsFinalized(o) PyObject_GC_IsFinalized(o)
+#else
+ #define __Pyx_PyObject_GC_IsFinalized(o) _PyGC_FINALIZED(o)
+#endif
+#ifndef CO_COROUTINE
+ #define CO_COROUTINE 0x80
+#endif
+#ifndef CO_ASYNC_GENERATOR
+ #define CO_ASYNC_GENERATOR 0x200
+#endif
+#ifndef Py_TPFLAGS_CHECKTYPES
+ #define Py_TPFLAGS_CHECKTYPES 0
+#endif
+#ifndef Py_TPFLAGS_HAVE_INDEX
+ #define Py_TPFLAGS_HAVE_INDEX 0
+#endif
+#ifndef Py_TPFLAGS_HAVE_NEWBUFFER
+ #define Py_TPFLAGS_HAVE_NEWBUFFER 0
+#endif
+#ifndef Py_TPFLAGS_HAVE_FINALIZE
+ #define Py_TPFLAGS_HAVE_FINALIZE 0
+#endif
+#ifndef Py_TPFLAGS_SEQUENCE
+ #define Py_TPFLAGS_SEQUENCE 0
+#endif
+#ifndef Py_TPFLAGS_MAPPING
+ #define Py_TPFLAGS_MAPPING 0
+#endif
+#ifndef METH_STACKLESS
+ #define METH_STACKLESS 0
+#endif
+#if PY_VERSION_HEX < 0x030700A3 || !defined(METH_FASTCALL)
+ #ifndef METH_FASTCALL
+ #define METH_FASTCALL 0x80
+ #endif
+ typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs);
+ typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args,
+ Py_ssize_t nargs, PyObject *kwnames);
+#else
+ #if PY_VERSION_HEX >= 0x030d00A4
+ # define __Pyx_PyCFunctionFast PyCFunctionFast
+ # define __Pyx_PyCFunctionFastWithKeywords PyCFunctionFastWithKeywords
+ #else
+ # define __Pyx_PyCFunctionFast _PyCFunctionFast
+ # define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords
+ #endif
+#endif
+#if CYTHON_METH_FASTCALL
+ #define __Pyx_METH_FASTCALL METH_FASTCALL
+ #define __Pyx_PyCFunction_FastCall __Pyx_PyCFunctionFast
+ #define __Pyx_PyCFunction_FastCallWithKeywords __Pyx_PyCFunctionFastWithKeywords
+#else
+ #define __Pyx_METH_FASTCALL METH_VARARGS
+ #define __Pyx_PyCFunction_FastCall PyCFunction
+ #define __Pyx_PyCFunction_FastCallWithKeywords PyCFunctionWithKeywords
+#endif
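+/* Vectorcall glue: PY_VECTORCALL_ARGUMENTS_OFFSET is the high bit of nargsf, and PyVectorcall_NARGS masks it off to recover the real positional-argument count. */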
+#if CYTHON_VECTORCALL
+ #define __pyx_vectorcallfunc vectorcallfunc
+ #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET PY_VECTORCALL_ARGUMENTS_OFFSET
+ #define __Pyx_PyVectorcall_NARGS(n) PyVectorcall_NARGS((size_t)(n))
+#elif CYTHON_BACKPORT_VECTORCALL
+ typedef PyObject *(*__pyx_vectorcallfunc)(PyObject *callable, PyObject *const *args,
+ size_t nargsf, PyObject *kwnames);
+ #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET ((size_t)1 << (8 * sizeof(size_t) - 1))
+ #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(((size_t)(n)) & ~__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET))
+#else
+ #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET 0
+ #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(n))
+#endif
+#if PY_VERSION_HEX >= 0x030900B1
+#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_CheckExact(func)
+#else
+#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_Check(func)
+#endif
+#define __Pyx_CyOrPyCFunction_Check(func) PyCFunction_Check(func)
+#if CYTHON_COMPILING_IN_CPYTHON
+#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) (((PyCFunctionObject*)(func))->m_ml->ml_meth)
+#elif !CYTHON_COMPILING_IN_LIMITED_API
+#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) PyCFunction_GET_FUNCTION(func)
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON
+#define __Pyx_CyOrPyCFunction_GET_FLAGS(func) (((PyCFunctionObject*)(func))->m_ml->ml_flags)
+static CYTHON_INLINE PyObject* __Pyx_CyOrPyCFunction_GET_SELF(PyObject *func) {
+ return (__Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_STATIC) ? NULL : ((PyCFunctionObject*)func)->m_self;
+}
+#endif
+static CYTHON_INLINE int __Pyx__IsSameCFunction(PyObject *func, void *cfunc) {
+#if CYTHON_COMPILING_IN_LIMITED_API
+ return PyCFunction_Check(func) && PyCFunction_GetFunction(func) == (PyCFunction) cfunc;
+#else
+ return PyCFunction_Check(func) && PyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc;
+#endif
+}
+#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCFunction(func, cfunc)
+#if __PYX_LIMITED_VERSION_HEX < 0x030900B1
+ #define __Pyx_PyType_FromModuleAndSpec(m, s, b) ((void)m, PyType_FromSpecWithBases(s, b))
+ typedef PyObject *(*__Pyx_PyCMethod)(PyObject *, PyTypeObject *, PyObject *const *, size_t, PyObject *);
+#else
+ #define __Pyx_PyType_FromModuleAndSpec(m, s, b) PyType_FromModuleAndSpec(m, s, b)
+ #define __Pyx_PyCMethod PyCMethod
+#endif
+#ifndef METH_METHOD
+ #define METH_METHOD 0x200
+#endif
+#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc)
+ #define PyObject_Malloc(s) PyMem_Malloc(s)
+ #define PyObject_Free(p) PyMem_Free(p)
+ #define PyObject_Realloc(p) PyMem_Realloc(p)
+#endif
+#if CYTHON_COMPILING_IN_LIMITED_API
+ #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0)
+ #define __Pyx_PyFrame_SetLineNumber(frame, lineno)
+#else
+ #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0)
+ #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno)
+#endif
+#if CYTHON_COMPILING_IN_LIMITED_API
+ #define __Pyx_PyThreadState_Current PyThreadState_Get()
+#elif !CYTHON_FAST_THREAD_STATE
+ #define __Pyx_PyThreadState_Current PyThreadState_GET()
+#elif PY_VERSION_HEX >= 0x030d00A1
+ #define __Pyx_PyThreadState_Current PyThreadState_GetUnchecked()
+#elif PY_VERSION_HEX >= 0x03060000
+ #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet()
+#elif PY_VERSION_HEX >= 0x03000000
+ #define __Pyx_PyThreadState_Current PyThreadState_GET()
+#else
+ #define __Pyx_PyThreadState_Current _PyThreadState_Current
+#endif
+#if CYTHON_COMPILING_IN_LIMITED_API
+static CYTHON_INLINE void *__Pyx_PyModule_GetState(PyObject *op)
+{
+ void *result;
+ result = PyModule_GetState(op);
+ if (!result)
+ Py_FatalError("Couldn't find the module state");
+ return result;
+}
+#endif
+#define __Pyx_PyObject_GetSlot(obj, name, func_ctype) __Pyx_PyType_GetSlot(Py_TYPE(obj), name, func_ctype)
+#if CYTHON_COMPILING_IN_LIMITED_API
+ #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((func_ctype) PyType_GetSlot((type), Py_##name))
+#else
+ #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((type)->name)
+#endif
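+/* Backport of the PyThread_tss_* thread-specific storage API (PEP 539) on top of the legacy pythread.h key functions for CPython < 3.7. */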
+#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT)
+#include "pythread.h"
+#define Py_tss_NEEDS_INIT 0
+typedef int Py_tss_t;
+static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) {
+ *key = PyThread_create_key();
+ return 0;
+}
+static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) {
+ Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t));
+ *key = Py_tss_NEEDS_INIT;
+ return key;
+}
+static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) {
+ PyObject_Free(key);
+}
+static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) {
+ return *key != Py_tss_NEEDS_INIT;
+}
+static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) {
+ PyThread_delete_key(*key);
+ *key = Py_tss_NEEDS_INIT;
+}
+static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) {
+ return PyThread_set_key_value(*key, value);
+}
+static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
+ return PyThread_get_key_value(*key);
+}
+#endif
+#if PY_MAJOR_VERSION < 3
+ #if CYTHON_COMPILING_IN_PYPY
+ #if PYPY_VERSION_NUM < 0x07030600
+ #if defined(__cplusplus) && __cplusplus >= 201402L
+ [[deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")]]
+ #elif defined(__GNUC__) || defined(__clang__)
+ __attribute__ ((__deprecated__("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")))
+ #elif defined(_MSC_VER)
+ __declspec(deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6"))
+ #endif
+ static CYTHON_INLINE int PyGILState_Check(void) {
+ return 0;
+ }
+ #else // PYPY_VERSION_NUM < 0x07030600
+ #endif // PYPY_VERSION_NUM < 0x07030600
+ #else
+ static CYTHON_INLINE int PyGILState_Check(void) {
+ PyThreadState * tstate = _PyThreadState_Current;
+ return tstate && (tstate == PyGILState_GetThisThreadState());
+ }
+ #endif
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 || defined(_PyDict_NewPresized)
+#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n))
+#else
+#define __Pyx_PyDict_NewPresized(n) PyDict_New()
+#endif
+#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION
+ #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y)
+ #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y)
+#else
+ #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y)
+ #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y)
+#endif
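+/* Dict lookup helpers: on CPython builds that expose unicode internals, reuse the string's cached hash via _PyDict_GetItem_KnownHash instead of rehashing the key. */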
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX > 0x030600B4 && PY_VERSION_HEX < 0x030d0000 && CYTHON_USE_UNICODE_INTERNALS
+#define __Pyx_PyDict_GetItemStrWithError(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash)
+static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStr(PyObject *dict, PyObject *name) {
+ PyObject *res = __Pyx_PyDict_GetItemStrWithError(dict, name);
+ if (res == NULL) PyErr_Clear();
+ return res;
+}
+#elif PY_MAJOR_VERSION >= 3 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07020000)
+#define __Pyx_PyDict_GetItemStrWithError PyDict_GetItemWithError
+#define __Pyx_PyDict_GetItemStr PyDict_GetItem
+#else
+static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStrWithError(PyObject *dict, PyObject *name) {
+#if CYTHON_COMPILING_IN_PYPY
+ return PyDict_GetItem(dict, name);
+#else
+ PyDictEntry *ep;
+ PyDictObject *mp = (PyDictObject*) dict;
+ long hash = ((PyStringObject *) name)->ob_shash;
+ assert(hash != -1);
+ ep = (mp->ma_lookup)(mp, name, hash);
+ if (ep == NULL) {
+ return NULL;
+ }
+ return ep->me_value;
+#endif
+}
+#define __Pyx_PyDict_GetItemStr PyDict_GetItem
+#endif
+#if CYTHON_USE_TYPE_SLOTS
+ #define __Pyx_PyType_GetFlags(tp) (((PyTypeObject *)tp)->tp_flags)
+ #define __Pyx_PyType_HasFeature(type, feature) ((__Pyx_PyType_GetFlags(type) & (feature)) != 0)
+ #define __Pyx_PyObject_GetIterNextFunc(obj) (Py_TYPE(obj)->tp_iternext)
+#else
+ #define __Pyx_PyType_GetFlags(tp) (PyType_GetFlags((PyTypeObject *)tp))
+ #define __Pyx_PyType_HasFeature(type, feature) PyType_HasFeature(type, feature)
+ #define __Pyx_PyObject_GetIterNextFunc(obj) PyIter_Next
+#endif
+#if CYTHON_COMPILING_IN_LIMITED_API
+ #define __Pyx_SetItemOnTypeDict(tp, k, v) PyObject_GenericSetAttr((PyObject*)tp, k, v)
+#else
+ #define __Pyx_SetItemOnTypeDict(tp, k, v) PyDict_SetItem(tp->tp_dict, k, v)
+#endif
+#if CYTHON_USE_TYPE_SPECS && PY_VERSION_HEX >= 0x03080000
+#define __Pyx_PyHeapTypeObject_GC_Del(obj) {\
+ PyTypeObject *type = Py_TYPE((PyObject*)obj);\
+ assert(__Pyx_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE));\
+ PyObject_GC_Del(obj);\
+ Py_DECREF(type);\
+}
+#else
+#define __Pyx_PyHeapTypeObject_GC_Del(obj) PyObject_GC_Del(obj)
+#endif
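+/* Unicode access macros: the Limited API goes through public getter functions, CPython >= 3.3 uses the PEP 393 flexible representation, and older builds fall back to Py_UNICODE arrays. */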
+#if CYTHON_COMPILING_IN_LIMITED_API
+ #define CYTHON_PEP393_ENABLED 1
+ #define __Pyx_PyUnicode_READY(op) (0)
+ #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GetLength(u)
+ #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_ReadChar(u, i)
+ #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((void)u, 1114111U)
+ #define __Pyx_PyUnicode_KIND(u) ((void)u, (0))
+ #define __Pyx_PyUnicode_DATA(u) ((void*)u)
+ #define __Pyx_PyUnicode_READ(k, d, i) ((void)k, PyUnicode_ReadChar((PyObject*)(d), i))
+ #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GetLength(u))
+#elif PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
+ #define CYTHON_PEP393_ENABLED 1
+ #if PY_VERSION_HEX >= 0x030C0000
+ #define __Pyx_PyUnicode_READY(op) (0)
+ #else
+ #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\
+ 0 : _PyUnicode_Ready((PyObject *)(op)))
+ #endif
+ #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
+ #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
+ #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u)
+ #define __Pyx_PyUnicode_KIND(u) ((int)PyUnicode_KIND(u))
+ #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
+ #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
+ #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, (Py_UCS4) ch)
+ #if PY_VERSION_HEX >= 0x030C0000
+ #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u))
+ #else
+ #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000
+ #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length))
+ #else
+ #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u)))
+ #endif
+ #endif
+#else
+ #define CYTHON_PEP393_ENABLED 0
+ #define PyUnicode_1BYTE_KIND 1
+ #define PyUnicode_2BYTE_KIND 2
+ #define PyUnicode_4BYTE_KIND 4
+ #define __Pyx_PyUnicode_READY(op) (0)
+ #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
+ #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
+ #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 65535U : 1114111U)
+ #define __Pyx_PyUnicode_KIND(u) ((int)sizeof(Py_UNICODE))
+ #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u))
+ #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i]))
+ #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = (Py_UNICODE) ch)
+ #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u))
+#endif
+#if CYTHON_COMPILING_IN_PYPY
+ #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b)
+ #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b)
+#else
+ #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b)
+ #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\
+ PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b))
+#endif
+#if CYTHON_COMPILING_IN_PYPY
+ #if !defined(PyUnicode_DecodeUnicodeEscape)
+ #define PyUnicode_DecodeUnicodeEscape(s, size, errors) PyUnicode_Decode(s, size, "unicode_escape", errors)
+ #endif
+ #if !defined(PyUnicode_Contains) || (PY_MAJOR_VERSION == 2 && PYPY_VERSION_NUM < 0x07030500)
+ #undef PyUnicode_Contains
+ #define PyUnicode_Contains(u, s) PySequence_Contains(u, s)
+ #endif
+ #if !defined(PyByteArray_Check)
+ #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type)
+ #endif
+ #if !defined(PyObject_Format)
+ #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt)
+ #endif
+#endif
+#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b))
+#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b))
+#if PY_MAJOR_VERSION >= 3
+ #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b)
+#else
+ #define __Pyx_PyString_Format(a, b) PyString_Format(a, b)
+#endif
+#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII)
+ #define PyObject_ASCII(o) PyObject_Repr(o)
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define PyBaseString_Type PyUnicode_Type
+ #define PyStringObject PyUnicodeObject
+ #define PyString_Type PyUnicode_Type
+ #define PyString_Check PyUnicode_Check
+ #define PyString_CheckExact PyUnicode_CheckExact
+#ifndef PyObject_Unicode
+ #define PyObject_Unicode PyObject_Str
+#endif
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
+ #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
+#else
+ #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj))
+ #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON
+ #define __Pyx_PySequence_ListKeepNew(obj)\
+ (likely(PyList_CheckExact(obj) && Py_REFCNT(obj) == 1) ? __Pyx_NewRef(obj) : PySequence_List(obj))
+#else
+ #define __Pyx_PySequence_ListKeepNew(obj) PySequence_List(obj)
+#endif
+#ifndef PySet_CheckExact
+ #define PySet_CheckExact(obj) __Pyx_IS_TYPE(obj, &PySet_Type)
+#endif
+#if PY_VERSION_HEX >= 0x030900A4
+ #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt)
+ #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size)
+#else
+ #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt)
+ #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size)
+#endif
+#if CYTHON_ASSUME_SAFE_MACROS
+ #define __Pyx_PySequence_ITEM(o, i) PySequence_ITEM(o, i)
+ #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq)
+ #define __Pyx_PyTuple_SET_ITEM(o, i, v) (PyTuple_SET_ITEM(o, i, v), (0))
+ #define __Pyx_PyList_SET_ITEM(o, i, v) (PyList_SET_ITEM(o, i, v), (0))
+ #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_GET_SIZE(o)
+ #define __Pyx_PyList_GET_SIZE(o) PyList_GET_SIZE(o)
+ #define __Pyx_PySet_GET_SIZE(o) PySet_GET_SIZE(o)
+ #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_GET_SIZE(o)
+ #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_GET_SIZE(o)
+#else
+ #define __Pyx_PySequence_ITEM(o, i) PySequence_GetItem(o, i)
+ #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq)
+ #define __Pyx_PyTuple_SET_ITEM(o, i, v) PyTuple_SetItem(o, i, v)
+ #define __Pyx_PyList_SET_ITEM(o, i, v) PyList_SetItem(o, i, v)
+ #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_Size(o)
+ #define __Pyx_PyList_GET_SIZE(o) PyList_Size(o)
+ #define __Pyx_PySet_GET_SIZE(o) PySet_Size(o)
+ #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_Size(o)
+ #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_Size(o)
+#endif
+#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1
+ #define __Pyx_PyImport_AddModuleRef(name) PyImport_AddModuleRef(name)
+#else
+ static CYTHON_INLINE PyObject *__Pyx_PyImport_AddModuleRef(const char *name) {
+ PyObject *module = PyImport_AddModule(name);
+ Py_XINCREF(module);
+ return module;
+ }
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define PyIntObject PyLongObject
+ #define PyInt_Type PyLong_Type
+ #define PyInt_Check(op) PyLong_Check(op)
+ #define PyInt_CheckExact(op) PyLong_CheckExact(op)
+ #define __Pyx_Py3Int_Check(op) PyLong_Check(op)
+ #define __Pyx_Py3Int_CheckExact(op) PyLong_CheckExact(op)
+ #define PyInt_FromString PyLong_FromString
+ #define PyInt_FromUnicode PyLong_FromUnicode
+ #define PyInt_FromLong PyLong_FromLong
+ #define PyInt_FromSize_t PyLong_FromSize_t
+ #define PyInt_FromSsize_t PyLong_FromSsize_t
+ #define PyInt_AsLong PyLong_AsLong
+ #define PyInt_AS_LONG PyLong_AS_LONG
+ #define PyInt_AsSsize_t PyLong_AsSsize_t
+ #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask
+ #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
+ #define PyNumber_Int PyNumber_Long
+#else
+ #define __Pyx_Py3Int_Check(op) (PyLong_Check(op) || PyInt_Check(op))
+ #define __Pyx_Py3Int_CheckExact(op) (PyLong_CheckExact(op) || PyInt_CheckExact(op))
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define PyBoolObject PyLongObject
+#endif
+#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY
+ #ifndef PyUnicode_InternFromString
+ #define PyUnicode_InternFromString(s) PyUnicode_FromString(s)
+ #endif
+#endif
+#if PY_VERSION_HEX < 0x030200A4
+ typedef long Py_hash_t;
+ #define __Pyx_PyInt_FromHash_t PyInt_FromLong
+ #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsHash_t
+#else
+ #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
+ #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsSsize_t
+#endif
+#if CYTHON_USE_ASYNC_SLOTS
+ #if PY_VERSION_HEX >= 0x030500B1
+ #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods
+ #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async)
+ #else
+ #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved))
+ #endif
+#else
+ #define __Pyx_PyType_AsAsync(obj) NULL
+#endif
+#ifndef __Pyx_PyAsyncMethodsStruct
+ typedef struct {
+ unaryfunc am_await;
+ unaryfunc am_aiter;
+ unaryfunc am_anext;
+ } __Pyx_PyAsyncMethodsStruct;
+#endif
+
+#if defined(_WIN32) || defined(WIN32) || defined(MS_WINDOWS)
+ #if !defined(_USE_MATH_DEFINES)
+ #define _USE_MATH_DEFINES
+ #endif
+#endif
+#include <math.h>
+#ifdef NAN
+#define __PYX_NAN() ((float) NAN)
+#else
+static CYTHON_INLINE float __PYX_NAN() {
+ float value;
+ memset(&value, 0xFF, sizeof(value));
+ return value;
+}
+#endif
+#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL)
+#define __Pyx_truncl trunc
+#else
+#define __Pyx_truncl truncl
+#endif
+
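+/* Record the Python source position (and the C line via __LINE__) before jumping to an error label, so tracebacks can point back into the .pyx file. */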
+#define __PYX_MARK_ERR_POS(f_index, lineno) \
+ { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; }
+#define __PYX_ERR(f_index, lineno, Ln_error) \
+ { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; }
+
+#ifdef CYTHON_EXTERN_C
+ #undef __PYX_EXTERN_C
+ #define __PYX_EXTERN_C CYTHON_EXTERN_C
+#elif defined(__PYX_EXTERN_C)
+ #ifdef _MSC_VER
+ #pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.")
+ #else
+ #warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.
+ #endif
+#else
+ #ifdef __cplusplus
+ #define __PYX_EXTERN_C extern "C"
+ #else
+ #define __PYX_EXTERN_C extern
+ #endif
+#endif
+
+#define __PYX_HAVE__IndicTransToolkit__processor
+#define __PYX_HAVE_API__IndicTransToolkit__processor
+/* Early includes */
+#ifdef _OPENMP
+#include <omp.h>
+#endif /* _OPENMP */
+
+#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS)
+#define CYTHON_WITHOUT_ASSERTIONS
+#endif
+
+typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding;
+ const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry;
+
+#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0
+#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0
+#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8)
+#define __PYX_DEFAULT_STRING_ENCODING ""
+#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString
+#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
+#define __Pyx_uchar_cast(c) ((unsigned char)c)
+#define __Pyx_long_cast(x) ((long)x)
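+/* True when a value of the given C integer type is guaranteed to fit into Py_ssize_t, comparing type widths and checking bounds without overflow. */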
+#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\
+ (sizeof(type) < sizeof(Py_ssize_t)) ||\
+ (sizeof(type) > sizeof(Py_ssize_t) &&\
+ likely(v < (type)PY_SSIZE_T_MAX ||\
+ v == (type)PY_SSIZE_T_MAX) &&\
+ (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\
+ v == (type)PY_SSIZE_T_MIN))) ||\
+ (sizeof(type) == sizeof(Py_ssize_t) &&\
+ (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\
+ v == (type)PY_SSIZE_T_MAX))) )
+static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) {
+ return (size_t) i < (size_t) limit;
+}
+#if defined (__cplusplus) && __cplusplus >= 201103L
+  #include <cstdlib>
+ #define __Pyx_sst_abs(value) std::abs(value)
+#elif SIZEOF_INT >= SIZEOF_SIZE_T
+ #define __Pyx_sst_abs(value) abs(value)
+#elif SIZEOF_LONG >= SIZEOF_SIZE_T
+ #define __Pyx_sst_abs(value) labs(value)
+#elif defined (_MSC_VER)
+ #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value))
+#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ #define __Pyx_sst_abs(value) llabs(value)
+#elif defined (__GNUC__)
+ #define __Pyx_sst_abs(value) __builtin_llabs(value)
+#else
+ #define __Pyx_sst_abs(value) ((value<0) ? -value : value)
+#endif
+static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s);
+static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*);
+static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
+static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char*);
+#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l)
+#define __Pyx_PyBytes_FromString PyBytes_FromString
+#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*);
+#if PY_MAJOR_VERSION < 3
+ #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString
+ #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
+#else
+ #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString
+ #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
+#endif
+#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyObject_AsWritableString(s) ((char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsWritableSString(s) ((signed char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsSString(s) ((const signed char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s)
+#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s)
+#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s)
+#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s)
+#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s)
+#define __Pyx_PyUnicode_FromOrdinal(o) PyUnicode_FromOrdinal((int)o)
+#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode
+#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj)
+#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None)
+static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b);
+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
+static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*);
+static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x);
+#define __Pyx_PySequence_Tuple(obj)\
+ (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj))
+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
+static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*);
+#if CYTHON_ASSUME_SAFE_MACROS
+#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
+#else
+#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x)
+#endif
+#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
+#if PY_MAJOR_VERSION >= 3
+#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x))
+#else
+#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x))
+#endif
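+/* Direct access to PyLong internals: CPython 3.12 packs the sign and digit count into long_value.lv_tag, while older versions encode both in Py_SIZE. */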
+#if CYTHON_USE_PYLONG_INTERNALS
+ #if PY_VERSION_HEX >= 0x030C00A7
+ #ifndef _PyLong_SIGN_MASK
+ #define _PyLong_SIGN_MASK 3
+ #endif
+ #ifndef _PyLong_NON_SIZE_BITS
+ #define _PyLong_NON_SIZE_BITS 3
+ #endif
+ #define __Pyx_PyLong_Sign(x) (((PyLongObject*)x)->long_value.lv_tag & _PyLong_SIGN_MASK)
+ #define __Pyx_PyLong_IsNeg(x) ((__Pyx_PyLong_Sign(x) & 2) != 0)
+ #define __Pyx_PyLong_IsNonNeg(x) (!__Pyx_PyLong_IsNeg(x))
+ #define __Pyx_PyLong_IsZero(x) (__Pyx_PyLong_Sign(x) & 1)
+ #define __Pyx_PyLong_IsPos(x) (__Pyx_PyLong_Sign(x) == 0)
+ #define __Pyx_PyLong_CompactValueUnsigned(x) (__Pyx_PyLong_Digits(x)[0])
+ #define __Pyx_PyLong_DigitCount(x) ((Py_ssize_t) (((PyLongObject*)x)->long_value.lv_tag >> _PyLong_NON_SIZE_BITS))
+ #define __Pyx_PyLong_SignedDigitCount(x)\
+ ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * __Pyx_PyLong_DigitCount(x))
+ #if defined(PyUnstable_Long_IsCompact) && defined(PyUnstable_Long_CompactValue)
+ #define __Pyx_PyLong_IsCompact(x) PyUnstable_Long_IsCompact((PyLongObject*) x)
+ #define __Pyx_PyLong_CompactValue(x) PyUnstable_Long_CompactValue((PyLongObject*) x)
+ #else
+ #define __Pyx_PyLong_IsCompact(x) (((PyLongObject*)x)->long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS))
+ #define __Pyx_PyLong_CompactValue(x) ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * (Py_ssize_t) __Pyx_PyLong_Digits(x)[0])
+ #endif
+ typedef Py_ssize_t __Pyx_compact_pylong;
+ typedef size_t __Pyx_compact_upylong;
+ #else
+ #define __Pyx_PyLong_IsNeg(x) (Py_SIZE(x) < 0)
+ #define __Pyx_PyLong_IsNonNeg(x) (Py_SIZE(x) >= 0)
+ #define __Pyx_PyLong_IsZero(x) (Py_SIZE(x) == 0)
+ #define __Pyx_PyLong_IsPos(x) (Py_SIZE(x) > 0)
+ #define __Pyx_PyLong_CompactValueUnsigned(x) ((Py_SIZE(x) == 0) ? 0 : __Pyx_PyLong_Digits(x)[0])
+ #define __Pyx_PyLong_DigitCount(x) __Pyx_sst_abs(Py_SIZE(x))
+ #define __Pyx_PyLong_SignedDigitCount(x) Py_SIZE(x)
+ #define __Pyx_PyLong_IsCompact(x) (Py_SIZE(x) == 0 || Py_SIZE(x) == 1 || Py_SIZE(x) == -1)
+ #define __Pyx_PyLong_CompactValue(x)\
+ ((Py_SIZE(x) == 0) ? (sdigit) 0 : ((Py_SIZE(x) < 0) ? -(sdigit)__Pyx_PyLong_Digits(x)[0] : (sdigit)__Pyx_PyLong_Digits(x)[0]))
+ typedef sdigit __Pyx_compact_pylong;
+ typedef digit __Pyx_compact_upylong;
+ #endif
+ #if PY_VERSION_HEX >= 0x030C00A5
+ #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->long_value.ob_digit)
+ #else
+ #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->ob_digit)
+ #endif
+#endif
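+/* When compiled with c_string_encoding=ascii on Python 2, verify at import time that the interpreter's default encoding round-trips all 128 ASCII characters. */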
+#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+#include <string.h>
+static int __Pyx_sys_getdefaultencoding_not_ascii;
+static int __Pyx_init_sys_getdefaultencoding_params(void) {
+ PyObject* sys;
+ PyObject* default_encoding = NULL;
+ PyObject* ascii_chars_u = NULL;
+ PyObject* ascii_chars_b = NULL;
+ const char* default_encoding_c;
+ sys = PyImport_ImportModule("sys");
+ if (!sys) goto bad;
+ default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL);
+ Py_DECREF(sys);
+ if (!default_encoding) goto bad;
+ default_encoding_c = PyBytes_AsString(default_encoding);
+ if (!default_encoding_c) goto bad;
+ if (strcmp(default_encoding_c, "ascii") == 0) {
+ __Pyx_sys_getdefaultencoding_not_ascii = 0;
+ } else {
+ char ascii_chars[128];
+ int c;
+ for (c = 0; c < 128; c++) {
+ ascii_chars[c] = (char) c;
+ }
+ __Pyx_sys_getdefaultencoding_not_ascii = 1;
+ ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL);
+ if (!ascii_chars_u) goto bad;
+ ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL);
+ if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) {
+ PyErr_Format(
+ PyExc_ValueError,
+ "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.",
+ default_encoding_c);
+ goto bad;
+ }
+ Py_DECREF(ascii_chars_u);
+ Py_DECREF(ascii_chars_b);
+ }
+ Py_DECREF(default_encoding);
+ return 0;
+bad:
+ Py_XDECREF(default_encoding);
+ Py_XDECREF(ascii_chars_u);
+ Py_XDECREF(ascii_chars_b);
+ return -1;
+}
+#endif
+#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3
+#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL)
+#else
+#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
+#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
+#include <string.h>
+static char* __PYX_DEFAULT_STRING_ENCODING;
+static int __Pyx_init_sys_getdefaultencoding_params(void) {
+ PyObject* sys;
+ PyObject* default_encoding = NULL;
+ char* default_encoding_c;
+ sys = PyImport_ImportModule("sys");
+ if (!sys) goto bad;
+ default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
+ Py_DECREF(sys);
+ if (!default_encoding) goto bad;
+ default_encoding_c = PyBytes_AsString(default_encoding);
+ if (!default_encoding_c) goto bad;
+ __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1);
+ if (!__PYX_DEFAULT_STRING_ENCODING) goto bad;
+ strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
+ Py_DECREF(default_encoding);
+ return 0;
+bad:
+ Py_XDECREF(default_encoding);
+ return -1;
+}
+#endif
+#endif
+
+
+/* Test for GCC > 2.95 */
+#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
+ #define likely(x) __builtin_expect(!!(x), 1)
+ #define unlikely(x) __builtin_expect(!!(x), 0)
+#else /* !__GNUC__ or GCC < 2.95 */
+ #define likely(x) (x)
+ #define unlikely(x) (x)
+#endif /* __GNUC__ */
+static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; }
+
+#if !CYTHON_USE_MODULE_STATE
+static PyObject *__pyx_m = NULL;
+#endif
+static int __pyx_lineno;
+static int __pyx_clineno = 0;
+static const char * __pyx_cfilenm = __FILE__;
+static const char *__pyx_filename;
+
+/* #### Code section: filename_table ### */
+
+static const char *__pyx_f[] = {
+ "IndicTransToolkit\\\\processor.pyx",
+  "<stringsource>",
+};
+/* #### Code section: utility_code_proto_before_types ### */
+/* ForceInitThreads.proto */
+#ifndef __PYX_FORCE_INIT_THREADS
+ #define __PYX_FORCE_INIT_THREADS 0
+#endif
+
+/* #### Code section: numeric_typedefs ### */
+/* #### Code section: complex_type_declarations ### */
+/* #### Code section: type_declarations ### */
+
+/*--- Type declarations ---*/
+struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor;
+struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch;
+struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch;
+
+/* "IndicTransToolkit/processor.pyx":449
+ *
+ * # Exposed Method: Preprocess a Batch of Sentences
+ * cpdef list preprocess_batch( # <<<<<<<<<<<<<<
+ * self,
+ * List[str] batch,
+ */
+struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch {
+ int __pyx_n;
+ PyObject *tgt_lang;
+ int is_target;
+ int visualize;
+};
+
+/* "IndicTransToolkit/processor.pyx":479
+ *
+ * # Exposed Method: Postprocess a Batch of Sentences
+ * cpdef list postprocess_batch( # <<<<<<<<<<<<<<
+ * self,
+ * List[str] sents,
+ */
+struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch {
+ int __pyx_n;
+ PyObject *lang;
+ int visualize;
+};
+
+/* "IndicTransToolkit/processor.pyx":20
+ *
+ *
+ * cdef class IndicProcessor: # <<<<<<<<<<<<<<
+ * cdef public bint inference
+ *
+ */
+struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor {
+ PyObject_HEAD
+ struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *__pyx_vtab;
+ int inference;
+ PyObject *_MULTISPACE_REGEX;
+ PyObject *_DIGIT_SPACE_PERCENT;
+ PyObject *_DOUBLE_QUOT_PUNC;
+ PyObject *_DIGIT_NBSP_DIGIT;
+ PyObject *_END_BRACKET_SPACE_PUNC_REGEX;
+ PyObject *_URL_PATTERN;
+ PyObject *_NUMERAL_PATTERN;
+ PyObject *_EMAIL_PATTERN;
+ PyObject *_OTHER_PATTERN;
+ PyObject *_PUNC_REPLACEMENTS;
+ PyObject *_INDIC_FAILURE_CASES;
+ PyObject *_flores_codes;
+ PyObject *_digits_translation_table;
+ PyObject *_placeholder_entity_maps;
+ PyObject *_en_tok;
+ PyObject *_en_normalizer;
+ PyObject *_en_detok;
+ PyObject *_xliterator;
+};
+
+
+
+struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor {
+ PyObject *(*_apply_punc_replacements)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *);
+ PyObject *(*_punc_norm)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *);
+ PyObject *(*_wrap_with_placeholders)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *);
+ PyObject *(*_normalize)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *);
+ PyObject *(*_do_indic_tokenize_and_transliterate)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *, PyObject *, int);
+ PyObject *(*_preprocess)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *, PyObject *, PyObject *, int);
+ PyObject *(*_postprocess)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *);
+ PyObject *(*preprocess_batch)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *, int __pyx_skip_dispatch, struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch *__pyx_optional_args);
+ PyObject *(*postprocess_batch)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, int __pyx_skip_dispatch, struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch *__pyx_optional_args);
+};
+static struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *__pyx_vtabptr_17IndicTransToolkit_9processor_IndicProcessor;
+/* #### Code section: utility_code_proto ### */
+
+/* --- Runtime support code (head) --- */
+/* Refnanny.proto */
+#ifndef CYTHON_REFNANNY
+ #define CYTHON_REFNANNY 0
+#endif
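+/* RefNanny is an optional reference-count debugging layer; when disabled, these macros collapse to plain Py_INCREF/Py_DECREF or to no-ops. */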
+#if CYTHON_REFNANNY
+ typedef struct {
+ void (*INCREF)(void*, PyObject*, Py_ssize_t);
+ void (*DECREF)(void*, PyObject*, Py_ssize_t);
+ void (*GOTREF)(void*, PyObject*, Py_ssize_t);
+ void (*GIVEREF)(void*, PyObject*, Py_ssize_t);
+ void* (*SetupContext)(const char*, Py_ssize_t, const char*);
+ void (*FinishContext)(void**);
+ } __Pyx_RefNannyAPIStruct;
+ static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
+ static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname);
+ #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
+#ifdef WITH_THREAD
+ #define __Pyx_RefNannySetupContext(name, acquire_gil)\
+ if (acquire_gil) {\
+ PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\
+ PyGILState_Release(__pyx_gilstate_save);\
+ } else {\
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\
+ }
+ #define __Pyx_RefNannyFinishContextNogil() {\
+ PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\
+ __Pyx_RefNannyFinishContext();\
+ PyGILState_Release(__pyx_gilstate_save);\
+ }
+#else
+ #define __Pyx_RefNannySetupContext(name, acquire_gil)\
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__))
+ #define __Pyx_RefNannyFinishContextNogil() __Pyx_RefNannyFinishContext()
+#endif
+ #define __Pyx_RefNannyFinishContext()\
+ __Pyx_RefNanny->FinishContext(&__pyx_refnanny)
+ #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), (__LINE__))
+ #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), (__LINE__))
+ #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), (__LINE__))
+ #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), (__LINE__))
+ #define __Pyx_XINCREF(r) do { if((r) == NULL); else {__Pyx_INCREF(r); }} while(0)
+ #define __Pyx_XDECREF(r) do { if((r) == NULL); else {__Pyx_DECREF(r); }} while(0)
+ #define __Pyx_XGOTREF(r) do { if((r) == NULL); else {__Pyx_GOTREF(r); }} while(0)
+ #define __Pyx_XGIVEREF(r) do { if((r) == NULL); else {__Pyx_GIVEREF(r);}} while(0)
+#else
+ #define __Pyx_RefNannyDeclarations
+ #define __Pyx_RefNannySetupContext(name, acquire_gil)
+ #define __Pyx_RefNannyFinishContextNogil()
+ #define __Pyx_RefNannyFinishContext()
+ #define __Pyx_INCREF(r) Py_INCREF(r)
+ #define __Pyx_DECREF(r) Py_DECREF(r)
+ #define __Pyx_GOTREF(r)
+ #define __Pyx_GIVEREF(r)
+ #define __Pyx_XINCREF(r) Py_XINCREF(r)
+ #define __Pyx_XDECREF(r) Py_XDECREF(r)
+ #define __Pyx_XGOTREF(r)
+ #define __Pyx_XGIVEREF(r)
+#endif
+#define __Pyx_Py_XDECREF_SET(r, v) do {\
+ PyObject *tmp = (PyObject *) r;\
+ r = v; Py_XDECREF(tmp);\
+ } while (0)
+#define __Pyx_XDECREF_SET(r, v) do {\
+ PyObject *tmp = (PyObject *) r;\
+ r = v; __Pyx_XDECREF(tmp);\
+ } while (0)
+#define __Pyx_DECREF_SET(r, v) do {\
+ PyObject *tmp = (PyObject *) r;\
+ r = v; __Pyx_DECREF(tmp);\
+ } while (0)
+#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0)
+#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0)
+
+/* PyErrExceptionMatches.proto */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err)
+static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err);
+#else
+#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err)
+#endif
+
+/* PyThreadStateGet.proto */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate;
+#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current;
+#if PY_VERSION_HEX >= 0x030C00A6
+#define __Pyx_PyErr_Occurred() (__pyx_tstate->current_exception != NULL)
+#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->current_exception ? (PyObject*) Py_TYPE(__pyx_tstate->current_exception) : (PyObject*) NULL)
+#else
+#define __Pyx_PyErr_Occurred() (__pyx_tstate->curexc_type != NULL)
+#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->curexc_type)
+#endif
+#else
+#define __Pyx_PyThreadState_declare
+#define __Pyx_PyThreadState_assign
+#define __Pyx_PyErr_Occurred() (PyErr_Occurred() != NULL)
+#define __Pyx_PyErr_CurrentExceptionType() PyErr_Occurred()
+#endif
+
+/* PyErrFetchRestore.proto */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL)
+#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb)
+#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb)
+#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb)
+#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb)
+static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb);
+static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A6
+#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL))
+#else
+#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc)
+#endif
+#else
+#define __Pyx_PyErr_Clear() PyErr_Clear()
+#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc)
+#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb)
+#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb)
+#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb)
+#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb)
+#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb)
+#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb)
+#endif
+
+/* PyObjectGetAttrStr.proto */
+#if CYTHON_USE_TYPE_SLOTS
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name);
+#else
+#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n)
+#endif
+
+/* PyObjectGetAttrStrNoError.proto */
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name);
+
+/* GetBuiltinName.proto */
+static PyObject *__Pyx_GetBuiltinName(PyObject *name);
+
+/* TupleAndListFromArray.proto */
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n);
+static CYTHON_INLINE PyObject* __Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n);
+#endif
+
+/* IncludeStringH.proto */
+#include <string.h>
+
+/* BytesEquals.proto */
+static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals);
+
+/* UnicodeEquals.proto */
+static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals);
+
+/* fastcall.proto */
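+/* Uniform argument access for METH_VARARGS (args as a Python tuple plus a kwargs dict) and METH_FASTCALL (args as a C array, with keyword values following the positionals and their names in a separate tuple). */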
+#if CYTHON_AVOID_BORROWED_REFS
+ #define __Pyx_Arg_VARARGS(args, i) PySequence_GetItem(args, i)
+#elif CYTHON_ASSUME_SAFE_MACROS
+ #define __Pyx_Arg_VARARGS(args, i) PyTuple_GET_ITEM(args, i)
+#else
+ #define __Pyx_Arg_VARARGS(args, i) PyTuple_GetItem(args, i)
+#endif
+#if CYTHON_AVOID_BORROWED_REFS
+ #define __Pyx_Arg_NewRef_VARARGS(arg) __Pyx_NewRef(arg)
+ #define __Pyx_Arg_XDECREF_VARARGS(arg) Py_XDECREF(arg)
+#else
+ #define __Pyx_Arg_NewRef_VARARGS(arg) arg
+ #define __Pyx_Arg_XDECREF_VARARGS(arg)
+#endif
+#define __Pyx_NumKwargs_VARARGS(kwds) PyDict_Size(kwds)
+#define __Pyx_KwValues_VARARGS(args, nargs) NULL
+#define __Pyx_GetKwValue_VARARGS(kw, kwvalues, s) __Pyx_PyDict_GetItemStrWithError(kw, s)
+#define __Pyx_KwargsAsDict_VARARGS(kw, kwvalues) PyDict_Copy(kw)
+#if CYTHON_METH_FASTCALL
+ #define __Pyx_Arg_FASTCALL(args, i) args[i]
+ #define __Pyx_NumKwargs_FASTCALL(kwds) PyTuple_GET_SIZE(kwds)
+ #define __Pyx_KwValues_FASTCALL(args, nargs) ((args) + (nargs))
+ static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s);
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000
+ CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues);
+ #else
+ #define __Pyx_KwargsAsDict_FASTCALL(kw, kwvalues) _PyStack_AsDict(kwvalues, kw)
+ #endif
+ #define __Pyx_Arg_NewRef_FASTCALL(arg) arg /* no-op, __Pyx_Arg_FASTCALL is direct and this needs
+ to have the same reference counting */
+ #define __Pyx_Arg_XDECREF_FASTCALL(arg)
+#else
+ #define __Pyx_Arg_FASTCALL __Pyx_Arg_VARARGS
+ #define __Pyx_NumKwargs_FASTCALL __Pyx_NumKwargs_VARARGS
+ #define __Pyx_KwValues_FASTCALL __Pyx_KwValues_VARARGS
+ #define __Pyx_GetKwValue_FASTCALL __Pyx_GetKwValue_VARARGS
+ #define __Pyx_KwargsAsDict_FASTCALL __Pyx_KwargsAsDict_VARARGS
+ #define __Pyx_Arg_NewRef_FASTCALL(arg) __Pyx_Arg_NewRef_VARARGS(arg)
+ #define __Pyx_Arg_XDECREF_FASTCALL(arg) __Pyx_Arg_XDECREF_VARARGS(arg)
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+#define __Pyx_ArgsSlice_VARARGS(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_VARARGS(args, start), stop - start)
+#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_FASTCALL(args, start), stop - start)
+#else
+#define __Pyx_ArgsSlice_VARARGS(args, start, stop) PyTuple_GetSlice(args, start, stop)
+#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) PyTuple_GetSlice(args, start, stop)
+#endif
+
+/* RaiseDoubleKeywords.proto */
+static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name);
+
+/* ParseKeywords.proto */
+static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject *const *kwvalues,
+ PyObject **argnames[],
+ PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,
+ const char* function_name);
+
+/* RaiseArgTupleInvalid.proto */
+static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact,
+ Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found);
+
+/* PyFunctionFastCall.proto */
+#if CYTHON_FAST_PYCALL
+#if !CYTHON_VECTORCALL
+#define __Pyx_PyFunction_FastCall(func, args, nargs)\
+ __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL)
+static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs);
+#endif
+#define __Pyx_BUILD_ASSERT_EXPR(cond)\
+ (sizeof(char [1 - 2*!(cond)]) - 1)
+#ifndef Py_MEMBER_SIZE
+#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member)
+#endif
+#if !CYTHON_VECTORCALL
+#if PY_VERSION_HEX >= 0x03080000
+ #include "frameobject.h"
+#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API && !defined(PYPY_VERSION)
+ #ifndef Py_BUILD_CORE
+ #define Py_BUILD_CORE 1
+ #endif
+ #include "internal/pycore_frame.h"
+#endif
+ #define __Pxy_PyFrame_Initialize_Offsets()
+ #define __Pyx_PyFrame_GetLocalsplus(frame) ((frame)->f_localsplus)
+#else
+ static size_t __pyx_pyframe_localsplus_offset = 0;
+ #include "frameobject.h"
+ #define __Pxy_PyFrame_Initialize_Offsets()\
+ ((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\
+ (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus)))
+ #define __Pyx_PyFrame_GetLocalsplus(frame)\
+ (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset))
+#endif
+#endif
+#endif
+
+/* PyObjectCall.proto */
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw);
+#else
+#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw)
+#endif
+
+/* PyObjectCallMethO.proto */
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg);
+#endif
+
+/* PyObjectFastCall.proto */
+#define __Pyx_PyObject_FastCall(func, args, nargs) __Pyx_PyObject_FastCallDict(func, args, (size_t)(nargs), NULL)
+static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs);
+
+/* IterFinish.proto */
+static CYTHON_INLINE int __Pyx_IterFinish(void);
+
+/* PyObjectCallNoArg.proto */
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func);
+
+/* PyObjectCallOneArg.proto */
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg);
+
+/* PyObjectGetMethod.proto */
+static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method);
+
+/* PyObjectCallMethod0.proto */
+static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name);
+
+/* RaiseNeedMoreValuesToUnpack.proto */
+static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index);
+
+/* RaiseTooManyValuesToUnpack.proto */
+static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected);
+
+/* UnpackItemEndCheck.proto */
+static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected);
+
+/* RaiseNoneIterError.proto */
+static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void);
+
+/* UnpackTupleError.proto */
+static void __Pyx_UnpackTupleError(PyObject *, Py_ssize_t index);
+
+/* UnpackTuple2.proto */
+#define __Pyx_unpack_tuple2(tuple, value1, value2, is_tuple, has_known_size, decref_tuple)\
+ (likely(is_tuple || PyTuple_Check(tuple)) ?\
+ (likely(has_known_size || PyTuple_GET_SIZE(tuple) == 2) ?\
+ __Pyx_unpack_tuple2_exact(tuple, value1, value2, decref_tuple) :\
+ (__Pyx_UnpackTupleError(tuple, 2), -1)) :\
+ __Pyx_unpack_tuple2_generic(tuple, value1, value2, has_known_size, decref_tuple))
+static CYTHON_INLINE int __Pyx_unpack_tuple2_exact(
+ PyObject* tuple, PyObject** value1, PyObject** value2, int decref_tuple);
+static int __Pyx_unpack_tuple2_generic(
+ PyObject* tuple, PyObject** value1, PyObject** value2, int has_known_size, int decref_tuple);
+
+/* dict_iter.proto */
+static CYTHON_INLINE PyObject* __Pyx_dict_iterator(PyObject* dict, int is_dict, PyObject* method_name,
+ Py_ssize_t* p_orig_length, int* p_is_dict);
+static CYTHON_INLINE int __Pyx_dict_iter_next(PyObject* dict_or_iter, Py_ssize_t orig_length, Py_ssize_t* ppos,
+ PyObject** pkey, PyObject** pvalue, PyObject** pitem, int is_dict);
+
+/* UnicodeAsUCS4.proto */
+static CYTHON_INLINE Py_UCS4 __Pyx_PyUnicode_AsPy_UCS4(PyObject*);
+
+/* object_ord.proto */
+#if PY_MAJOR_VERSION >= 3
+#define __Pyx_PyObject_Ord(c)\
+ (likely(PyUnicode_Check(c)) ? (long)__Pyx_PyUnicode_AsPy_UCS4(c) : __Pyx__PyObject_Ord(c))
+#else
+#define __Pyx_PyObject_Ord(c) __Pyx__PyObject_Ord(c)
+#endif
+static long __Pyx__PyObject_Ord(PyObject* c);
+
+/* PyDictVersioning.proto */
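+/* Caches module-global lookups keyed on the dict's ma_version_tag, so a lookup is repeated only when the globals dict has actually changed. */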
+#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS
+#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1)
+#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag)
+#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\
+ (version_var) = __PYX_GET_DICT_VERSION(dict);\
+ (cache_var) = (value);
+#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\
+ static PY_UINT64_T __pyx_dict_version = 0;\
+ static PyObject *__pyx_dict_cached_value = NULL;\
+ if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\
+ (VAR) = __pyx_dict_cached_value;\
+ } else {\
+ (VAR) = __pyx_dict_cached_value = (LOOKUP);\
+ __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\
+ }\
+}
+static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj);
+static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj);
+static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version);
+#else
+#define __PYX_GET_DICT_VERSION(dict) (0)
+#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)
+#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP);
+#endif
+
+/* GetModuleGlobalName.proto */
+#if CYTHON_USE_DICT_VERSIONS
+#define __Pyx_GetModuleGlobalName(var, name) do {\
+ static PY_UINT64_T __pyx_dict_version = 0;\
+ static PyObject *__pyx_dict_cached_value = NULL;\
+ (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\
+ (likely(__pyx_dict_cached_value) ? __Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\
+ __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
+} while(0)
+#define __Pyx_GetModuleGlobalNameUncached(var, name) do {\
+ PY_UINT64_T __pyx_dict_version;\
+ PyObject *__pyx_dict_cached_value;\
+ (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
+} while(0)
+static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value);
+#else
+#define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name)
+#define __Pyx_GetModuleGlobalNameUncached(var, name) (var) = __Pyx__GetModuleGlobalName(name)
+static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name);
+#endif
+
+/* IncludeStructmemberH.proto */
+#include <structmember.h>
+
+/* FixUpExtensionType.proto */
+#if CYTHON_USE_TYPE_SPECS
+static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type);
+#endif
+
+/* FetchSharedCythonModule.proto */
+static PyObject *__Pyx_FetchSharedCythonABIModule(void);
+
+/* FetchCommonType.proto */
+#if !CYTHON_USE_TYPE_SPECS
+static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type);
+#else
+static PyTypeObject* __Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases);
+#endif
+
+/* PyMethodNew.proto */
+#if CYTHON_COMPILING_IN_LIMITED_API
+static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) {
+ PyObject *typesModule=NULL, *methodType=NULL, *result=NULL;
+ CYTHON_UNUSED_VAR(typ);
+ if (!self)
+ return __Pyx_NewRef(func);
+ typesModule = PyImport_ImportModule("types");
+ if (!typesModule) return NULL;
+ methodType = PyObject_GetAttrString(typesModule, "MethodType");
+ Py_DECREF(typesModule);
+ if (!methodType) return NULL;
+ result = PyObject_CallFunctionObjArgs(methodType, func, self, NULL);
+ Py_DECREF(methodType);
+ return result;
+}
+#elif PY_MAJOR_VERSION >= 3
+static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) {
+ CYTHON_UNUSED_VAR(typ);
+ if (!self)
+ return __Pyx_NewRef(func);
+ return PyMethod_New(func, self);
+}
+#else
+ #define __Pyx_PyMethod_New PyMethod_New
+#endif
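+/* Note: under the Limited API the bound-method fallback above goes through
+ * Python itself (import types; types.MethodType(func, self)), presumably
+ * because PyMethod_New is not exposed there; regular CPython 3 builds call
+ * PyMethod_New directly, and Python 2 used the plain macro alias. */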
+
+/* PyVectorcallFastCallDict.proto */
+#if CYTHON_METH_FASTCALL
+static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw);
+#endif
+
+/* CythonFunctionShared.proto */
+#define __Pyx_CyFunction_USED
+#define __Pyx_CYFUNCTION_STATICMETHOD 0x01
+#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02
+#define __Pyx_CYFUNCTION_CCLASS 0x04
+#define __Pyx_CYFUNCTION_COROUTINE 0x08
+#define __Pyx_CyFunction_GetClosure(f)\
+ (((__pyx_CyFunctionObject *) (f))->func_closure)
+#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API
+ #define __Pyx_CyFunction_GetClassObj(f)\
+ (((__pyx_CyFunctionObject *) (f))->func_classobj)
+#else
+ #define __Pyx_CyFunction_GetClassObj(f)\
+ ((PyObject*) ((PyCMethodObject *) (f))->mm_class)
+#endif
+#define __Pyx_CyFunction_SetClassObj(f, classobj)\
+ __Pyx__CyFunction_SetClassObj((__pyx_CyFunctionObject *) (f), (classobj))
+#define __Pyx_CyFunction_Defaults(type, f)\
+ ((type *)(((__pyx_CyFunctionObject *) (f))->defaults))
+#define __Pyx_CyFunction_SetDefaultsGetter(f, g)\
+ ((__pyx_CyFunctionObject *) (f))->defaults_getter = (g)
+typedef struct {
+#if CYTHON_COMPILING_IN_LIMITED_API
+ PyObject_HEAD
+ PyObject *func;
+#elif PY_VERSION_HEX < 0x030900B1
+ PyCFunctionObject func;
+#else
+ PyCMethodObject func;
+#endif
+#if CYTHON_BACKPORT_VECTORCALL
+ __pyx_vectorcallfunc func_vectorcall;
+#endif
+#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API
+ PyObject *func_weakreflist;
+#endif
+ PyObject *func_dict;
+ PyObject *func_name;
+ PyObject *func_qualname;
+ PyObject *func_doc;
+ PyObject *func_globals;
+ PyObject *func_code;
+ PyObject *func_closure;
+#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API
+ PyObject *func_classobj;
+#endif
+ void *defaults;
+ int defaults_pyobjects;
+ size_t defaults_size;
+ int flags;
+ PyObject *defaults_tuple;
+ PyObject *defaults_kwdict;
+ PyObject *(*defaults_getter)(PyObject *);
+ PyObject *func_annotations;
+ PyObject *func_is_coroutine;
+} __pyx_CyFunctionObject;
+#undef __Pyx_CyOrPyCFunction_Check
+#define __Pyx_CyFunction_Check(obj) __Pyx_TypeCheck(obj, __pyx_CyFunctionType)
+#define __Pyx_CyOrPyCFunction_Check(obj) __Pyx_TypeCheck2(obj, __pyx_CyFunctionType, &PyCFunction_Type)
+#define __Pyx_CyFunction_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_CyFunctionType)
+static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc);
+#undef __Pyx_IsSameCFunction
+#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCyOrCFunction(func, cfunc)
+static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml,
+ int flags, PyObject* qualname,
+ PyObject *closure,
+ PyObject *module, PyObject *globals,
+ PyObject* code);
+static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj);
+static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m,
+ size_t size,
+ int pyobjects);
+static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *m,
+ PyObject *tuple);
+static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *m,
+ PyObject *dict);
+static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m,
+ PyObject *dict);
+static int __pyx_CyFunction_init(PyObject *module);
+#if CYTHON_METH_FASTCALL
+static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames);
+static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames);
+static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames);
+static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames);
+#if CYTHON_BACKPORT_VECTORCALL
+#define __Pyx_CyFunction_func_vectorcall(f) (((__pyx_CyFunctionObject*)f)->func_vectorcall)
+#else
+#define __Pyx_CyFunction_func_vectorcall(f) (((PyCFunctionObject*)f)->vectorcall)
+#endif
+#endif
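+/* Note: __pyx_CyFunctionObject embeds a different base struct per target ABI:
+ * an opaque PyObject* under the Limited API, PyCFunctionObject before CPython
+ * 3.9, and PyCMethodObject (which adds mm_class; new in 3.9) afterwards. The
+ * vectorcall slot is likewise backported when CYTHON_BACKPORT_VECTORCALL is
+ * set. */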
+
+/* CythonFunction.proto */
+static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml,
+ int flags, PyObject* qualname,
+ PyObject *closure,
+ PyObject *module, PyObject *globals,
+ PyObject* code);
+
+/* RaiseUnexpectedTypeError.proto */
+static int __Pyx_RaiseUnexpectedTypeError(const char *expected, PyObject *obj);
+
+/* UnpackUnboundCMethod.proto */
+typedef struct {
+ PyObject *type;
+ PyObject **method_name;
+ PyCFunction func;
+ PyObject *method;
+ int flag;
+} __Pyx_CachedCFunction;
+
+/* CallUnboundCMethod0.proto */
+static PyObject* __Pyx__CallUnboundCMethod0(__Pyx_CachedCFunction* cfunc, PyObject* self);
+#if CYTHON_COMPILING_IN_CPYTHON
+#define __Pyx_CallUnboundCMethod0(cfunc, self)\
+ (likely((cfunc)->func) ?\
+ (likely((cfunc)->flag == METH_NOARGS) ? (*((cfunc)->func))(self, NULL) :\
+ (PY_VERSION_HEX >= 0x030600B1 && likely((cfunc)->flag == METH_FASTCALL) ?\
+ (PY_VERSION_HEX >= 0x030700A0 ?\
+ (*(__Pyx_PyCFunctionFast)(void*)(PyCFunction)(cfunc)->func)(self, &__pyx_empty_tuple, 0) :\
+ (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)(cfunc)->func)(self, &__pyx_empty_tuple, 0, NULL)) :\
+ (PY_VERSION_HEX >= 0x030700A0 && (cfunc)->flag == (METH_FASTCALL | METH_KEYWORDS) ?\
+ (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)(cfunc)->func)(self, &__pyx_empty_tuple, 0, NULL) :\
+ (likely((cfunc)->flag == (METH_VARARGS | METH_KEYWORDS)) ? ((*(PyCFunctionWithKeywords)(void*)(PyCFunction)(cfunc)->func)(self, __pyx_empty_tuple, NULL)) :\
+ ((cfunc)->flag == METH_VARARGS ? (*((cfunc)->func))(self, __pyx_empty_tuple) :\
+ __Pyx__CallUnboundCMethod0(cfunc, self)))))) :\
+ __Pyx__CallUnboundCMethod0(cfunc, self))
+#else
+#define __Pyx_CallUnboundCMethod0(cfunc, self) __Pyx__CallUnboundCMethod0(cfunc, self)
+#endif
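+/* Note: on CPython the __Pyx_CallUnboundCMethod0 macro inspects the cached
+ * METH_* flag and invokes the underlying C function pointer directly for the
+ * common NOARGS/FASTCALL/VARARGS conventions, avoiding argument-tuple
+ * construction; any other flag combination falls through to the generic
+ * __Pyx__CallUnboundCMethod0 path. */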
+
+/* set_iter.proto */
+static CYTHON_INLINE PyObject* __Pyx_set_iterator(PyObject* iterable, int is_set,
+ Py_ssize_t* p_orig_length, int* p_source_is_set);
+static CYTHON_INLINE int __Pyx_set_iter_next(
+ PyObject* iter_obj, Py_ssize_t orig_length,
+ Py_ssize_t* ppos, PyObject **value,
+ int source_is_set);
+
+/* GCCDiagnostics.proto */
+#if !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
+#define __Pyx_HAS_GCC_DIAGNOSTIC
+#endif
+
+/* BuildPyUnicode.proto */
+static PyObject* __Pyx_PyUnicode_BuildFromAscii(Py_ssize_t ulength, char* chars, int clength,
+ int prepend_sign, char padding_char);
+
+/* CIntToPyUnicode.proto */
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_From_int(int value, Py_ssize_t width, char padding_char, char format_char);
+
+/* JoinPyUnicode.proto */
+static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength,
+ Py_UCS4 max_char);
+
+/* PyObjectFormatSimple.proto */
+#if CYTHON_COMPILING_IN_PYPY
+ #define __Pyx_PyObject_FormatSimple(s, f) (\
+ likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\
+ PyObject_Format(s, f))
+#elif PY_MAJOR_VERSION < 3
+ #define __Pyx_PyObject_FormatSimple(s, f) (\
+ likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\
+ likely(PyString_CheckExact(s)) ? PyUnicode_FromEncodedObject(s, NULL, "strict") :\
+ PyObject_Format(s, f))
+#elif CYTHON_USE_TYPE_SLOTS
+ #define __Pyx_PyObject_FormatSimple(s, f) (\
+ likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\
+ likely(PyLong_CheckExact(s)) ? PyLong_Type.tp_repr(s) :\
+ likely(PyFloat_CheckExact(s)) ? PyFloat_Type.tp_repr(s) :\
+ PyObject_Format(s, f))
+#else
+ #define __Pyx_PyObject_FormatSimple(s, f) (\
+ likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\
+ PyObject_Format(s, f))
+#endif
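+/* Note: the type-slots variant of __Pyx_PyObject_FormatSimple short-circuits
+ * exact int/float operands by calling their tp_repr slot directly, which for
+ * those types matches format() with an empty format spec. */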
+
+/* UnicodeConcatInPlace.proto */
+# if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
+ #if CYTHON_REFNANNY
+ #define __Pyx_PyUnicode_ConcatInPlace(left, right) __Pyx_PyUnicode_ConcatInPlaceImpl(&left, right, __pyx_refnanny)
+ #else
+ #define __Pyx_PyUnicode_ConcatInPlace(left, right) __Pyx_PyUnicode_ConcatInPlaceImpl(&left, right)
+ #endif
+ static CYTHON_INLINE PyObject *__Pyx_PyUnicode_ConcatInPlaceImpl(PyObject **p_left, PyObject *right
+ #if CYTHON_REFNANNY
+ , void* __pyx_refnanny
+ #endif
+ );
+#else
+#define __Pyx_PyUnicode_ConcatInPlace __Pyx_PyUnicode_Concat
+#endif
+#define __Pyx_PyUnicode_ConcatInPlaceSafe(left, right) ((unlikely((left) == Py_None) || unlikely((right) == Py_None)) ?\
+ PyNumber_InPlaceAdd(left, right) : __Pyx_PyUnicode_ConcatInPlace(left, right))
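+/* Note: on CPython 3 the in-place concat implementation can, roughly, resize
+ * the left string in place when it holds the only reference to it, turning a
+ * chain of `s += t` operations into amortized appends rather than a fresh
+ * allocation per step; other targets degrade to plain concatenation. */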
+
+/* CallUnboundCMethod1.proto */
+static PyObject* __Pyx__CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg);
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg);
+#else
+#define __Pyx_CallUnboundCMethod1(cfunc, self, arg) __Pyx__CallUnboundCMethod1(cfunc, self, arg)
+#endif
+
+/* dict_getitem_default.proto */
+static PyObject* __Pyx_PyDict_GetItemDefault(PyObject* d, PyObject* key, PyObject* default_value);
+
+/* CallUnboundCMethod2.proto */
+static PyObject* __Pyx__CallUnboundCMethod2(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg1, PyObject* arg2);
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030600B1
+static CYTHON_INLINE PyObject *__Pyx_CallUnboundCMethod2(__Pyx_CachedCFunction *cfunc, PyObject *self, PyObject *arg1, PyObject *arg2);
+#else
+#define __Pyx_CallUnboundCMethod2(cfunc, self, arg1, arg2) __Pyx__CallUnboundCMethod2(cfunc, self, arg1, arg2)
+#endif
+
+/* GetItemInt.proto */
+#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\
+ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\
+ __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\
+ (is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\
+ __Pyx_GetItemInt_Generic(o, to_py_func(i))))
+#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\
+ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\
+ __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\
+ (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL))
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i,
+ int wraparound, int boundscheck);
+#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\
+ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\
+ __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\
+ (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL))
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i,
+ int wraparound, int boundscheck);
+static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j);
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i,
+ int is_list, int wraparound, int boundscheck);
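+/* Note: the __Pyx_GetItemInt* macros select list/tuple fast paths at C compile
+ * time; wraparound and boundscheck are compile-time constants reflecting the
+ * corresponding Cython directives, and indices that cannot fit in Py_ssize_t
+ * are boxed and routed through the generic __Pyx_GetItemInt_Generic path. */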
+
+/* PyUnicode_Unicode.proto */
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Unicode(PyObject *obj);
+
+/* ListCompAppend.proto */
+#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS
+static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) {
+ PyListObject* L = (PyListObject*) list;
+ Py_ssize_t len = Py_SIZE(list);
+ if (likely(L->allocated > len)) {
+ Py_INCREF(x);
+ #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000
+ L->ob_item[len] = x;
+ #else
+ PyList_SET_ITEM(list, len, x);
+ #endif
+ __Pyx_SET_SIZE(list, len + 1);
+ return 0;
+ }
+ return PyList_Append(list, x);
+}
+#else
+#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x)
+#endif
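+/* Note: the list-comprehension append fast path above writes into the list's
+ * preallocated slack (allocated > len) and bumps ob_size directly; only when
+ * capacity is exhausted does it pay for a full PyList_Append call. */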
+
+/* ArgTypeTest.proto */
+#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\
+ ((likely(__Pyx_IS_TYPE(obj, type) | (none_allowed && (obj == Py_None)))) ? 1 :\
+ __Pyx__ArgTypeTest(obj, type, name, exact))
+static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact);
+
+/* KeywordStringCheck.proto */
+static int __Pyx_CheckKeywordStrings(PyObject *kw, const char* function_name, int kw_allowed);
+
+/* RaiseException.proto */
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause);
+
+/* ValidateBasesTuple.proto */
+#if CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API || CYTHON_USE_TYPE_SPECS
+static int __Pyx_validate_bases_tuple(const char *type_name, Py_ssize_t dictoffset, PyObject *bases);
+#endif
+
+/* PyType_Ready.proto */
+CYTHON_UNUSED static int __Pyx_PyType_Ready(PyTypeObject *t);
+
+/* PyObject_GenericGetAttrNoDict.proto */
+#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name);
+#else
+#define __Pyx_PyObject_GenericGetAttrNoDict PyObject_GenericGetAttr
+#endif
+
+/* PyObject_GenericGetAttr.proto */
+#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000
+static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name);
+#else
+#define __Pyx_PyObject_GenericGetAttr PyObject_GenericGetAttr
+#endif
+
+/* SetVTable.proto */
+static int __Pyx_SetVtable(PyTypeObject* typeptr, void* vtable);
+
+/* GetVTable.proto */
+static void* __Pyx_GetVtable(PyTypeObject *type);
+
+/* MergeVTables.proto */
+#if !CYTHON_COMPILING_IN_LIMITED_API
+static int __Pyx_MergeVtables(PyTypeObject *type);
+#endif
+
+/* SetupReduce.proto */
+#if !CYTHON_COMPILING_IN_LIMITED_API
+static int __Pyx_setup_reduce(PyObject* type_obj);
+#endif
+
+/* Import.proto */
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level);
+
+/* ImportDottedModule.proto */
+static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple);
+#if PY_MAJOR_VERSION >= 3
+static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple);
+#endif
+
+/* ImportFrom.proto */
+static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name);
+
+/* CLineInTraceback.proto */
+#ifdef CYTHON_CLINE_IN_TRACEBACK
+#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0)
+#else
+static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line);
+#endif
+
+/* CodeObjectCache.proto */
+#if !CYTHON_COMPILING_IN_LIMITED_API
+typedef struct {
+ PyCodeObject* code_object;
+ int code_line;
+} __Pyx_CodeObjectCacheEntry;
+struct __Pyx_CodeObjectCache {
+ int count;
+ int max_count;
+ __Pyx_CodeObjectCacheEntry* entries;
+};
+static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL};
+static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line);
+static PyCodeObject *__pyx_find_code_object(int code_line);
+static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);
+#endif
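+/* Note: the code-object cache is a sorted array searched with
+ * __pyx_bisect_code_objects; it maps C source lines back to Python code
+ * objects so that __Pyx_AddTraceback (below) can synthesize accurate
+ * tracebacks for exceptions raised inside the generated C. */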
+
+/* AddTraceback.proto */
+static void __Pyx_AddTraceback(const char *funcname, int c_line,
+ int py_line, const char *filename);
+
+/* CIntToPy.proto */
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value);
+
+/* CIntFromPy.proto */
+static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *);
+
+/* CIntFromPy.proto */
+static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *);
+
+/* CIntToPy.proto */
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value);
+
+/* FormatTypeName.proto */
+#if CYTHON_COMPILING_IN_LIMITED_API
+typedef PyObject *__Pyx_TypeName;
+#define __Pyx_FMT_TYPENAME "%U"
+static __Pyx_TypeName __Pyx_PyType_GetName(PyTypeObject* tp);
+#define __Pyx_DECREF_TypeName(obj) Py_XDECREF(obj)
+#else
+typedef const char *__Pyx_TypeName;
+#define __Pyx_FMT_TYPENAME "%.200s"
+#define __Pyx_PyType_GetName(tp) ((tp)->tp_name)
+#define __Pyx_DECREF_TypeName(obj)
+#endif
+
+/* FastTypeChecks.proto */
+#if CYTHON_COMPILING_IN_CPYTHON
+#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type)
+#define __Pyx_TypeCheck2(obj, type1, type2) __Pyx_IsAnySubtype2(Py_TYPE(obj), (PyTypeObject *)type1, (PyTypeObject *)type2)
+static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b);
+static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b);
+static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type);
+static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2);
+#else
+#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type)
+#define __Pyx_TypeCheck2(obj, type1, type2) (PyObject_TypeCheck(obj, (PyTypeObject *)type1) || PyObject_TypeCheck(obj, (PyTypeObject *)type2))
+#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type)
+#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2))
+#endif
+#define __Pyx_PyErr_ExceptionMatches2(err1, err2) __Pyx_PyErr_GivenExceptionMatches2(__Pyx_PyErr_CurrentExceptionType(), err1, err2)
+#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception)
+
+/* CheckBinaryVersion.proto */
+static unsigned long __Pyx_get_runtime_version(void);
+static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer);
+
+/* InitStrings.proto */
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t);
+
+/* #### Code section: module_declarations ### */
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__apply_punc_replacements(CYTHON_UNUSED struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text, PyObject *__pyx_v_replacements); /* proto*/
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__punc_norm(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text); /* proto*/
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__wrap_with_placeholders(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text); /* proto*/
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__normalize(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text); /* proto*/
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__do_indic_tokenize_and_transliterate(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sentence, PyObject *__pyx_v_normalizer, PyObject *__pyx_v_iso_lang, int __pyx_v_transliterate); /* proto*/
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__preprocess(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sent, PyObject *__pyx_v_src_lang, PyObject *__pyx_v_tgt_lang, PyObject *__pyx_v_normalizer, int __pyx_v_is_target); /* proto*/
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__postprocess(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sent, PyObject *__pyx_v_lang); /* proto*/
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_batch, PyObject *__pyx_v_src_lang, int __pyx_skip_dispatch, struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch *__pyx_optional_args); /* proto*/
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sents, int __pyx_skip_dispatch, struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch *__pyx_optional_args); /* proto*/
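+/* Note: consistent with the module docstring in the string table below, only
+ * preprocess_batch and postprocess_batch carry the cpdef calling convention
+ * (the __pyx_skip_dispatch argument); the underscore-prefixed helpers are
+ * cdef-only and callable from C alone. */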
+
+/* Module declarations from "IndicTransToolkit.processor" */
+/* #### Code section: typeinfo ### */
+/* #### Code section: before_global_var ### */
+#define __Pyx_MODULE_NAME "IndicTransToolkit.processor"
+extern int __pyx_module_is_main_IndicTransToolkit__processor;
+int __pyx_module_is_main_IndicTransToolkit__processor = 0;
+
+/* Implementation of "IndicTransToolkit.processor" */
+/* #### Code section: global_var ### */
+static PyObject *__pyx_builtin_range;
+static PyObject *__pyx_builtin_chr;
+static PyObject *__pyx_builtin_TypeError;
+/* #### Code section: string_decls ### */
+static const char __pyx_k_[] = "\340\247\246";
+static const char __pyx_k_0[] = "0";
+static const char __pyx_k_1[] = "1";
+static const char __pyx_k_2[] = "2";
+static const char __pyx_k_3[] = "3";
+static const char __pyx_k_4[] = "4";
+static const char __pyx_k_5[] = "5";
+static const char __pyx_k_6[] = "6";
+static const char __pyx_k_7[] = "7";
+static const char __pyx_k_8[] = "8";
+static const char __pyx_k_9[] = "9";
+static const char __pyx_k_C[] = "\302\240\302\272C";
+static const char __pyx_k_d[] = "(\\d) %";
+static const char __pyx_k_m[] = "m";
+static const char __pyx_k_n[] = "n\302\272\302\240";
+static const char __pyx_k_r[] = "\\r";
+static const char __pyx_k_s[] = "\\(\\s*";
+static const char __pyx_k_ID[] = "<ID>";
+static const char __pyx_k__142[] = " >";
+static const char __pyx_k__143[] = "]";
+static const char __pyx_k__144[] = " ]";
+static const char __pyx_k__145[] = "<";
+static const char __pyx_k__146[] = "< ";
+static const char __pyx_k__147[] = "[";
+static const char __pyx_k__148[] = "[ ";
+static const char __pyx_k__149[] = ">/";
+static const char __pyx_k__151[] = "]/";
+static const char __pyx_k__153[] = " \340\245\215 ";
+static const char __pyx_k__154[] = "\340\245\215";
+static const char __pyx_k__155[] = "_";
+static const char __pyx_k__156[] = " \330\237";
+static const char __pyx_k__157[] = "\330\237";
+static const char __pyx_k__159[] = " \333\224";
+static const char __pyx_k__160[] = "\333\224";
+static const char __pyx_k__162[] = " \330\214";
+static const char __pyx_k__163[] = "\330\214";
+static const char __pyx_k__165[] = "\331\256\333\252";
+static const char __pyx_k__166[] = "\330\240";
+static const char __pyx_k__168[] = "\340\254\257\340\254\274";
+static const char __pyx_k__169[] = "\340\255\237";
+static const char __pyx_k__171[] = "*";
+static const char __pyx_k__182[] = "?";
+static const char __pyx_k_desc[] = "desc";
+static const char __pyx_k_lang[] = "lang";
+static const char __pyx_k_line[] = "line";
+static const char __pyx_k_main[] = "__main__";
+static const char __pyx_k_name[] = "__name__";
+static const char __pyx_k_self[] = "self";
+static const char __pyx_k_spec[] = "__spec__";
+static const char __pyx_k_test[] = "__test__";
+static const char __pyx_k_tqdm[] = "tqdm";
+static const char __pyx_k_unit[] = "unit";
+static const char __pyx_k_1_2_2[] = "\\1.\\2";
+static const char __pyx_k_Queue[] = "Queue";
+static const char __pyx_k_Union[] = "Union";
+static const char __pyx_k_batch[] = "batch";
+static const char __pyx_k_clear[] = "clear";
+static const char __pyx_k_group[] = "group";
+static const char __pyx_k_items[] = "items";
+static const char __pyx_k_queue[] = "queue";
+static const char __pyx_k_range[] = "range";
+static const char __pyx_k_regex[] = "regex";
+static const char __pyx_k_s_s_2[] = "\\s;\\s?";
+static const char __pyx_k_sents[] = "sents";
+static const char __pyx_k_split[] = "split";
+static const char __pyx_k_strip[] = "strip";
+static const char __pyx_k_total[] = "total";
+static const char __pyx_k_enable[] = "enable";
+static const char __pyx_k_escape[] = "escape";
+static const char __pyx_k_import[] = "__import__";
+static const char __pyx_k_reduce[] = "__reduce__";
+static const char __pyx_k_typing[] = "typing";
+static const char __pyx_k_compile[] = "compile";
+static const char __pyx_k_disable[] = "disable";
+static const char __pyx_k_findall[] = "findall";
+static const char __pyx_k_replace[] = "replace";
+static const char __pyx_k_asm_Beng[] = "asm_Beng";
+static const char __pyx_k_awa_Deva[] = "awa_Deva";
+static const char __pyx_k_ben_Beng[] = "ben_Beng";
+static const char __pyx_k_bho_Deva[] = "bho_Deva";
+static const char __pyx_k_brx_Deva[] = "brx_Deva";
+static const char __pyx_k_doi_Deva[] = "doi_Deva";
+static const char __pyx_k_eng_Latn[] = "eng_Latn";
+static const char __pyx_k_getstate[] = "__getstate__";
+static const char __pyx_k_gom_Deva[] = "gom_Deva";
+static const char __pyx_k_gon_Deva[] = "gon_Deva";
+static const char __pyx_k_guj_Gujr[] = "guj_Gujr";
+static const char __pyx_k_hin_Deva[] = "hin_Deva";
+static const char __pyx_k_hne_Deva[] = "hne_Deva";
+static const char __pyx_k_kan_Knda[] = "kan_Knda";
+static const char __pyx_k_kas_Arab[] = "kas_Arab";
+static const char __pyx_k_kas_Deva[] = "kas_Deva";
+static const char __pyx_k_kha_Latn[] = "kha_Latn";
+static const char __pyx_k_lus_Latn[] = "lus_Latn";
+static const char __pyx_k_mag_Deva[] = "mag_Deva";
+static const char __pyx_k_mai_Deva[] = "mai_Deva";
+static const char __pyx_k_mal_Mlym[] = "mal_Mlym";
+static const char __pyx_k_mar_Deva[] = "mar_Deva";
+static const char __pyx_k_mni_Beng[] = "mni_Beng";
+static const char __pyx_k_mni_Mtei[] = "mni_Mtei";
+static const char __pyx_k_npi_Deva[] = "npi_Deva";
+static const char __pyx_k_ory_Orya[] = "ory_Orya";
+static const char __pyx_k_pan_Guru[] = "pan_Guru";
+static const char __pyx_k_san_Deva[] = "san_Deva";
+static const char __pyx_k_sat_Olck[] = "sat_Olck";
+static const char __pyx_k_setstate[] = "__setstate__";
+static const char __pyx_k_snd_Arab[] = "snd_Arab";
+static const char __pyx_k_snd_Deva[] = "snd_Deva";
+static const char __pyx_k_src_lang[] = "src_lang";
+static const char __pyx_k_tam_Taml[] = "tam_Taml";
+static const char __pyx_k_tel_Telu[] = "tel_Telu";
+static const char __pyx_k_tgt_lang[] = "tgt_lang";
+static const char __pyx_k_tokenize[] = "tokenize";
+static const char __pyx_k_unr_Deva[] = "unr_Deva";
+static const char __pyx_k_urd_Arab[] = "urd_Arab";
+static const char __pyx_k_TypeError[] = "TypeError";
+static const char __pyx_k_inference[] = "inference";
+static const char __pyx_k_is_target[] = "is_target";
+static const char __pyx_k_isenabled[] = "isenabled";
+static const char __pyx_k_normalize[] = "normalize";
+static const char __pyx_k_pyx_state[] = "__pyx_state";
+static const char __pyx_k_reduce_ex[] = "__reduce_ex__";
+static const char __pyx_k_translate[] = "translate";
+static const char __pyx_k_visualize[] = "visualize";
+static const char __pyx_k_detokenize[] = "detokenize";
+static const char __pyx_k_pyx_vtable[] = "__pyx_vtable__";
+static const char __pyx_k_sacremoses[] = "sacremoses";
+static const char __pyx_k_A_Za_z0_9_w[] = "[A-Za-z0-9]*[#|@]\\w+";
+static const char __pyx_k_initializing[] = "_initializing";
+static const char __pyx_k_is_coroutine[] = "_is_coroutine";
+static const char __pyx_k_stringsource[] = "<stringsource>";
+static const char __pyx_k_reduce_cython[] = "__reduce_cython__";
+static const char __pyx_k_transliterate[] = "transliterate";
+static const char __pyx_k_IndicProcessor[] = "IndicProcessor";
+static const char __pyx_k_MosesTokenizer[] = "MosesTokenizer";
+static const char __pyx_k_Pre_processing[] = " | > Pre-processing ";
+static const char __pyx_k_get_normalizer[] = "get_normalizer";
+static const char __pyx_k_indic_tokenize[] = "indic_tokenize";
+static const char __pyx_k_Post_processing[] = " | > Post-processing ";
+static const char __pyx_k_setstate_cython[] = "__setstate_cython__";
+static const char __pyx_k_MosesDetokenizer[] = "MosesDetokenizer";
+static const char __pyx_k_indic_detokenize[] = "indic_detokenize";
+static const char __pyx_k_preprocess_batch[] = "preprocess_batch";
+static const char __pyx_k_trivial_tokenize[] = "trivial_tokenize";
+static const char __pyx_k_indicnlp_tokenize[] = "indicnlp.tokenize";
+static const char __pyx_k_postprocess_batch[] = "postprocess_batch";
+static const char __pyx_k_asyncio_coroutines[] = "asyncio.coroutines";
+static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback";
+static const char __pyx_k_trivial_detokenize[] = "trivial_detokenize";
+static const char __pyx_k_MosesPunctNormalizer[] = "MosesPunctNormalizer";
+static const char __pyx_k_b_w_https_ftp_w_w_w_b[] = "\\b(?<![\\w.])((https?|ftp)://)?(\\w+\\.)+\\w+(/\\w*)?\\b";
+static const char __pyx_k_IndicNormalizerFactory[] = "IndicNormalizerFactory";
+static const char __pyx_k_UnicodeIndicTransliterator[] = "UnicodeIndicTransliterator";
+static const char __pyx_k_IndicTransToolkit_processor[] = "IndicTransToolkit.processor";
+static const char __pyx_k_IndicProcessor___reduce_cython[] = "IndicProcessor.__reduce_cython__";
+static const char __pyx_k_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2[] = "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}";
+static const char __pyx_k_Cython_version_of_the_IndicProc[] = "\nCython version of the IndicProcessor class with optimizations for performance.\nOnly preprocess_batch and postprocess_batch are exposed as cpdef methods.\nAll other methods are internal (cdef) for optimized Cython usage.\n";
+static const char __pyx_k_IndicProcessor_preprocess_batch[] = "IndicProcessor.preprocess_batch";
+static const char __pyx_k_IndicTransToolkit_processor_pyx[] = "IndicTransToolkit\\processor.pyx";
+static const char __pyx_k_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d[] = "(~?\\d+\\.?\\d*\\s?%?\\s?-?\\s?~?\\d+\\.?\\d*\\s?%|~?\\d+%|\\d+[-\\/.,:']\\d+[-\\/.,:'+]\\d+(?:\\.\\d+)?|\\d+[-\\/.:'+]\\d+(?:\\.\\d+)?)";
+static const char __pyx_k_IndicProcessor___setstate_cython[] = "IndicProcessor.__setstate_cython__";
+static const char __pyx_k_IndicProcessor_postprocess_batch[] = "IndicProcessor.postprocess_batch";
+static const char __pyx_k_indicnlp_normalize_indic_normali[] = "indicnlp.normalize.indic_normalize";
+static const char __pyx_k_indicnlp_transliterate_unicode_t[] = "indicnlp.transliterate.unicode_transliterate";
+static const char __pyx_k_no_default___reduce___due_to_non[] = "no default __reduce__ due to non-trivial __cinit__";
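+/* Note: this string table interns every literal the module touches:
+ * FLORES-style language tags (e.g. "hin_Deva", "eng_Latn"), ISO codes, regex
+ * sources for URLs/emails/numerics, and the bracket fragments used to build
+ * entity placeholders. Octal escapes are UTF-8 bytes, e.g. "\340\247\246"
+ * is U+09E6 BENGALI DIGIT ZERO. */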
+/* #### Code section: decls ### */
+static PyObject *__pyx_lambda_funcdef_lambda(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_m); /* proto */
+static int __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor___cinit__(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, int __pyx_v_inference); /* proto */
+static PyObject *__pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_2preprocess_batch(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_batch, PyObject *__pyx_v_src_lang, PyObject *__pyx_v_tgt_lang, int __pyx_v_is_target, int __pyx_v_visualize); /* proto */
+static PyObject *__pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_4postprocess_batch(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sents, PyObject *__pyx_v_lang, int __pyx_v_visualize); /* proto */
+static PyObject *__pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_9inference___get__(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self); /* proto */
+static int __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_9inference_2__set__(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_value); /* proto */
+static PyObject *__pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_6__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_8__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */
+static PyObject *__pyx_tp_new_17IndicTransToolkit_9processor_IndicProcessor(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
+static __Pyx_CachedCFunction __pyx_umethod_PyDict_Type_get = {0, 0, 0, 0, 0};
+static __Pyx_CachedCFunction __pyx_umethod_PyUnicode_Type_strip = {0, 0, 0, 0, 0};
+static __Pyx_CachedCFunction __pyx_umethod_PyUnicode_Type_translate = {0, 0, 0, 0, 0};
+/* #### Code section: late_includes ### */
+/* #### Code section: module_state ### */
+typedef struct {
+ PyObject *__pyx_d;
+ PyObject *__pyx_b;
+ PyObject *__pyx_cython_runtime;
+ PyObject *__pyx_empty_tuple;
+ PyObject *__pyx_empty_bytes;
+ PyObject *__pyx_empty_unicode;
+ #ifdef __Pyx_CyFunction_USED
+ PyTypeObject *__pyx_CyFunctionType;
+ #endif
+ #ifdef __Pyx_FusedFunction_USED
+ PyTypeObject *__pyx_FusedFunctionType;
+ #endif
+ #ifdef __Pyx_Generator_USED
+ PyTypeObject *__pyx_GeneratorType;
+ #endif
+ #ifdef __Pyx_IterableCoroutine_USED
+ PyTypeObject *__pyx_IterableCoroutineType;
+ #endif
+ #ifdef __Pyx_Coroutine_USED
+ PyTypeObject *__pyx_CoroutineAwaitType;
+ #endif
+ #ifdef __Pyx_Coroutine_USED
+ PyTypeObject *__pyx_CoroutineType;
+ #endif
+ #if CYTHON_USE_MODULE_STATE
+ PyObject *__pyx_type_17IndicTransToolkit_9processor_IndicProcessor;
+ #endif
+ PyTypeObject *__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor;
+ PyObject *__pyx_n_u_;
+ PyObject *__pyx_kp_u_0;
+ PyObject *__pyx_kp_u_1;
+ PyObject *__pyx_kp_u_1_2;
+ PyObject *__pyx_kp_u_1_2_2;
+ PyObject *__pyx_kp_u_1_3;
+ PyObject *__pyx_kp_u_1_4;
+ PyObject *__pyx_kp_u_2;
+ PyObject *__pyx_kp_u_2_2;
+ PyObject *__pyx_kp_u_3;
+ PyObject *__pyx_kp_u_4;
+ PyObject *__pyx_kp_u_5;
+ PyObject *__pyx_kp_u_6;
+ PyObject *__pyx_kp_u_7;
+ PyObject *__pyx_kp_u_8;
+ PyObject *__pyx_kp_u_9;
+ PyObject *__pyx_kp_u_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2;
+ PyObject *__pyx_kp_u_A_Za_z0_9_w;
+ PyObject *__pyx_n_u_Arab;
+ PyObject *__pyx_n_u_Aran;
+ PyObject *__pyx_kp_u_C;
+ PyObject *__pyx_kp_u_C_2;
+ PyObject *__pyx_n_s_Dict;
+ PyObject *__pyx_kp_u_ID;
+ PyObject *__pyx_kp_u_ID_2;
+ PyObject *__pyx_kp_u_ID_3;
+ PyObject *__pyx_kp_u_ID_4;
+ PyObject *__pyx_kp_u_ID_5;
+ PyObject *__pyx_n_s_IndicNormalizerFactory;
+ PyObject *__pyx_n_s_IndicProcessor;
+ PyObject *__pyx_n_s_IndicProcessor___reduce_cython;
+ PyObject *__pyx_n_s_IndicProcessor___setstate_cython;
+ PyObject *__pyx_n_s_IndicProcessor_postprocess_batch;
+ PyObject *__pyx_n_s_IndicProcessor_preprocess_batch;
+ PyObject *__pyx_n_s_IndicTransToolkit_processor;
+ PyObject *__pyx_kp_s_IndicTransToolkit_processor_pyx;
+ PyObject *__pyx_n_u_Latn;
+ PyObject *__pyx_n_s_List;
+ PyObject *__pyx_n_s_MosesDetokenizer;
+ PyObject *__pyx_n_s_MosesPunctNormalizer;
+ PyObject *__pyx_n_s_MosesTokenizer;
+ PyObject *__pyx_n_u_Mtei;
+ PyObject *__pyx_kp_u_None;
+ PyObject *__pyx_n_u_Olck;
+ PyObject *__pyx_kp_u_Post_processing;
+ PyObject *__pyx_kp_u_Pre_processing;
+ PyObject *__pyx_n_s_Queue;
+ PyObject *__pyx_n_s_TypeError;
+ PyObject *__pyx_n_s_UnicodeIndicTransliterator;
+ PyObject *__pyx_n_s_Union;
+ PyObject *__pyx_n_u__10;
+ PyObject *__pyx_n_u__100;
+ PyObject *__pyx_kp_u__101;
+ PyObject *__pyx_kp_u__102;
+ PyObject *__pyx_kp_u__103;
+ PyObject *__pyx_kp_u__104;
+ PyObject *__pyx_kp_u__105;
+ PyObject *__pyx_kp_u__106;
+ PyObject *__pyx_kp_u__107;
+ PyObject *__pyx_kp_u__108;
+ PyObject *__pyx_kp_u__109;
+ PyObject *__pyx_n_u__11;
+ PyObject *__pyx_kp_u__110;
+ PyObject *__pyx_kp_u__111;
+ PyObject *__pyx_kp_u__112;
+ PyObject *__pyx_kp_u__113;
+ PyObject *__pyx_kp_u__114;
+ PyObject *__pyx_kp_u__115;
+ PyObject *__pyx_kp_u__116;
+ PyObject *__pyx_kp_u__117;
+ PyObject *__pyx_kp_u__118;
+ PyObject *__pyx_kp_u__119;
+ PyObject *__pyx_n_u__12;
+ PyObject *__pyx_kp_u__120;
+ PyObject *__pyx_kp_u__121;
+ PyObject *__pyx_kp_u__122;
+ PyObject *__pyx_kp_u__123;
+ PyObject *__pyx_kp_u__124;
+ PyObject *__pyx_kp_u__125;
+ PyObject *__pyx_kp_u__126;
+ PyObject *__pyx_kp_u__127;
+ PyObject *__pyx_kp_u__128;
+ PyObject *__pyx_kp_u__129;
+ PyObject *__pyx_n_u__13;
+ PyObject *__pyx_kp_u__130;
+ PyObject *__pyx_kp_u__131;
+ PyObject *__pyx_kp_u__132;
+ PyObject *__pyx_kp_u__133;
+ PyObject *__pyx_kp_u__134;
+ PyObject *__pyx_kp_u__135;
+ PyObject *__pyx_kp_u__136;
+ PyObject *__pyx_kp_u__137;
+ PyObject *__pyx_n_u__138;
+ PyObject *__pyx_kp_u__139;
+ PyObject *__pyx_n_u__14;
+ PyObject *__pyx_kp_u__140;
+ PyObject *__pyx_kp_u__141;
+ PyObject *__pyx_kp_u__142;
+ PyObject *__pyx_kp_u__143;
+ PyObject *__pyx_kp_u__144;
+ PyObject *__pyx_kp_u__145;
+ PyObject *__pyx_kp_u__146;
+ PyObject *__pyx_kp_u__147;
+ PyObject *__pyx_kp_u__148;
+ PyObject *__pyx_kp_u__149;
+ PyObject *__pyx_n_u__15;
+ PyObject *__pyx_kp_u__151;
+ PyObject *__pyx_kp_u__153;
+ PyObject *__pyx_kp_u__154;
+ PyObject *__pyx_n_u__155;
+ PyObject *__pyx_kp_u__156;
+ PyObject *__pyx_kp_u__157;
+ PyObject *__pyx_kp_u__159;
+ PyObject *__pyx_n_u__16;
+ PyObject *__pyx_kp_u__160;
+ PyObject *__pyx_kp_u__162;
+ PyObject *__pyx_kp_u__163;
+ PyObject *__pyx_kp_u__165;
+ PyObject *__pyx_n_u__166;
+ PyObject *__pyx_kp_u__168;
+ PyObject *__pyx_n_u__169;
+ PyObject *__pyx_n_u__17;
+ PyObject *__pyx_n_s__171;
+ PyObject *__pyx_n_u__18;
+ PyObject *__pyx_n_s__182;
+ PyObject *__pyx_n_u__19;
+ PyObject *__pyx_n_u__2;
+ PyObject *__pyx_n_u__20;
+ PyObject *__pyx_n_u__21;
+ PyObject *__pyx_n_u__22;
+ PyObject *__pyx_n_u__23;
+ PyObject *__pyx_n_u__24;
+ PyObject *__pyx_n_u__25;
+ PyObject *__pyx_n_u__26;
+ PyObject *__pyx_n_u__27;
+ PyObject *__pyx_n_u__28;
+ PyObject *__pyx_n_u__29;
+ PyObject *__pyx_n_u__3;
+ PyObject *__pyx_n_u__30;
+ PyObject *__pyx_n_u__31;
+ PyObject *__pyx_n_u__32;
+ PyObject *__pyx_n_u__33;
+ PyObject *__pyx_n_u__34;
+ PyObject *__pyx_n_u__35;
+ PyObject *__pyx_n_u__36;
+ PyObject *__pyx_n_u__37;
+ PyObject *__pyx_n_u__38;
+ PyObject *__pyx_n_u__39;
+ PyObject *__pyx_n_u__4;
+ PyObject *__pyx_n_u__40;
+ PyObject *__pyx_n_u__41;
+ PyObject *__pyx_n_u__42;
+ PyObject *__pyx_n_u__43;
+ PyObject *__pyx_n_u__44;
+ PyObject *__pyx_n_u__45;
+ PyObject *__pyx_n_u__46;
+ PyObject *__pyx_n_u__47;
+ PyObject *__pyx_n_u__48;
+ PyObject *__pyx_n_u__49;
+ PyObject *__pyx_n_u__5;
+ PyObject *__pyx_n_u__50;
+ PyObject *__pyx_n_u__51;
+ PyObject *__pyx_n_u__52;
+ PyObject *__pyx_n_u__53;
+ PyObject *__pyx_n_u__54;
+ PyObject *__pyx_n_u__55;
+ PyObject *__pyx_n_u__56;
+ PyObject *__pyx_n_u__57;
+ PyObject *__pyx_n_u__58;
+ PyObject *__pyx_n_u__59;
+ PyObject *__pyx_n_u__6;
+ PyObject *__pyx_n_u__60;
+ PyObject *__pyx_n_u__61;
+ PyObject *__pyx_n_u__62;
+ PyObject *__pyx_n_u__63;
+ PyObject *__pyx_n_u__64;
+ PyObject *__pyx_n_u__65;
+ PyObject *__pyx_n_u__66;
+ PyObject *__pyx_n_u__67;
+ PyObject *__pyx_n_u__68;
+ PyObject *__pyx_n_u__69;
+ PyObject *__pyx_n_u__7;
+ PyObject *__pyx_n_u__70;
+ PyObject *__pyx_n_u__71;
+ PyObject *__pyx_n_u__72;
+ PyObject *__pyx_n_u__73;
+ PyObject *__pyx_n_u__74;
+ PyObject *__pyx_n_u__75;
+ PyObject *__pyx_n_u__76;
+ PyObject *__pyx_n_u__77;
+ PyObject *__pyx_n_u__78;
+ PyObject *__pyx_n_u__79;
+ PyObject *__pyx_n_u__8;
+ PyObject *__pyx_n_u__80;
+ PyObject *__pyx_n_u__81;
+ PyObject *__pyx_n_u__82;
+ PyObject *__pyx_n_u__83;
+ PyObject *__pyx_n_u__84;
+ PyObject *__pyx_n_u__85;
+ PyObject *__pyx_n_u__86;
+ PyObject *__pyx_n_u__87;
+ PyObject *__pyx_n_u__88;
+ PyObject *__pyx_n_u__89;
+ PyObject *__pyx_n_u__9;
+ PyObject *__pyx_n_u__90;
+ PyObject *__pyx_n_u__91;
+ PyObject *__pyx_n_u__92;
+ PyObject *__pyx_n_u__93;
+ PyObject *__pyx_n_u__94;
+ PyObject *__pyx_n_u__95;
+ PyObject *__pyx_n_u__96;
+ PyObject *__pyx_n_u__97;
+ PyObject *__pyx_n_u__98;
+ PyObject *__pyx_n_u__99;
+ PyObject *__pyx_n_u_as;
+ PyObject *__pyx_n_u_asm_Beng;
+ PyObject *__pyx_n_s_asyncio_coroutines;
+ PyObject *__pyx_n_u_awa_Deva;
+ PyObject *__pyx_kp_u_b_w_https_ftp_w_w_w_b;
+ PyObject *__pyx_n_s_batch;
+ PyObject *__pyx_n_u_ben_Beng;
+ PyObject *__pyx_n_u_bho_Deva;
+ PyObject *__pyx_n_u_bn;
+ PyObject *__pyx_n_u_brx_Deva;
+ PyObject *__pyx_n_s_chr;
+ PyObject *__pyx_n_s_cinit___locals_lambda;
+ PyObject *__pyx_n_s_clear;
+ PyObject *__pyx_n_s_cline_in_traceback;
+ PyObject *__pyx_n_s_compile;
+ PyObject *__pyx_kp_u_d;
+ PyObject *__pyx_kp_u_d_d;
+ PyObject *__pyx_kp_u_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d;
+ PyObject *__pyx_n_s_desc;
+ PyObject *__pyx_n_s_detokenize;
+ PyObject *__pyx_kp_u_disable;
+ PyObject *__pyx_n_u_doi_Deva;
+ PyObject *__pyx_n_u_en;
+ PyObject *__pyx_kp_u_enable;
+ PyObject *__pyx_n_u_eng_Latn;
+ PyObject *__pyx_n_s_escape;
+ PyObject *__pyx_n_s_findall;
+ PyObject *__pyx_kp_u_gc;
+ PyObject *__pyx_n_s_get;
+ PyObject *__pyx_n_s_get_normalizer;
+ PyObject *__pyx_n_s_getstate;
+ PyObject *__pyx_n_u_gom_Deva;
+ PyObject *__pyx_n_u_gon_Deva;
+ PyObject *__pyx_n_s_group;
+ PyObject *__pyx_n_u_gu;
+ PyObject *__pyx_n_u_guj_Gujr;
+ PyObject *__pyx_n_u_hi;
+ PyObject *__pyx_n_u_hin_Deva;
+ PyObject *__pyx_n_u_hne_Deva;
+ PyObject *__pyx_n_s_import;
+ PyObject *__pyx_n_s_indic_detokenize;
+ PyObject *__pyx_n_s_indic_tokenize;
+ PyObject *__pyx_n_s_indicnlp_normalize_indic_normali;
+ PyObject *__pyx_n_s_indicnlp_tokenize;
+ PyObject *__pyx_n_s_indicnlp_transliterate_unicode_t;
+ PyObject *__pyx_n_s_inference;
+ PyObject *__pyx_n_s_initializing;
+ PyObject *__pyx_n_s_is_coroutine;
+ PyObject *__pyx_n_s_is_target;
+ PyObject *__pyx_kp_u_isenabled;
+ PyObject *__pyx_n_s_items;
+ PyObject *__pyx_n_u_kK;
+ PyObject *__pyx_n_u_kan_Knda;
+ PyObject *__pyx_n_u_kas_Arab;
+ PyObject *__pyx_n_u_kas_Deva;
+ PyObject *__pyx_n_u_kha_Latn;
+ PyObject *__pyx_n_u_kn;
+ PyObject *__pyx_n_s_lang;
+ PyObject *__pyx_n_u_line;
+ PyObject *__pyx_n_u_lus_Latn;
+ PyObject *__pyx_n_s_m;
+ PyObject *__pyx_n_u_mag_Deva;
+ PyObject *__pyx_n_u_mai_Deva;
+ PyObject *__pyx_n_s_main;
+ PyObject *__pyx_n_u_mal_Mlym;
+ PyObject *__pyx_n_u_mar_Deva;
+ PyObject *__pyx_n_u_ml;
+ PyObject *__pyx_n_u_mni_Beng;
+ PyObject *__pyx_n_u_mni_Mtei;
+ PyObject *__pyx_n_u_mr;
+ PyObject *__pyx_kp_u_n;
+ PyObject *__pyx_kp_u_n_2;
+ PyObject *__pyx_n_s_name;
+ PyObject *__pyx_n_u_ne;
+ PyObject *__pyx_kp_s_no_default___reduce___due_to_non;
+ PyObject *__pyx_n_s_normalize;
+ PyObject *__pyx_n_u_npi_Deva;
+ PyObject *__pyx_n_u_or;
+ PyObject *__pyx_n_u_ory;
+ PyObject *__pyx_n_u_ory_Orya;
+ PyObject *__pyx_n_u_pa;
+ PyObject *__pyx_n_u_pan_Guru;
+ PyObject *__pyx_n_s_postprocess_batch;
+ PyObject *__pyx_n_s_preprocess_batch;
+ PyObject *__pyx_n_s_put;
+ PyObject *__pyx_n_s_pyx_state;
+ PyObject *__pyx_n_s_pyx_vtable;
+ PyObject *__pyx_n_s_queue;
+ PyObject *__pyx_kp_u_r;
+ PyObject *__pyx_n_s_range;
+ PyObject *__pyx_n_s_re;
+ PyObject *__pyx_n_s_reduce;
+ PyObject *__pyx_n_s_reduce_cython;
+ PyObject *__pyx_n_s_reduce_ex;
+ PyObject *__pyx_n_s_regex;
+ PyObject *__pyx_n_s_replace;
+ PyObject *__pyx_kp_u_s;
+ PyObject *__pyx_kp_u_s_2;
+ PyObject *__pyx_kp_u_s_3;
+ PyObject *__pyx_kp_u_s_s;
+ PyObject *__pyx_kp_u_s_s_2;
+ PyObject *__pyx_n_s_sacremoses;
+ PyObject *__pyx_n_u_san_Deva;
+ PyObject *__pyx_n_u_sat_Olck;
+ PyObject *__pyx_n_s_self;
+ PyObject *__pyx_n_s_sents;
+ PyObject *__pyx_n_s_setstate;
+ PyObject *__pyx_n_s_setstate_cython;
+ PyObject *__pyx_n_u_snd_Arab;
+ PyObject *__pyx_n_u_snd_Deva;
+ PyObject *__pyx_n_s_spec;
+ PyObject *__pyx_n_s_split;
+ PyObject *__pyx_n_s_src_lang;
+ PyObject *__pyx_kp_s_stringsource;
+ PyObject *__pyx_n_s_strip;
+ PyObject *__pyx_n_s_sub;
+ PyObject *__pyx_n_u_ta;
+ PyObject *__pyx_n_u_tam_Taml;
+ PyObject *__pyx_n_u_te;
+ PyObject *__pyx_n_u_tel_Telu;
+ PyObject *__pyx_n_s_test;
+ PyObject *__pyx_n_s_tgt_lang;
+ PyObject *__pyx_n_s_tokenize;
+ PyObject *__pyx_n_s_total;
+ PyObject *__pyx_n_s_tqdm;
+ PyObject *__pyx_n_s_translate;
+ PyObject *__pyx_n_s_transliterate;
+ PyObject *__pyx_n_s_trivial_detokenize;
+ PyObject *__pyx_n_s_trivial_tokenize;
+ PyObject *__pyx_n_s_typing;
+ PyObject *__pyx_n_s_unit;
+ PyObject *__pyx_n_u_unr_Deva;
+ PyObject *__pyx_n_u_ur;
+ PyObject *__pyx_n_u_urd_Arab;
+ PyObject *__pyx_n_s_visualize;
+ PyObject *__pyx_int_0;
+ PyObject *__pyx_tuple__150;
+ PyObject *__pyx_tuple__152;
+ PyObject *__pyx_tuple__158;
+ PyObject *__pyx_tuple__161;
+ PyObject *__pyx_tuple__164;
+ PyObject *__pyx_tuple__167;
+ PyObject *__pyx_tuple__170;
+ PyObject *__pyx_tuple__172;
+ PyObject *__pyx_tuple__174;
+ PyObject *__pyx_tuple__175;
+ PyObject *__pyx_tuple__177;
+ PyObject *__pyx_tuple__178;
+ PyObject *__pyx_tuple__180;
+ PyObject *__pyx_codeobj__173;
+ PyObject *__pyx_codeobj__176;
+ PyObject *__pyx_codeobj__179;
+ PyObject *__pyx_codeobj__181;
+} __pyx_mstate;
+
+#if CYTHON_USE_MODULE_STATE
+#ifdef __cplusplus
+namespace {
+ extern struct PyModuleDef __pyx_moduledef;
+} /* anonymous namespace */
+#else
+static struct PyModuleDef __pyx_moduledef;
+#endif
+
+#define __pyx_mstate(o) ((__pyx_mstate *)__Pyx_PyModule_GetState(o))
+
+#define __pyx_mstate_global (__pyx_mstate(PyState_FindModule(&__pyx_moduledef)))
+
+#define __pyx_m (PyState_FindModule(&__pyx_moduledef))
+#else
+static __pyx_mstate __pyx_mstate_global_static =
+#ifdef __cplusplus
+ {};
+#else
+ {0};
+#endif
+static __pyx_mstate *__pyx_mstate_global = &__pyx_mstate_global_static;
+#endif
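+/* Note: when CYTHON_USE_MODULE_STATE is set, all of the interned objects above
+ * live in per-module state reached via __Pyx_PyModule_GetState, keeping the
+ * extension compatible with multi-phase init (PEP 489) and with multiple
+ * interpreters; otherwise a single static __pyx_mstate stands in. */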
+/* #### Code section: module_state_clear ### */
+#if CYTHON_USE_MODULE_STATE
+static int __pyx_m_clear(PyObject *m) {
+ __pyx_mstate *clear_module_state = __pyx_mstate(m);
+ if (!clear_module_state) return 0;
+ Py_CLEAR(clear_module_state->__pyx_d);
+ Py_CLEAR(clear_module_state->__pyx_b);
+ Py_CLEAR(clear_module_state->__pyx_cython_runtime);
+ Py_CLEAR(clear_module_state->__pyx_empty_tuple);
+ Py_CLEAR(clear_module_state->__pyx_empty_bytes);
+ Py_CLEAR(clear_module_state->__pyx_empty_unicode);
+ #ifdef __Pyx_CyFunction_USED
+ Py_CLEAR(clear_module_state->__pyx_CyFunctionType);
+ #endif
+ #ifdef __Pyx_FusedFunction_USED
+ Py_CLEAR(clear_module_state->__pyx_FusedFunctionType);
+ #endif
+ Py_CLEAR(clear_module_state->__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor);
+ Py_CLEAR(clear_module_state->__pyx_type_17IndicTransToolkit_9processor_IndicProcessor);
+ Py_CLEAR(clear_module_state->__pyx_n_u_);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_0);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_1);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_1_2);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_1_2_2);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_1_3);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_1_4);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_2);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_2_2);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_3);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_4);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_5);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_6);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_7);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_8);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_9);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_A_Za_z0_9_w);
+ Py_CLEAR(clear_module_state->__pyx_n_u_Arab);
+ Py_CLEAR(clear_module_state->__pyx_n_u_Aran);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_C);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_C_2);
+ Py_CLEAR(clear_module_state->__pyx_n_s_Dict);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_ID);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_ID_2);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_ID_3);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_ID_4);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_ID_5);
+ Py_CLEAR(clear_module_state->__pyx_n_s_IndicNormalizerFactory);
+ Py_CLEAR(clear_module_state->__pyx_n_s_IndicProcessor);
+ Py_CLEAR(clear_module_state->__pyx_n_s_IndicProcessor___reduce_cython);
+ Py_CLEAR(clear_module_state->__pyx_n_s_IndicProcessor___setstate_cython);
+ Py_CLEAR(clear_module_state->__pyx_n_s_IndicProcessor_postprocess_batch);
+ Py_CLEAR(clear_module_state->__pyx_n_s_IndicProcessor_preprocess_batch);
+ Py_CLEAR(clear_module_state->__pyx_n_s_IndicTransToolkit_processor);
+ Py_CLEAR(clear_module_state->__pyx_kp_s_IndicTransToolkit_processor_pyx);
+ Py_CLEAR(clear_module_state->__pyx_n_u_Latn);
+ Py_CLEAR(clear_module_state->__pyx_n_s_List);
+ Py_CLEAR(clear_module_state->__pyx_n_s_MosesDetokenizer);
+ Py_CLEAR(clear_module_state->__pyx_n_s_MosesPunctNormalizer);
+ Py_CLEAR(clear_module_state->__pyx_n_s_MosesTokenizer);
+ Py_CLEAR(clear_module_state->__pyx_n_u_Mtei);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_None);
+ Py_CLEAR(clear_module_state->__pyx_n_u_Olck);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_Post_processing);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_Pre_processing);
+ Py_CLEAR(clear_module_state->__pyx_n_s_Queue);
+ Py_CLEAR(clear_module_state->__pyx_n_s_TypeError);
+ Py_CLEAR(clear_module_state->__pyx_n_s_UnicodeIndicTransliterator);
+ Py_CLEAR(clear_module_state->__pyx_n_s_Union);
+ Py_CLEAR(clear_module_state->__pyx_n_u__10);
+ Py_CLEAR(clear_module_state->__pyx_n_u__100);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__101);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__102);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__103);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__104);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__105);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__106);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__107);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__108);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__109);
+ Py_CLEAR(clear_module_state->__pyx_n_u__11);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__110);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__111);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__112);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__113);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__114);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__115);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__116);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__117);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__118);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__119);
+ Py_CLEAR(clear_module_state->__pyx_n_u__12);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__120);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__121);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__122);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__123);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__124);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__125);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__126);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__127);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__128);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__129);
+ Py_CLEAR(clear_module_state->__pyx_n_u__13);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__130);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__131);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__132);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__133);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__134);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__135);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__136);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__137);
+ Py_CLEAR(clear_module_state->__pyx_n_u__138);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__139);
+ Py_CLEAR(clear_module_state->__pyx_n_u__14);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__140);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__141);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__142);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__143);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__144);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__145);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__146);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__147);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__148);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__149);
+ Py_CLEAR(clear_module_state->__pyx_n_u__15);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__151);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__153);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__154);
+ Py_CLEAR(clear_module_state->__pyx_n_u__155);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__156);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__157);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__159);
+ Py_CLEAR(clear_module_state->__pyx_n_u__16);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__160);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__162);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__163);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__165);
+ Py_CLEAR(clear_module_state->__pyx_n_u__166);
+ Py_CLEAR(clear_module_state->__pyx_kp_u__168);
+ Py_CLEAR(clear_module_state->__pyx_n_u__169);
+ Py_CLEAR(clear_module_state->__pyx_n_u__17);
+ Py_CLEAR(clear_module_state->__pyx_n_s__171);
+ Py_CLEAR(clear_module_state->__pyx_n_u__18);
+ Py_CLEAR(clear_module_state->__pyx_n_s__182);
+ Py_CLEAR(clear_module_state->__pyx_n_u__19);
+ Py_CLEAR(clear_module_state->__pyx_n_u__2);
+ Py_CLEAR(clear_module_state->__pyx_n_u__20);
+ Py_CLEAR(clear_module_state->__pyx_n_u__21);
+ Py_CLEAR(clear_module_state->__pyx_n_u__22);
+ Py_CLEAR(clear_module_state->__pyx_n_u__23);
+ Py_CLEAR(clear_module_state->__pyx_n_u__24);
+ Py_CLEAR(clear_module_state->__pyx_n_u__25);
+ Py_CLEAR(clear_module_state->__pyx_n_u__26);
+ Py_CLEAR(clear_module_state->__pyx_n_u__27);
+ Py_CLEAR(clear_module_state->__pyx_n_u__28);
+ Py_CLEAR(clear_module_state->__pyx_n_u__29);
+ Py_CLEAR(clear_module_state->__pyx_n_u__3);
+ Py_CLEAR(clear_module_state->__pyx_n_u__30);
+ Py_CLEAR(clear_module_state->__pyx_n_u__31);
+ Py_CLEAR(clear_module_state->__pyx_n_u__32);
+ Py_CLEAR(clear_module_state->__pyx_n_u__33);
+ Py_CLEAR(clear_module_state->__pyx_n_u__34);
+ Py_CLEAR(clear_module_state->__pyx_n_u__35);
+ Py_CLEAR(clear_module_state->__pyx_n_u__36);
+ Py_CLEAR(clear_module_state->__pyx_n_u__37);
+ Py_CLEAR(clear_module_state->__pyx_n_u__38);
+ Py_CLEAR(clear_module_state->__pyx_n_u__39);
+ Py_CLEAR(clear_module_state->__pyx_n_u__4);
+ Py_CLEAR(clear_module_state->__pyx_n_u__40);
+ Py_CLEAR(clear_module_state->__pyx_n_u__41);
+ Py_CLEAR(clear_module_state->__pyx_n_u__42);
+ Py_CLEAR(clear_module_state->__pyx_n_u__43);
+ Py_CLEAR(clear_module_state->__pyx_n_u__44);
+ Py_CLEAR(clear_module_state->__pyx_n_u__45);
+ Py_CLEAR(clear_module_state->__pyx_n_u__46);
+ Py_CLEAR(clear_module_state->__pyx_n_u__47);
+ Py_CLEAR(clear_module_state->__pyx_n_u__48);
+ Py_CLEAR(clear_module_state->__pyx_n_u__49);
+ Py_CLEAR(clear_module_state->__pyx_n_u__5);
+ Py_CLEAR(clear_module_state->__pyx_n_u__50);
+ Py_CLEAR(clear_module_state->__pyx_n_u__51);
+ Py_CLEAR(clear_module_state->__pyx_n_u__52);
+ Py_CLEAR(clear_module_state->__pyx_n_u__53);
+ Py_CLEAR(clear_module_state->__pyx_n_u__54);
+ Py_CLEAR(clear_module_state->__pyx_n_u__55);
+ Py_CLEAR(clear_module_state->__pyx_n_u__56);
+ Py_CLEAR(clear_module_state->__pyx_n_u__57);
+ Py_CLEAR(clear_module_state->__pyx_n_u__58);
+ Py_CLEAR(clear_module_state->__pyx_n_u__59);
+ Py_CLEAR(clear_module_state->__pyx_n_u__6);
+ Py_CLEAR(clear_module_state->__pyx_n_u__60);
+ Py_CLEAR(clear_module_state->__pyx_n_u__61);
+ Py_CLEAR(clear_module_state->__pyx_n_u__62);
+ Py_CLEAR(clear_module_state->__pyx_n_u__63);
+ Py_CLEAR(clear_module_state->__pyx_n_u__64);
+ Py_CLEAR(clear_module_state->__pyx_n_u__65);
+ Py_CLEAR(clear_module_state->__pyx_n_u__66);
+ Py_CLEAR(clear_module_state->__pyx_n_u__67);
+ Py_CLEAR(clear_module_state->__pyx_n_u__68);
+ Py_CLEAR(clear_module_state->__pyx_n_u__69);
+ Py_CLEAR(clear_module_state->__pyx_n_u__7);
+ Py_CLEAR(clear_module_state->__pyx_n_u__70);
+ Py_CLEAR(clear_module_state->__pyx_n_u__71);
+ Py_CLEAR(clear_module_state->__pyx_n_u__72);
+ Py_CLEAR(clear_module_state->__pyx_n_u__73);
+ Py_CLEAR(clear_module_state->__pyx_n_u__74);
+ Py_CLEAR(clear_module_state->__pyx_n_u__75);
+ Py_CLEAR(clear_module_state->__pyx_n_u__76);
+ Py_CLEAR(clear_module_state->__pyx_n_u__77);
+ Py_CLEAR(clear_module_state->__pyx_n_u__78);
+ Py_CLEAR(clear_module_state->__pyx_n_u__79);
+ Py_CLEAR(clear_module_state->__pyx_n_u__8);
+ Py_CLEAR(clear_module_state->__pyx_n_u__80);
+ Py_CLEAR(clear_module_state->__pyx_n_u__81);
+ Py_CLEAR(clear_module_state->__pyx_n_u__82);
+ Py_CLEAR(clear_module_state->__pyx_n_u__83);
+ Py_CLEAR(clear_module_state->__pyx_n_u__84);
+ Py_CLEAR(clear_module_state->__pyx_n_u__85);
+ Py_CLEAR(clear_module_state->__pyx_n_u__86);
+ Py_CLEAR(clear_module_state->__pyx_n_u__87);
+ Py_CLEAR(clear_module_state->__pyx_n_u__88);
+ Py_CLEAR(clear_module_state->__pyx_n_u__89);
+ Py_CLEAR(clear_module_state->__pyx_n_u__9);
+ Py_CLEAR(clear_module_state->__pyx_n_u__90);
+ Py_CLEAR(clear_module_state->__pyx_n_u__91);
+ Py_CLEAR(clear_module_state->__pyx_n_u__92);
+ Py_CLEAR(clear_module_state->__pyx_n_u__93);
+ Py_CLEAR(clear_module_state->__pyx_n_u__94);
+ Py_CLEAR(clear_module_state->__pyx_n_u__95);
+ Py_CLEAR(clear_module_state->__pyx_n_u__96);
+ Py_CLEAR(clear_module_state->__pyx_n_u__97);
+ Py_CLEAR(clear_module_state->__pyx_n_u__98);
+ Py_CLEAR(clear_module_state->__pyx_n_u__99);
+ Py_CLEAR(clear_module_state->__pyx_n_u_as);
+ Py_CLEAR(clear_module_state->__pyx_n_u_asm_Beng);
+ Py_CLEAR(clear_module_state->__pyx_n_s_asyncio_coroutines);
+ Py_CLEAR(clear_module_state->__pyx_n_u_awa_Deva);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_b_w_https_ftp_w_w_w_b);
+ Py_CLEAR(clear_module_state->__pyx_n_s_batch);
+ Py_CLEAR(clear_module_state->__pyx_n_u_ben_Beng);
+ Py_CLEAR(clear_module_state->__pyx_n_u_bho_Deva);
+ Py_CLEAR(clear_module_state->__pyx_n_u_bn);
+ Py_CLEAR(clear_module_state->__pyx_n_u_brx_Deva);
+ Py_CLEAR(clear_module_state->__pyx_n_s_chr);
+ Py_CLEAR(clear_module_state->__pyx_n_s_cinit___locals_lambda);
+ Py_CLEAR(clear_module_state->__pyx_n_s_clear);
+ Py_CLEAR(clear_module_state->__pyx_n_s_cline_in_traceback);
+ Py_CLEAR(clear_module_state->__pyx_n_s_compile);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_d);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_d_d);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d);
+ Py_CLEAR(clear_module_state->__pyx_n_s_desc);
+ Py_CLEAR(clear_module_state->__pyx_n_s_detokenize);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_disable);
+ Py_CLEAR(clear_module_state->__pyx_n_u_doi_Deva);
+ Py_CLEAR(clear_module_state->__pyx_n_u_en);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_enable);
+ Py_CLEAR(clear_module_state->__pyx_n_u_eng_Latn);
+ Py_CLEAR(clear_module_state->__pyx_n_s_escape);
+ Py_CLEAR(clear_module_state->__pyx_n_s_findall);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_gc);
+ Py_CLEAR(clear_module_state->__pyx_n_s_get);
+ Py_CLEAR(clear_module_state->__pyx_n_s_get_normalizer);
+ Py_CLEAR(clear_module_state->__pyx_n_s_getstate);
+ Py_CLEAR(clear_module_state->__pyx_n_u_gom_Deva);
+ Py_CLEAR(clear_module_state->__pyx_n_u_gon_Deva);
+ Py_CLEAR(clear_module_state->__pyx_n_s_group);
+ Py_CLEAR(clear_module_state->__pyx_n_u_gu);
+ Py_CLEAR(clear_module_state->__pyx_n_u_guj_Gujr);
+ Py_CLEAR(clear_module_state->__pyx_n_u_hi);
+ Py_CLEAR(clear_module_state->__pyx_n_u_hin_Deva);
+ Py_CLEAR(clear_module_state->__pyx_n_u_hne_Deva);
+ Py_CLEAR(clear_module_state->__pyx_n_s_import);
+ Py_CLEAR(clear_module_state->__pyx_n_s_indic_detokenize);
+ Py_CLEAR(clear_module_state->__pyx_n_s_indic_tokenize);
+ Py_CLEAR(clear_module_state->__pyx_n_s_indicnlp_normalize_indic_normali);
+ Py_CLEAR(clear_module_state->__pyx_n_s_indicnlp_tokenize);
+ Py_CLEAR(clear_module_state->__pyx_n_s_indicnlp_transliterate_unicode_t);
+ Py_CLEAR(clear_module_state->__pyx_n_s_inference);
+ Py_CLEAR(clear_module_state->__pyx_n_s_initializing);
+ Py_CLEAR(clear_module_state->__pyx_n_s_is_coroutine);
+ Py_CLEAR(clear_module_state->__pyx_n_s_is_target);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_isenabled);
+ Py_CLEAR(clear_module_state->__pyx_n_s_items);
+ Py_CLEAR(clear_module_state->__pyx_n_u_kK);
+ Py_CLEAR(clear_module_state->__pyx_n_u_kan_Knda);
+ Py_CLEAR(clear_module_state->__pyx_n_u_kas_Arab);
+ Py_CLEAR(clear_module_state->__pyx_n_u_kas_Deva);
+ Py_CLEAR(clear_module_state->__pyx_n_u_kha_Latn);
+ Py_CLEAR(clear_module_state->__pyx_n_u_kn);
+ Py_CLEAR(clear_module_state->__pyx_n_s_lang);
+ Py_CLEAR(clear_module_state->__pyx_n_u_line);
+ Py_CLEAR(clear_module_state->__pyx_n_u_lus_Latn);
+ Py_CLEAR(clear_module_state->__pyx_n_s_m);
+ Py_CLEAR(clear_module_state->__pyx_n_u_mag_Deva);
+ Py_CLEAR(clear_module_state->__pyx_n_u_mai_Deva);
+ Py_CLEAR(clear_module_state->__pyx_n_s_main);
+ Py_CLEAR(clear_module_state->__pyx_n_u_mal_Mlym);
+ Py_CLEAR(clear_module_state->__pyx_n_u_mar_Deva);
+ Py_CLEAR(clear_module_state->__pyx_n_u_ml);
+ Py_CLEAR(clear_module_state->__pyx_n_u_mni_Beng);
+ Py_CLEAR(clear_module_state->__pyx_n_u_mni_Mtei);
+ Py_CLEAR(clear_module_state->__pyx_n_u_mr);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_n);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_n_2);
+ Py_CLEAR(clear_module_state->__pyx_n_s_name);
+ Py_CLEAR(clear_module_state->__pyx_n_u_ne);
+ Py_CLEAR(clear_module_state->__pyx_kp_s_no_default___reduce___due_to_non);
+ Py_CLEAR(clear_module_state->__pyx_n_s_normalize);
+ Py_CLEAR(clear_module_state->__pyx_n_u_npi_Deva);
+ Py_CLEAR(clear_module_state->__pyx_n_u_or);
+ Py_CLEAR(clear_module_state->__pyx_n_u_ory);
+ Py_CLEAR(clear_module_state->__pyx_n_u_ory_Orya);
+ Py_CLEAR(clear_module_state->__pyx_n_u_pa);
+ Py_CLEAR(clear_module_state->__pyx_n_u_pan_Guru);
+ Py_CLEAR(clear_module_state->__pyx_n_s_postprocess_batch);
+ Py_CLEAR(clear_module_state->__pyx_n_s_preprocess_batch);
+ Py_CLEAR(clear_module_state->__pyx_n_s_put);
+ Py_CLEAR(clear_module_state->__pyx_n_s_pyx_state);
+ Py_CLEAR(clear_module_state->__pyx_n_s_pyx_vtable);
+ Py_CLEAR(clear_module_state->__pyx_n_s_queue);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_r);
+ Py_CLEAR(clear_module_state->__pyx_n_s_range);
+ Py_CLEAR(clear_module_state->__pyx_n_s_re);
+ Py_CLEAR(clear_module_state->__pyx_n_s_reduce);
+ Py_CLEAR(clear_module_state->__pyx_n_s_reduce_cython);
+ Py_CLEAR(clear_module_state->__pyx_n_s_reduce_ex);
+ Py_CLEAR(clear_module_state->__pyx_n_s_regex);
+ Py_CLEAR(clear_module_state->__pyx_n_s_replace);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_s);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_s_2);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_s_3);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_s_s);
+ Py_CLEAR(clear_module_state->__pyx_kp_u_s_s_2);
+ Py_CLEAR(clear_module_state->__pyx_n_s_sacremoses);
+ Py_CLEAR(clear_module_state->__pyx_n_u_san_Deva);
+ Py_CLEAR(clear_module_state->__pyx_n_u_sat_Olck);
+ Py_CLEAR(clear_module_state->__pyx_n_s_self);
+ Py_CLEAR(clear_module_state->__pyx_n_s_sents);
+ Py_CLEAR(clear_module_state->__pyx_n_s_setstate);
+ Py_CLEAR(clear_module_state->__pyx_n_s_setstate_cython);
+ Py_CLEAR(clear_module_state->__pyx_n_u_snd_Arab);
+ Py_CLEAR(clear_module_state->__pyx_n_u_snd_Deva);
+ Py_CLEAR(clear_module_state->__pyx_n_s_spec);
+ Py_CLEAR(clear_module_state->__pyx_n_s_split);
+ Py_CLEAR(clear_module_state->__pyx_n_s_src_lang);
+ Py_CLEAR(clear_module_state->__pyx_kp_s_stringsource);
+ Py_CLEAR(clear_module_state->__pyx_n_s_strip);
+ Py_CLEAR(clear_module_state->__pyx_n_s_sub);
+ Py_CLEAR(clear_module_state->__pyx_n_u_ta);
+ Py_CLEAR(clear_module_state->__pyx_n_u_tam_Taml);
+ Py_CLEAR(clear_module_state->__pyx_n_u_te);
+ Py_CLEAR(clear_module_state->__pyx_n_u_tel_Telu);
+ Py_CLEAR(clear_module_state->__pyx_n_s_test);
+ Py_CLEAR(clear_module_state->__pyx_n_s_tgt_lang);
+ Py_CLEAR(clear_module_state->__pyx_n_s_tokenize);
+ Py_CLEAR(clear_module_state->__pyx_n_s_total);
+ Py_CLEAR(clear_module_state->__pyx_n_s_tqdm);
+ Py_CLEAR(clear_module_state->__pyx_n_s_translate);
+ Py_CLEAR(clear_module_state->__pyx_n_s_transliterate);
+ Py_CLEAR(clear_module_state->__pyx_n_s_trivial_detokenize);
+ Py_CLEAR(clear_module_state->__pyx_n_s_trivial_tokenize);
+ Py_CLEAR(clear_module_state->__pyx_n_s_typing);
+ Py_CLEAR(clear_module_state->__pyx_n_s_unit);
+ Py_CLEAR(clear_module_state->__pyx_n_u_unr_Deva);
+ Py_CLEAR(clear_module_state->__pyx_n_u_ur);
+ Py_CLEAR(clear_module_state->__pyx_n_u_urd_Arab);
+ Py_CLEAR(clear_module_state->__pyx_n_s_visualize);
+ Py_CLEAR(clear_module_state->__pyx_int_0);
+ Py_CLEAR(clear_module_state->__pyx_tuple__150);
+ Py_CLEAR(clear_module_state->__pyx_tuple__152);
+ Py_CLEAR(clear_module_state->__pyx_tuple__158);
+ Py_CLEAR(clear_module_state->__pyx_tuple__161);
+ Py_CLEAR(clear_module_state->__pyx_tuple__164);
+ Py_CLEAR(clear_module_state->__pyx_tuple__167);
+ Py_CLEAR(clear_module_state->__pyx_tuple__170);
+ Py_CLEAR(clear_module_state->__pyx_tuple__172);
+ Py_CLEAR(clear_module_state->__pyx_tuple__174);
+ Py_CLEAR(clear_module_state->__pyx_tuple__175);
+ Py_CLEAR(clear_module_state->__pyx_tuple__177);
+ Py_CLEAR(clear_module_state->__pyx_tuple__178);
+ Py_CLEAR(clear_module_state->__pyx_tuple__180);
+ Py_CLEAR(clear_module_state->__pyx_codeobj__173);
+ Py_CLEAR(clear_module_state->__pyx_codeobj__176);
+ Py_CLEAR(clear_module_state->__pyx_codeobj__179);
+ Py_CLEAR(clear_module_state->__pyx_codeobj__181);
+ return 0;
+}
+#endif
+/* #### Code section: module_state_traverse ### */
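+/* Like __pyx_m_clear above, the traverse function below is generated only
+ * when CYTHON_USE_MODULE_STATE is enabled. Together they implement the
+ * m_traverse/m_clear slots of the module definition: traverse reports every
+ * PyObject reference held in the module state to the cycle-detecting garbage
+ * collector via Py_VISIT, while clear drops those references via Py_CLEAR
+ * when the module is torn down. */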
+#if CYTHON_USE_MODULE_STATE
+static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) {
+ __pyx_mstate *traverse_module_state = __pyx_mstate(m);
+ if (!traverse_module_state) return 0;
+ Py_VISIT(traverse_module_state->__pyx_d);
+ Py_VISIT(traverse_module_state->__pyx_b);
+ Py_VISIT(traverse_module_state->__pyx_cython_runtime);
+ Py_VISIT(traverse_module_state->__pyx_empty_tuple);
+ Py_VISIT(traverse_module_state->__pyx_empty_bytes);
+ Py_VISIT(traverse_module_state->__pyx_empty_unicode);
+ #ifdef __Pyx_CyFunction_USED
+ Py_VISIT(traverse_module_state->__pyx_CyFunctionType);
+ #endif
+ #ifdef __Pyx_FusedFunction_USED
+ Py_VISIT(traverse_module_state->__pyx_FusedFunctionType);
+ #endif
+ Py_VISIT(traverse_module_state->__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor);
+ Py_VISIT(traverse_module_state->__pyx_type_17IndicTransToolkit_9processor_IndicProcessor);
+ Py_VISIT(traverse_module_state->__pyx_n_u_);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_0);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_1);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_1_2);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_1_2_2);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_1_3);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_1_4);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_2);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_2_2);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_3);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_4);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_5);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_6);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_7);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_8);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_9);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_A_Za_z0_9_w);
+ Py_VISIT(traverse_module_state->__pyx_n_u_Arab);
+ Py_VISIT(traverse_module_state->__pyx_n_u_Aran);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_C);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_C_2);
+ Py_VISIT(traverse_module_state->__pyx_n_s_Dict);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_ID);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_ID_2);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_ID_3);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_ID_4);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_ID_5);
+ Py_VISIT(traverse_module_state->__pyx_n_s_IndicNormalizerFactory);
+ Py_VISIT(traverse_module_state->__pyx_n_s_IndicProcessor);
+ Py_VISIT(traverse_module_state->__pyx_n_s_IndicProcessor___reduce_cython);
+ Py_VISIT(traverse_module_state->__pyx_n_s_IndicProcessor___setstate_cython);
+ Py_VISIT(traverse_module_state->__pyx_n_s_IndicProcessor_postprocess_batch);
+ Py_VISIT(traverse_module_state->__pyx_n_s_IndicProcessor_preprocess_batch);
+ Py_VISIT(traverse_module_state->__pyx_n_s_IndicTransToolkit_processor);
+ Py_VISIT(traverse_module_state->__pyx_kp_s_IndicTransToolkit_processor_pyx);
+ Py_VISIT(traverse_module_state->__pyx_n_u_Latn);
+ Py_VISIT(traverse_module_state->__pyx_n_s_List);
+ Py_VISIT(traverse_module_state->__pyx_n_s_MosesDetokenizer);
+ Py_VISIT(traverse_module_state->__pyx_n_s_MosesPunctNormalizer);
+ Py_VISIT(traverse_module_state->__pyx_n_s_MosesTokenizer);
+ Py_VISIT(traverse_module_state->__pyx_n_u_Mtei);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_None);
+ Py_VISIT(traverse_module_state->__pyx_n_u_Olck);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_Post_processing);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_Pre_processing);
+ Py_VISIT(traverse_module_state->__pyx_n_s_Queue);
+ Py_VISIT(traverse_module_state->__pyx_n_s_TypeError);
+ Py_VISIT(traverse_module_state->__pyx_n_s_UnicodeIndicTransliterator);
+ Py_VISIT(traverse_module_state->__pyx_n_s_Union);
+ Py_VISIT(traverse_module_state->__pyx_n_u__10);
+ Py_VISIT(traverse_module_state->__pyx_n_u__100);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__101);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__102);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__103);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__104);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__105);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__106);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__107);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__108);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__109);
+ Py_VISIT(traverse_module_state->__pyx_n_u__11);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__110);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__111);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__112);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__113);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__114);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__115);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__116);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__117);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__118);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__119);
+ Py_VISIT(traverse_module_state->__pyx_n_u__12);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__120);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__121);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__122);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__123);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__124);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__125);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__126);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__127);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__128);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__129);
+ Py_VISIT(traverse_module_state->__pyx_n_u__13);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__130);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__131);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__132);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__133);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__134);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__135);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__136);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__137);
+ Py_VISIT(traverse_module_state->__pyx_n_u__138);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__139);
+ Py_VISIT(traverse_module_state->__pyx_n_u__14);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__140);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__141);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__142);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__143);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__144);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__145);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__146);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__147);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__148);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__149);
+ Py_VISIT(traverse_module_state->__pyx_n_u__15);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__151);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__153);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__154);
+ Py_VISIT(traverse_module_state->__pyx_n_u__155);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__156);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__157);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__159);
+ Py_VISIT(traverse_module_state->__pyx_n_u__16);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__160);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__162);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__163);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__165);
+ Py_VISIT(traverse_module_state->__pyx_n_u__166);
+ Py_VISIT(traverse_module_state->__pyx_kp_u__168);
+ Py_VISIT(traverse_module_state->__pyx_n_u__169);
+ Py_VISIT(traverse_module_state->__pyx_n_u__17);
+ Py_VISIT(traverse_module_state->__pyx_n_s__171);
+ Py_VISIT(traverse_module_state->__pyx_n_u__18);
+ Py_VISIT(traverse_module_state->__pyx_n_s__182);
+ Py_VISIT(traverse_module_state->__pyx_n_u__19);
+ Py_VISIT(traverse_module_state->__pyx_n_u__2);
+ Py_VISIT(traverse_module_state->__pyx_n_u__20);
+ Py_VISIT(traverse_module_state->__pyx_n_u__21);
+ Py_VISIT(traverse_module_state->__pyx_n_u__22);
+ Py_VISIT(traverse_module_state->__pyx_n_u__23);
+ Py_VISIT(traverse_module_state->__pyx_n_u__24);
+ Py_VISIT(traverse_module_state->__pyx_n_u__25);
+ Py_VISIT(traverse_module_state->__pyx_n_u__26);
+ Py_VISIT(traverse_module_state->__pyx_n_u__27);
+ Py_VISIT(traverse_module_state->__pyx_n_u__28);
+ Py_VISIT(traverse_module_state->__pyx_n_u__29);
+ Py_VISIT(traverse_module_state->__pyx_n_u__3);
+ Py_VISIT(traverse_module_state->__pyx_n_u__30);
+ Py_VISIT(traverse_module_state->__pyx_n_u__31);
+ Py_VISIT(traverse_module_state->__pyx_n_u__32);
+ Py_VISIT(traverse_module_state->__pyx_n_u__33);
+ Py_VISIT(traverse_module_state->__pyx_n_u__34);
+ Py_VISIT(traverse_module_state->__pyx_n_u__35);
+ Py_VISIT(traverse_module_state->__pyx_n_u__36);
+ Py_VISIT(traverse_module_state->__pyx_n_u__37);
+ Py_VISIT(traverse_module_state->__pyx_n_u__38);
+ Py_VISIT(traverse_module_state->__pyx_n_u__39);
+ Py_VISIT(traverse_module_state->__pyx_n_u__4);
+ Py_VISIT(traverse_module_state->__pyx_n_u__40);
+ Py_VISIT(traverse_module_state->__pyx_n_u__41);
+ Py_VISIT(traverse_module_state->__pyx_n_u__42);
+ Py_VISIT(traverse_module_state->__pyx_n_u__43);
+ Py_VISIT(traverse_module_state->__pyx_n_u__44);
+ Py_VISIT(traverse_module_state->__pyx_n_u__45);
+ Py_VISIT(traverse_module_state->__pyx_n_u__46);
+ Py_VISIT(traverse_module_state->__pyx_n_u__47);
+ Py_VISIT(traverse_module_state->__pyx_n_u__48);
+ Py_VISIT(traverse_module_state->__pyx_n_u__49);
+ Py_VISIT(traverse_module_state->__pyx_n_u__5);
+ Py_VISIT(traverse_module_state->__pyx_n_u__50);
+ Py_VISIT(traverse_module_state->__pyx_n_u__51);
+ Py_VISIT(traverse_module_state->__pyx_n_u__52);
+ Py_VISIT(traverse_module_state->__pyx_n_u__53);
+ Py_VISIT(traverse_module_state->__pyx_n_u__54);
+ Py_VISIT(traverse_module_state->__pyx_n_u__55);
+ Py_VISIT(traverse_module_state->__pyx_n_u__56);
+ Py_VISIT(traverse_module_state->__pyx_n_u__57);
+ Py_VISIT(traverse_module_state->__pyx_n_u__58);
+ Py_VISIT(traverse_module_state->__pyx_n_u__59);
+ Py_VISIT(traverse_module_state->__pyx_n_u__6);
+ Py_VISIT(traverse_module_state->__pyx_n_u__60);
+ Py_VISIT(traverse_module_state->__pyx_n_u__61);
+ Py_VISIT(traverse_module_state->__pyx_n_u__62);
+ Py_VISIT(traverse_module_state->__pyx_n_u__63);
+ Py_VISIT(traverse_module_state->__pyx_n_u__64);
+ Py_VISIT(traverse_module_state->__pyx_n_u__65);
+ Py_VISIT(traverse_module_state->__pyx_n_u__66);
+ Py_VISIT(traverse_module_state->__pyx_n_u__67);
+ Py_VISIT(traverse_module_state->__pyx_n_u__68);
+ Py_VISIT(traverse_module_state->__pyx_n_u__69);
+ Py_VISIT(traverse_module_state->__pyx_n_u__7);
+ Py_VISIT(traverse_module_state->__pyx_n_u__70);
+ Py_VISIT(traverse_module_state->__pyx_n_u__71);
+ Py_VISIT(traverse_module_state->__pyx_n_u__72);
+ Py_VISIT(traverse_module_state->__pyx_n_u__73);
+ Py_VISIT(traverse_module_state->__pyx_n_u__74);
+ Py_VISIT(traverse_module_state->__pyx_n_u__75);
+ Py_VISIT(traverse_module_state->__pyx_n_u__76);
+ Py_VISIT(traverse_module_state->__pyx_n_u__77);
+ Py_VISIT(traverse_module_state->__pyx_n_u__78);
+ Py_VISIT(traverse_module_state->__pyx_n_u__79);
+ Py_VISIT(traverse_module_state->__pyx_n_u__8);
+ Py_VISIT(traverse_module_state->__pyx_n_u__80);
+ Py_VISIT(traverse_module_state->__pyx_n_u__81);
+ Py_VISIT(traverse_module_state->__pyx_n_u__82);
+ Py_VISIT(traverse_module_state->__pyx_n_u__83);
+ Py_VISIT(traverse_module_state->__pyx_n_u__84);
+ Py_VISIT(traverse_module_state->__pyx_n_u__85);
+ Py_VISIT(traverse_module_state->__pyx_n_u__86);
+ Py_VISIT(traverse_module_state->__pyx_n_u__87);
+ Py_VISIT(traverse_module_state->__pyx_n_u__88);
+ Py_VISIT(traverse_module_state->__pyx_n_u__89);
+ Py_VISIT(traverse_module_state->__pyx_n_u__9);
+ Py_VISIT(traverse_module_state->__pyx_n_u__90);
+ Py_VISIT(traverse_module_state->__pyx_n_u__91);
+ Py_VISIT(traverse_module_state->__pyx_n_u__92);
+ Py_VISIT(traverse_module_state->__pyx_n_u__93);
+ Py_VISIT(traverse_module_state->__pyx_n_u__94);
+ Py_VISIT(traverse_module_state->__pyx_n_u__95);
+ Py_VISIT(traverse_module_state->__pyx_n_u__96);
+ Py_VISIT(traverse_module_state->__pyx_n_u__97);
+ Py_VISIT(traverse_module_state->__pyx_n_u__98);
+ Py_VISIT(traverse_module_state->__pyx_n_u__99);
+ Py_VISIT(traverse_module_state->__pyx_n_u_as);
+ Py_VISIT(traverse_module_state->__pyx_n_u_asm_Beng);
+ Py_VISIT(traverse_module_state->__pyx_n_s_asyncio_coroutines);
+ Py_VISIT(traverse_module_state->__pyx_n_u_awa_Deva);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_b_w_https_ftp_w_w_w_b);
+ Py_VISIT(traverse_module_state->__pyx_n_s_batch);
+ Py_VISIT(traverse_module_state->__pyx_n_u_ben_Beng);
+ Py_VISIT(traverse_module_state->__pyx_n_u_bho_Deva);
+ Py_VISIT(traverse_module_state->__pyx_n_u_bn);
+ Py_VISIT(traverse_module_state->__pyx_n_u_brx_Deva);
+ Py_VISIT(traverse_module_state->__pyx_n_s_chr);
+ Py_VISIT(traverse_module_state->__pyx_n_s_cinit___locals_lambda);
+ Py_VISIT(traverse_module_state->__pyx_n_s_clear);
+ Py_VISIT(traverse_module_state->__pyx_n_s_cline_in_traceback);
+ Py_VISIT(traverse_module_state->__pyx_n_s_compile);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_d);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_d_d);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d);
+ Py_VISIT(traverse_module_state->__pyx_n_s_desc);
+ Py_VISIT(traverse_module_state->__pyx_n_s_detokenize);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_disable);
+ Py_VISIT(traverse_module_state->__pyx_n_u_doi_Deva);
+ Py_VISIT(traverse_module_state->__pyx_n_u_en);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_enable);
+ Py_VISIT(traverse_module_state->__pyx_n_u_eng_Latn);
+ Py_VISIT(traverse_module_state->__pyx_n_s_escape);
+ Py_VISIT(traverse_module_state->__pyx_n_s_findall);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_gc);
+ Py_VISIT(traverse_module_state->__pyx_n_s_get);
+ Py_VISIT(traverse_module_state->__pyx_n_s_get_normalizer);
+ Py_VISIT(traverse_module_state->__pyx_n_s_getstate);
+ Py_VISIT(traverse_module_state->__pyx_n_u_gom_Deva);
+ Py_VISIT(traverse_module_state->__pyx_n_u_gon_Deva);
+ Py_VISIT(traverse_module_state->__pyx_n_s_group);
+ Py_VISIT(traverse_module_state->__pyx_n_u_gu);
+ Py_VISIT(traverse_module_state->__pyx_n_u_guj_Gujr);
+ Py_VISIT(traverse_module_state->__pyx_n_u_hi);
+ Py_VISIT(traverse_module_state->__pyx_n_u_hin_Deva);
+ Py_VISIT(traverse_module_state->__pyx_n_u_hne_Deva);
+ Py_VISIT(traverse_module_state->__pyx_n_s_import);
+ Py_VISIT(traverse_module_state->__pyx_n_s_indic_detokenize);
+ Py_VISIT(traverse_module_state->__pyx_n_s_indic_tokenize);
+ Py_VISIT(traverse_module_state->__pyx_n_s_indicnlp_normalize_indic_normali);
+ Py_VISIT(traverse_module_state->__pyx_n_s_indicnlp_tokenize);
+ Py_VISIT(traverse_module_state->__pyx_n_s_indicnlp_transliterate_unicode_t);
+ Py_VISIT(traverse_module_state->__pyx_n_s_inference);
+ Py_VISIT(traverse_module_state->__pyx_n_s_initializing);
+ Py_VISIT(traverse_module_state->__pyx_n_s_is_coroutine);
+ Py_VISIT(traverse_module_state->__pyx_n_s_is_target);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_isenabled);
+ Py_VISIT(traverse_module_state->__pyx_n_s_items);
+ Py_VISIT(traverse_module_state->__pyx_n_u_kK);
+ Py_VISIT(traverse_module_state->__pyx_n_u_kan_Knda);
+ Py_VISIT(traverse_module_state->__pyx_n_u_kas_Arab);
+ Py_VISIT(traverse_module_state->__pyx_n_u_kas_Deva);
+ Py_VISIT(traverse_module_state->__pyx_n_u_kha_Latn);
+ Py_VISIT(traverse_module_state->__pyx_n_u_kn);
+ Py_VISIT(traverse_module_state->__pyx_n_s_lang);
+ Py_VISIT(traverse_module_state->__pyx_n_u_line);
+ Py_VISIT(traverse_module_state->__pyx_n_u_lus_Latn);
+ Py_VISIT(traverse_module_state->__pyx_n_s_m);
+ Py_VISIT(traverse_module_state->__pyx_n_u_mag_Deva);
+ Py_VISIT(traverse_module_state->__pyx_n_u_mai_Deva);
+ Py_VISIT(traverse_module_state->__pyx_n_s_main);
+ Py_VISIT(traverse_module_state->__pyx_n_u_mal_Mlym);
+ Py_VISIT(traverse_module_state->__pyx_n_u_mar_Deva);
+ Py_VISIT(traverse_module_state->__pyx_n_u_ml);
+ Py_VISIT(traverse_module_state->__pyx_n_u_mni_Beng);
+ Py_VISIT(traverse_module_state->__pyx_n_u_mni_Mtei);
+ Py_VISIT(traverse_module_state->__pyx_n_u_mr);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_n);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_n_2);
+ Py_VISIT(traverse_module_state->__pyx_n_s_name);
+ Py_VISIT(traverse_module_state->__pyx_n_u_ne);
+ Py_VISIT(traverse_module_state->__pyx_kp_s_no_default___reduce___due_to_non);
+ Py_VISIT(traverse_module_state->__pyx_n_s_normalize);
+ Py_VISIT(traverse_module_state->__pyx_n_u_npi_Deva);
+ Py_VISIT(traverse_module_state->__pyx_n_u_or);
+ Py_VISIT(traverse_module_state->__pyx_n_u_ory);
+ Py_VISIT(traverse_module_state->__pyx_n_u_ory_Orya);
+ Py_VISIT(traverse_module_state->__pyx_n_u_pa);
+ Py_VISIT(traverse_module_state->__pyx_n_u_pan_Guru);
+ Py_VISIT(traverse_module_state->__pyx_n_s_postprocess_batch);
+ Py_VISIT(traverse_module_state->__pyx_n_s_preprocess_batch);
+ Py_VISIT(traverse_module_state->__pyx_n_s_put);
+ Py_VISIT(traverse_module_state->__pyx_n_s_pyx_state);
+ Py_VISIT(traverse_module_state->__pyx_n_s_pyx_vtable);
+ Py_VISIT(traverse_module_state->__pyx_n_s_queue);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_r);
+ Py_VISIT(traverse_module_state->__pyx_n_s_range);
+ Py_VISIT(traverse_module_state->__pyx_n_s_re);
+ Py_VISIT(traverse_module_state->__pyx_n_s_reduce);
+ Py_VISIT(traverse_module_state->__pyx_n_s_reduce_cython);
+ Py_VISIT(traverse_module_state->__pyx_n_s_reduce_ex);
+ Py_VISIT(traverse_module_state->__pyx_n_s_regex);
+ Py_VISIT(traverse_module_state->__pyx_n_s_replace);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_s);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_s_2);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_s_3);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_s_s);
+ Py_VISIT(traverse_module_state->__pyx_kp_u_s_s_2);
+ Py_VISIT(traverse_module_state->__pyx_n_s_sacremoses);
+ Py_VISIT(traverse_module_state->__pyx_n_u_san_Deva);
+ Py_VISIT(traverse_module_state->__pyx_n_u_sat_Olck);
+ Py_VISIT(traverse_module_state->__pyx_n_s_self);
+ Py_VISIT(traverse_module_state->__pyx_n_s_sents);
+ Py_VISIT(traverse_module_state->__pyx_n_s_setstate);
+ Py_VISIT(traverse_module_state->__pyx_n_s_setstate_cython);
+ Py_VISIT(traverse_module_state->__pyx_n_u_snd_Arab);
+ Py_VISIT(traverse_module_state->__pyx_n_u_snd_Deva);
+ Py_VISIT(traverse_module_state->__pyx_n_s_spec);
+ Py_VISIT(traverse_module_state->__pyx_n_s_split);
+ Py_VISIT(traverse_module_state->__pyx_n_s_src_lang);
+ Py_VISIT(traverse_module_state->__pyx_kp_s_stringsource);
+ Py_VISIT(traverse_module_state->__pyx_n_s_strip);
+ Py_VISIT(traverse_module_state->__pyx_n_s_sub);
+ Py_VISIT(traverse_module_state->__pyx_n_u_ta);
+ Py_VISIT(traverse_module_state->__pyx_n_u_tam_Taml);
+ Py_VISIT(traverse_module_state->__pyx_n_u_te);
+ Py_VISIT(traverse_module_state->__pyx_n_u_tel_Telu);
+ Py_VISIT(traverse_module_state->__pyx_n_s_test);
+ Py_VISIT(traverse_module_state->__pyx_n_s_tgt_lang);
+ Py_VISIT(traverse_module_state->__pyx_n_s_tokenize);
+ Py_VISIT(traverse_module_state->__pyx_n_s_total);
+ Py_VISIT(traverse_module_state->__pyx_n_s_tqdm);
+ Py_VISIT(traverse_module_state->__pyx_n_s_translate);
+ Py_VISIT(traverse_module_state->__pyx_n_s_transliterate);
+ Py_VISIT(traverse_module_state->__pyx_n_s_trivial_detokenize);
+ Py_VISIT(traverse_module_state->__pyx_n_s_trivial_tokenize);
+ Py_VISIT(traverse_module_state->__pyx_n_s_typing);
+ Py_VISIT(traverse_module_state->__pyx_n_s_unit);
+ Py_VISIT(traverse_module_state->__pyx_n_u_unr_Deva);
+ Py_VISIT(traverse_module_state->__pyx_n_u_ur);
+ Py_VISIT(traverse_module_state->__pyx_n_u_urd_Arab);
+ Py_VISIT(traverse_module_state->__pyx_n_s_visualize);
+ Py_VISIT(traverse_module_state->__pyx_int_0);
+ Py_VISIT(traverse_module_state->__pyx_tuple__150);
+ Py_VISIT(traverse_module_state->__pyx_tuple__152);
+ Py_VISIT(traverse_module_state->__pyx_tuple__158);
+ Py_VISIT(traverse_module_state->__pyx_tuple__161);
+ Py_VISIT(traverse_module_state->__pyx_tuple__164);
+ Py_VISIT(traverse_module_state->__pyx_tuple__167);
+ Py_VISIT(traverse_module_state->__pyx_tuple__170);
+ Py_VISIT(traverse_module_state->__pyx_tuple__172);
+ Py_VISIT(traverse_module_state->__pyx_tuple__174);
+ Py_VISIT(traverse_module_state->__pyx_tuple__175);
+ Py_VISIT(traverse_module_state->__pyx_tuple__177);
+ Py_VISIT(traverse_module_state->__pyx_tuple__178);
+ Py_VISIT(traverse_module_state->__pyx_tuple__180);
+ Py_VISIT(traverse_module_state->__pyx_codeobj__173);
+ Py_VISIT(traverse_module_state->__pyx_codeobj__176);
+ Py_VISIT(traverse_module_state->__pyx_codeobj__179);
+ Py_VISIT(traverse_module_state->__pyx_codeobj__181);
+ return 0;
+}
+#endif
+/* #### Code section: module_state_defines ### */
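+/* The #define block below lets the rest of the generated code refer to
+ * interned strings, types, and constants by their short __pyx_* names while
+ * every access is actually routed through the module state struct via
+ * __pyx_mstate_global, so the same function bodies compile unchanged whether
+ * the state is a static global or held per module instance. */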
+#define __pyx_d __pyx_mstate_global->__pyx_d
+#define __pyx_b __pyx_mstate_global->__pyx_b
+#define __pyx_cython_runtime __pyx_mstate_global->__pyx_cython_runtime
+#define __pyx_empty_tuple __pyx_mstate_global->__pyx_empty_tuple
+#define __pyx_empty_bytes __pyx_mstate_global->__pyx_empty_bytes
+#define __pyx_empty_unicode __pyx_mstate_global->__pyx_empty_unicode
+#ifdef __Pyx_CyFunction_USED
+#define __pyx_CyFunctionType __pyx_mstate_global->__pyx_CyFunctionType
+#endif
+#ifdef __Pyx_FusedFunction_USED
+#define __pyx_FusedFunctionType __pyx_mstate_global->__pyx_FusedFunctionType
+#endif
+#ifdef __Pyx_Generator_USED
+#define __pyx_GeneratorType __pyx_mstate_global->__pyx_GeneratorType
+#endif
+#ifdef __Pyx_IterableCoroutine_USED
+#define __pyx_IterableCoroutineType __pyx_mstate_global->__pyx_IterableCoroutineType
+#endif
+#ifdef __Pyx_Coroutine_USED
+#define __pyx_CoroutineAwaitType __pyx_mstate_global->__pyx_CoroutineAwaitType
+#endif
+#ifdef __Pyx_Coroutine_USED
+#define __pyx_CoroutineType __pyx_mstate_global->__pyx_CoroutineType
+#endif
+#if CYTHON_USE_MODULE_STATE
+#define __pyx_type_17IndicTransToolkit_9processor_IndicProcessor __pyx_mstate_global->__pyx_type_17IndicTransToolkit_9processor_IndicProcessor
+#endif
+#define __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor __pyx_mstate_global->__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor
+#define __pyx_n_u_ __pyx_mstate_global->__pyx_n_u_
+#define __pyx_kp_u_0 __pyx_mstate_global->__pyx_kp_u_0
+#define __pyx_kp_u_1 __pyx_mstate_global->__pyx_kp_u_1
+#define __pyx_kp_u_1_2 __pyx_mstate_global->__pyx_kp_u_1_2
+#define __pyx_kp_u_1_2_2 __pyx_mstate_global->__pyx_kp_u_1_2_2
+#define __pyx_kp_u_1_3 __pyx_mstate_global->__pyx_kp_u_1_3
+#define __pyx_kp_u_1_4 __pyx_mstate_global->__pyx_kp_u_1_4
+#define __pyx_kp_u_2 __pyx_mstate_global->__pyx_kp_u_2
+#define __pyx_kp_u_2_2 __pyx_mstate_global->__pyx_kp_u_2_2
+#define __pyx_kp_u_3 __pyx_mstate_global->__pyx_kp_u_3
+#define __pyx_kp_u_4 __pyx_mstate_global->__pyx_kp_u_4
+#define __pyx_kp_u_5 __pyx_mstate_global->__pyx_kp_u_5
+#define __pyx_kp_u_6 __pyx_mstate_global->__pyx_kp_u_6
+#define __pyx_kp_u_7 __pyx_mstate_global->__pyx_kp_u_7
+#define __pyx_kp_u_8 __pyx_mstate_global->__pyx_kp_u_8
+#define __pyx_kp_u_9 __pyx_mstate_global->__pyx_kp_u_9
+#define __pyx_kp_u_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2 __pyx_mstate_global->__pyx_kp_u_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2
+#define __pyx_kp_u_A_Za_z0_9_w __pyx_mstate_global->__pyx_kp_u_A_Za_z0_9_w
+#define __pyx_n_u_Arab __pyx_mstate_global->__pyx_n_u_Arab
+#define __pyx_n_u_Aran __pyx_mstate_global->__pyx_n_u_Aran
+#define __pyx_kp_u_C __pyx_mstate_global->__pyx_kp_u_C
+#define __pyx_kp_u_C_2 __pyx_mstate_global->__pyx_kp_u_C_2
+#define __pyx_n_s_Dict __pyx_mstate_global->__pyx_n_s_Dict
+#define __pyx_kp_u_ID __pyx_mstate_global->__pyx_kp_u_ID
+#define __pyx_kp_u_ID_2 __pyx_mstate_global->__pyx_kp_u_ID_2
+#define __pyx_kp_u_ID_3 __pyx_mstate_global->__pyx_kp_u_ID_3
+#define __pyx_kp_u_ID_4 __pyx_mstate_global->__pyx_kp_u_ID_4
+#define __pyx_kp_u_ID_5 __pyx_mstate_global->__pyx_kp_u_ID_5
+#define __pyx_n_s_IndicNormalizerFactory __pyx_mstate_global->__pyx_n_s_IndicNormalizerFactory
+#define __pyx_n_s_IndicProcessor __pyx_mstate_global->__pyx_n_s_IndicProcessor
+#define __pyx_n_s_IndicProcessor___reduce_cython __pyx_mstate_global->__pyx_n_s_IndicProcessor___reduce_cython
+#define __pyx_n_s_IndicProcessor___setstate_cython __pyx_mstate_global->__pyx_n_s_IndicProcessor___setstate_cython
+#define __pyx_n_s_IndicProcessor_postprocess_batch __pyx_mstate_global->__pyx_n_s_IndicProcessor_postprocess_batch
+#define __pyx_n_s_IndicProcessor_preprocess_batch __pyx_mstate_global->__pyx_n_s_IndicProcessor_preprocess_batch
+#define __pyx_n_s_IndicTransToolkit_processor __pyx_mstate_global->__pyx_n_s_IndicTransToolkit_processor
+#define __pyx_kp_s_IndicTransToolkit_processor_pyx __pyx_mstate_global->__pyx_kp_s_IndicTransToolkit_processor_pyx
+#define __pyx_n_u_Latn __pyx_mstate_global->__pyx_n_u_Latn
+#define __pyx_n_s_List __pyx_mstate_global->__pyx_n_s_List
+#define __pyx_n_s_MosesDetokenizer __pyx_mstate_global->__pyx_n_s_MosesDetokenizer
+#define __pyx_n_s_MosesPunctNormalizer __pyx_mstate_global->__pyx_n_s_MosesPunctNormalizer
+#define __pyx_n_s_MosesTokenizer __pyx_mstate_global->__pyx_n_s_MosesTokenizer
+#define __pyx_n_u_Mtei __pyx_mstate_global->__pyx_n_u_Mtei
+#define __pyx_kp_u_None __pyx_mstate_global->__pyx_kp_u_None
+#define __pyx_n_u_Olck __pyx_mstate_global->__pyx_n_u_Olck
+#define __pyx_kp_u_Post_processing __pyx_mstate_global->__pyx_kp_u_Post_processing
+#define __pyx_kp_u_Pre_processing __pyx_mstate_global->__pyx_kp_u_Pre_processing
+#define __pyx_n_s_Queue __pyx_mstate_global->__pyx_n_s_Queue
+#define __pyx_n_s_TypeError __pyx_mstate_global->__pyx_n_s_TypeError
+#define __pyx_n_s_UnicodeIndicTransliterator __pyx_mstate_global->__pyx_n_s_UnicodeIndicTransliterator
+#define __pyx_n_s_Union __pyx_mstate_global->__pyx_n_s_Union
+#define __pyx_n_u__10 __pyx_mstate_global->__pyx_n_u__10
+#define __pyx_n_u__100 __pyx_mstate_global->__pyx_n_u__100
+#define __pyx_kp_u__101 __pyx_mstate_global->__pyx_kp_u__101
+#define __pyx_kp_u__102 __pyx_mstate_global->__pyx_kp_u__102
+#define __pyx_kp_u__103 __pyx_mstate_global->__pyx_kp_u__103
+#define __pyx_kp_u__104 __pyx_mstate_global->__pyx_kp_u__104
+#define __pyx_kp_u__105 __pyx_mstate_global->__pyx_kp_u__105
+#define __pyx_kp_u__106 __pyx_mstate_global->__pyx_kp_u__106
+#define __pyx_kp_u__107 __pyx_mstate_global->__pyx_kp_u__107
+#define __pyx_kp_u__108 __pyx_mstate_global->__pyx_kp_u__108
+#define __pyx_kp_u__109 __pyx_mstate_global->__pyx_kp_u__109
+#define __pyx_n_u__11 __pyx_mstate_global->__pyx_n_u__11
+#define __pyx_kp_u__110 __pyx_mstate_global->__pyx_kp_u__110
+#define __pyx_kp_u__111 __pyx_mstate_global->__pyx_kp_u__111
+#define __pyx_kp_u__112 __pyx_mstate_global->__pyx_kp_u__112
+#define __pyx_kp_u__113 __pyx_mstate_global->__pyx_kp_u__113
+#define __pyx_kp_u__114 __pyx_mstate_global->__pyx_kp_u__114
+#define __pyx_kp_u__115 __pyx_mstate_global->__pyx_kp_u__115
+#define __pyx_kp_u__116 __pyx_mstate_global->__pyx_kp_u__116
+#define __pyx_kp_u__117 __pyx_mstate_global->__pyx_kp_u__117
+#define __pyx_kp_u__118 __pyx_mstate_global->__pyx_kp_u__118
+#define __pyx_kp_u__119 __pyx_mstate_global->__pyx_kp_u__119
+#define __pyx_n_u__12 __pyx_mstate_global->__pyx_n_u__12
+#define __pyx_kp_u__120 __pyx_mstate_global->__pyx_kp_u__120
+#define __pyx_kp_u__121 __pyx_mstate_global->__pyx_kp_u__121
+#define __pyx_kp_u__122 __pyx_mstate_global->__pyx_kp_u__122
+#define __pyx_kp_u__123 __pyx_mstate_global->__pyx_kp_u__123
+#define __pyx_kp_u__124 __pyx_mstate_global->__pyx_kp_u__124
+#define __pyx_kp_u__125 __pyx_mstate_global->__pyx_kp_u__125
+#define __pyx_kp_u__126 __pyx_mstate_global->__pyx_kp_u__126
+#define __pyx_kp_u__127 __pyx_mstate_global->__pyx_kp_u__127
+#define __pyx_kp_u__128 __pyx_mstate_global->__pyx_kp_u__128
+#define __pyx_kp_u__129 __pyx_mstate_global->__pyx_kp_u__129
+#define __pyx_n_u__13 __pyx_mstate_global->__pyx_n_u__13
+#define __pyx_kp_u__130 __pyx_mstate_global->__pyx_kp_u__130
+#define __pyx_kp_u__131 __pyx_mstate_global->__pyx_kp_u__131
+#define __pyx_kp_u__132 __pyx_mstate_global->__pyx_kp_u__132
+#define __pyx_kp_u__133 __pyx_mstate_global->__pyx_kp_u__133
+#define __pyx_kp_u__134 __pyx_mstate_global->__pyx_kp_u__134
+#define __pyx_kp_u__135 __pyx_mstate_global->__pyx_kp_u__135
+#define __pyx_kp_u__136 __pyx_mstate_global->__pyx_kp_u__136
+#define __pyx_kp_u__137 __pyx_mstate_global->__pyx_kp_u__137
+#define __pyx_n_u__138 __pyx_mstate_global->__pyx_n_u__138
+#define __pyx_kp_u__139 __pyx_mstate_global->__pyx_kp_u__139
+#define __pyx_n_u__14 __pyx_mstate_global->__pyx_n_u__14
+#define __pyx_kp_u__140 __pyx_mstate_global->__pyx_kp_u__140
+#define __pyx_kp_u__141 __pyx_mstate_global->__pyx_kp_u__141
+#define __pyx_kp_u__142 __pyx_mstate_global->__pyx_kp_u__142
+#define __pyx_kp_u__143 __pyx_mstate_global->__pyx_kp_u__143
+#define __pyx_kp_u__144 __pyx_mstate_global->__pyx_kp_u__144
+#define __pyx_kp_u__145 __pyx_mstate_global->__pyx_kp_u__145
+#define __pyx_kp_u__146 __pyx_mstate_global->__pyx_kp_u__146
+#define __pyx_kp_u__147 __pyx_mstate_global->__pyx_kp_u__147
+#define __pyx_kp_u__148 __pyx_mstate_global->__pyx_kp_u__148
+#define __pyx_kp_u__149 __pyx_mstate_global->__pyx_kp_u__149
+#define __pyx_n_u__15 __pyx_mstate_global->__pyx_n_u__15
+#define __pyx_kp_u__151 __pyx_mstate_global->__pyx_kp_u__151
+#define __pyx_kp_u__153 __pyx_mstate_global->__pyx_kp_u__153
+#define __pyx_kp_u__154 __pyx_mstate_global->__pyx_kp_u__154
+#define __pyx_n_u__155 __pyx_mstate_global->__pyx_n_u__155
+#define __pyx_kp_u__156 __pyx_mstate_global->__pyx_kp_u__156
+#define __pyx_kp_u__157 __pyx_mstate_global->__pyx_kp_u__157
+#define __pyx_kp_u__159 __pyx_mstate_global->__pyx_kp_u__159
+#define __pyx_n_u__16 __pyx_mstate_global->__pyx_n_u__16
+#define __pyx_kp_u__160 __pyx_mstate_global->__pyx_kp_u__160
+#define __pyx_kp_u__162 __pyx_mstate_global->__pyx_kp_u__162
+#define __pyx_kp_u__163 __pyx_mstate_global->__pyx_kp_u__163
+#define __pyx_kp_u__165 __pyx_mstate_global->__pyx_kp_u__165
+#define __pyx_n_u__166 __pyx_mstate_global->__pyx_n_u__166
+#define __pyx_kp_u__168 __pyx_mstate_global->__pyx_kp_u__168
+#define __pyx_n_u__169 __pyx_mstate_global->__pyx_n_u__169
+#define __pyx_n_u__17 __pyx_mstate_global->__pyx_n_u__17
+#define __pyx_n_s__171 __pyx_mstate_global->__pyx_n_s__171
+#define __pyx_n_u__18 __pyx_mstate_global->__pyx_n_u__18
+#define __pyx_n_s__182 __pyx_mstate_global->__pyx_n_s__182
+#define __pyx_n_u__19 __pyx_mstate_global->__pyx_n_u__19
+#define __pyx_n_u__2 __pyx_mstate_global->__pyx_n_u__2
+#define __pyx_n_u__20 __pyx_mstate_global->__pyx_n_u__20
+#define __pyx_n_u__21 __pyx_mstate_global->__pyx_n_u__21
+#define __pyx_n_u__22 __pyx_mstate_global->__pyx_n_u__22
+#define __pyx_n_u__23 __pyx_mstate_global->__pyx_n_u__23
+#define __pyx_n_u__24 __pyx_mstate_global->__pyx_n_u__24
+#define __pyx_n_u__25 __pyx_mstate_global->__pyx_n_u__25
+#define __pyx_n_u__26 __pyx_mstate_global->__pyx_n_u__26
+#define __pyx_n_u__27 __pyx_mstate_global->__pyx_n_u__27
+#define __pyx_n_u__28 __pyx_mstate_global->__pyx_n_u__28
+#define __pyx_n_u__29 __pyx_mstate_global->__pyx_n_u__29
+#define __pyx_n_u__3 __pyx_mstate_global->__pyx_n_u__3
+#define __pyx_n_u__30 __pyx_mstate_global->__pyx_n_u__30
+#define __pyx_n_u__31 __pyx_mstate_global->__pyx_n_u__31
+#define __pyx_n_u__32 __pyx_mstate_global->__pyx_n_u__32
+#define __pyx_n_u__33 __pyx_mstate_global->__pyx_n_u__33
+#define __pyx_n_u__34 __pyx_mstate_global->__pyx_n_u__34
+#define __pyx_n_u__35 __pyx_mstate_global->__pyx_n_u__35
+#define __pyx_n_u__36 __pyx_mstate_global->__pyx_n_u__36
+#define __pyx_n_u__37 __pyx_mstate_global->__pyx_n_u__37
+#define __pyx_n_u__38 __pyx_mstate_global->__pyx_n_u__38
+#define __pyx_n_u__39 __pyx_mstate_global->__pyx_n_u__39
+#define __pyx_n_u__4 __pyx_mstate_global->__pyx_n_u__4
+#define __pyx_n_u__40 __pyx_mstate_global->__pyx_n_u__40
+#define __pyx_n_u__41 __pyx_mstate_global->__pyx_n_u__41
+#define __pyx_n_u__42 __pyx_mstate_global->__pyx_n_u__42
+#define __pyx_n_u__43 __pyx_mstate_global->__pyx_n_u__43
+#define __pyx_n_u__44 __pyx_mstate_global->__pyx_n_u__44
+#define __pyx_n_u__45 __pyx_mstate_global->__pyx_n_u__45
+#define __pyx_n_u__46 __pyx_mstate_global->__pyx_n_u__46
+#define __pyx_n_u__47 __pyx_mstate_global->__pyx_n_u__47
+#define __pyx_n_u__48 __pyx_mstate_global->__pyx_n_u__48
+#define __pyx_n_u__49 __pyx_mstate_global->__pyx_n_u__49
+#define __pyx_n_u__5 __pyx_mstate_global->__pyx_n_u__5
+#define __pyx_n_u__50 __pyx_mstate_global->__pyx_n_u__50
+#define __pyx_n_u__51 __pyx_mstate_global->__pyx_n_u__51
+#define __pyx_n_u__52 __pyx_mstate_global->__pyx_n_u__52
+#define __pyx_n_u__53 __pyx_mstate_global->__pyx_n_u__53
+#define __pyx_n_u__54 __pyx_mstate_global->__pyx_n_u__54
+#define __pyx_n_u__55 __pyx_mstate_global->__pyx_n_u__55
+#define __pyx_n_u__56 __pyx_mstate_global->__pyx_n_u__56
+#define __pyx_n_u__57 __pyx_mstate_global->__pyx_n_u__57
+#define __pyx_n_u__58 __pyx_mstate_global->__pyx_n_u__58
+#define __pyx_n_u__59 __pyx_mstate_global->__pyx_n_u__59
+#define __pyx_n_u__6 __pyx_mstate_global->__pyx_n_u__6
+#define __pyx_n_u__60 __pyx_mstate_global->__pyx_n_u__60
+#define __pyx_n_u__61 __pyx_mstate_global->__pyx_n_u__61
+#define __pyx_n_u__62 __pyx_mstate_global->__pyx_n_u__62
+#define __pyx_n_u__63 __pyx_mstate_global->__pyx_n_u__63
+#define __pyx_n_u__64 __pyx_mstate_global->__pyx_n_u__64
+#define __pyx_n_u__65 __pyx_mstate_global->__pyx_n_u__65
+#define __pyx_n_u__66 __pyx_mstate_global->__pyx_n_u__66
+#define __pyx_n_u__67 __pyx_mstate_global->__pyx_n_u__67
+#define __pyx_n_u__68 __pyx_mstate_global->__pyx_n_u__68
+#define __pyx_n_u__69 __pyx_mstate_global->__pyx_n_u__69
+#define __pyx_n_u__7 __pyx_mstate_global->__pyx_n_u__7
+#define __pyx_n_u__70 __pyx_mstate_global->__pyx_n_u__70
+#define __pyx_n_u__71 __pyx_mstate_global->__pyx_n_u__71
+#define __pyx_n_u__72 __pyx_mstate_global->__pyx_n_u__72
+#define __pyx_n_u__73 __pyx_mstate_global->__pyx_n_u__73
+#define __pyx_n_u__74 __pyx_mstate_global->__pyx_n_u__74
+#define __pyx_n_u__75 __pyx_mstate_global->__pyx_n_u__75
+#define __pyx_n_u__76 __pyx_mstate_global->__pyx_n_u__76
+#define __pyx_n_u__77 __pyx_mstate_global->__pyx_n_u__77
+#define __pyx_n_u__78 __pyx_mstate_global->__pyx_n_u__78
+#define __pyx_n_u__79 __pyx_mstate_global->__pyx_n_u__79
+#define __pyx_n_u__8 __pyx_mstate_global->__pyx_n_u__8
+#define __pyx_n_u__80 __pyx_mstate_global->__pyx_n_u__80
+#define __pyx_n_u__81 __pyx_mstate_global->__pyx_n_u__81
+#define __pyx_n_u__82 __pyx_mstate_global->__pyx_n_u__82
+#define __pyx_n_u__83 __pyx_mstate_global->__pyx_n_u__83
+#define __pyx_n_u__84 __pyx_mstate_global->__pyx_n_u__84
+#define __pyx_n_u__85 __pyx_mstate_global->__pyx_n_u__85
+#define __pyx_n_u__86 __pyx_mstate_global->__pyx_n_u__86
+#define __pyx_n_u__87 __pyx_mstate_global->__pyx_n_u__87
+#define __pyx_n_u__88 __pyx_mstate_global->__pyx_n_u__88
+#define __pyx_n_u__89 __pyx_mstate_global->__pyx_n_u__89
+#define __pyx_n_u__9 __pyx_mstate_global->__pyx_n_u__9
+#define __pyx_n_u__90 __pyx_mstate_global->__pyx_n_u__90
+#define __pyx_n_u__91 __pyx_mstate_global->__pyx_n_u__91
+#define __pyx_n_u__92 __pyx_mstate_global->__pyx_n_u__92
+#define __pyx_n_u__93 __pyx_mstate_global->__pyx_n_u__93
+#define __pyx_n_u__94 __pyx_mstate_global->__pyx_n_u__94
+#define __pyx_n_u__95 __pyx_mstate_global->__pyx_n_u__95
+#define __pyx_n_u__96 __pyx_mstate_global->__pyx_n_u__96
+#define __pyx_n_u__97 __pyx_mstate_global->__pyx_n_u__97
+#define __pyx_n_u__98 __pyx_mstate_global->__pyx_n_u__98
+#define __pyx_n_u__99 __pyx_mstate_global->__pyx_n_u__99
+#define __pyx_n_u_as __pyx_mstate_global->__pyx_n_u_as
+#define __pyx_n_u_asm_Beng __pyx_mstate_global->__pyx_n_u_asm_Beng
+#define __pyx_n_s_asyncio_coroutines __pyx_mstate_global->__pyx_n_s_asyncio_coroutines
+#define __pyx_n_u_awa_Deva __pyx_mstate_global->__pyx_n_u_awa_Deva
+#define __pyx_kp_u_b_w_https_ftp_w_w_w_b __pyx_mstate_global->__pyx_kp_u_b_w_https_ftp_w_w_w_b
+#define __pyx_n_s_batch __pyx_mstate_global->__pyx_n_s_batch
+#define __pyx_n_u_ben_Beng __pyx_mstate_global->__pyx_n_u_ben_Beng
+#define __pyx_n_u_bho_Deva __pyx_mstate_global->__pyx_n_u_bho_Deva
+#define __pyx_n_u_bn __pyx_mstate_global->__pyx_n_u_bn
+#define __pyx_n_u_brx_Deva __pyx_mstate_global->__pyx_n_u_brx_Deva
+#define __pyx_n_s_chr __pyx_mstate_global->__pyx_n_s_chr
+#define __pyx_n_s_cinit___locals_lambda __pyx_mstate_global->__pyx_n_s_cinit___locals_lambda
+#define __pyx_n_s_clear __pyx_mstate_global->__pyx_n_s_clear
+#define __pyx_n_s_cline_in_traceback __pyx_mstate_global->__pyx_n_s_cline_in_traceback
+#define __pyx_n_s_compile __pyx_mstate_global->__pyx_n_s_compile
+#define __pyx_kp_u_d __pyx_mstate_global->__pyx_kp_u_d
+#define __pyx_kp_u_d_d __pyx_mstate_global->__pyx_kp_u_d_d
+#define __pyx_kp_u_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d __pyx_mstate_global->__pyx_kp_u_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d
+#define __pyx_n_s_desc __pyx_mstate_global->__pyx_n_s_desc
+#define __pyx_n_s_detokenize __pyx_mstate_global->__pyx_n_s_detokenize
+#define __pyx_kp_u_disable __pyx_mstate_global->__pyx_kp_u_disable
+#define __pyx_n_u_doi_Deva __pyx_mstate_global->__pyx_n_u_doi_Deva
+#define __pyx_n_u_en __pyx_mstate_global->__pyx_n_u_en
+#define __pyx_kp_u_enable __pyx_mstate_global->__pyx_kp_u_enable
+#define __pyx_n_u_eng_Latn __pyx_mstate_global->__pyx_n_u_eng_Latn
+#define __pyx_n_s_escape __pyx_mstate_global->__pyx_n_s_escape
+#define __pyx_n_s_findall __pyx_mstate_global->__pyx_n_s_findall
+#define __pyx_kp_u_gc __pyx_mstate_global->__pyx_kp_u_gc
+#define __pyx_n_s_get __pyx_mstate_global->__pyx_n_s_get
+#define __pyx_n_s_get_normalizer __pyx_mstate_global->__pyx_n_s_get_normalizer
+#define __pyx_n_s_getstate __pyx_mstate_global->__pyx_n_s_getstate
+#define __pyx_n_u_gom_Deva __pyx_mstate_global->__pyx_n_u_gom_Deva
+#define __pyx_n_u_gon_Deva __pyx_mstate_global->__pyx_n_u_gon_Deva
+#define __pyx_n_s_group __pyx_mstate_global->__pyx_n_s_group
+#define __pyx_n_u_gu __pyx_mstate_global->__pyx_n_u_gu
+#define __pyx_n_u_guj_Gujr __pyx_mstate_global->__pyx_n_u_guj_Gujr
+#define __pyx_n_u_hi __pyx_mstate_global->__pyx_n_u_hi
+#define __pyx_n_u_hin_Deva __pyx_mstate_global->__pyx_n_u_hin_Deva
+#define __pyx_n_u_hne_Deva __pyx_mstate_global->__pyx_n_u_hne_Deva
+#define __pyx_n_s_import __pyx_mstate_global->__pyx_n_s_import
+#define __pyx_n_s_indic_detokenize __pyx_mstate_global->__pyx_n_s_indic_detokenize
+#define __pyx_n_s_indic_tokenize __pyx_mstate_global->__pyx_n_s_indic_tokenize
+#define __pyx_n_s_indicnlp_normalize_indic_normali __pyx_mstate_global->__pyx_n_s_indicnlp_normalize_indic_normali
+#define __pyx_n_s_indicnlp_tokenize __pyx_mstate_global->__pyx_n_s_indicnlp_tokenize
+#define __pyx_n_s_indicnlp_transliterate_unicode_t __pyx_mstate_global->__pyx_n_s_indicnlp_transliterate_unicode_t
+#define __pyx_n_s_inference __pyx_mstate_global->__pyx_n_s_inference
+#define __pyx_n_s_initializing __pyx_mstate_global->__pyx_n_s_initializing
+#define __pyx_n_s_is_coroutine __pyx_mstate_global->__pyx_n_s_is_coroutine
+#define __pyx_n_s_is_target __pyx_mstate_global->__pyx_n_s_is_target
+#define __pyx_kp_u_isenabled __pyx_mstate_global->__pyx_kp_u_isenabled
+#define __pyx_n_s_items __pyx_mstate_global->__pyx_n_s_items
+#define __pyx_n_u_kK __pyx_mstate_global->__pyx_n_u_kK
+#define __pyx_n_u_kan_Knda __pyx_mstate_global->__pyx_n_u_kan_Knda
+#define __pyx_n_u_kas_Arab __pyx_mstate_global->__pyx_n_u_kas_Arab
+#define __pyx_n_u_kas_Deva __pyx_mstate_global->__pyx_n_u_kas_Deva
+#define __pyx_n_u_kha_Latn __pyx_mstate_global->__pyx_n_u_kha_Latn
+#define __pyx_n_u_kn __pyx_mstate_global->__pyx_n_u_kn
+#define __pyx_n_s_lang __pyx_mstate_global->__pyx_n_s_lang
+#define __pyx_n_u_line __pyx_mstate_global->__pyx_n_u_line
+#define __pyx_n_u_lus_Latn __pyx_mstate_global->__pyx_n_u_lus_Latn
+#define __pyx_n_s_m __pyx_mstate_global->__pyx_n_s_m
+#define __pyx_n_u_mag_Deva __pyx_mstate_global->__pyx_n_u_mag_Deva
+#define __pyx_n_u_mai_Deva __pyx_mstate_global->__pyx_n_u_mai_Deva
+#define __pyx_n_s_main __pyx_mstate_global->__pyx_n_s_main
+#define __pyx_n_u_mal_Mlym __pyx_mstate_global->__pyx_n_u_mal_Mlym
+#define __pyx_n_u_mar_Deva __pyx_mstate_global->__pyx_n_u_mar_Deva
+#define __pyx_n_u_ml __pyx_mstate_global->__pyx_n_u_ml
+#define __pyx_n_u_mni_Beng __pyx_mstate_global->__pyx_n_u_mni_Beng
+#define __pyx_n_u_mni_Mtei __pyx_mstate_global->__pyx_n_u_mni_Mtei
+#define __pyx_n_u_mr __pyx_mstate_global->__pyx_n_u_mr
+#define __pyx_kp_u_n __pyx_mstate_global->__pyx_kp_u_n
+#define __pyx_kp_u_n_2 __pyx_mstate_global->__pyx_kp_u_n_2
+#define __pyx_n_s_name __pyx_mstate_global->__pyx_n_s_name
+#define __pyx_n_u_ne __pyx_mstate_global->__pyx_n_u_ne
+#define __pyx_kp_s_no_default___reduce___due_to_non __pyx_mstate_global->__pyx_kp_s_no_default___reduce___due_to_non
+#define __pyx_n_s_normalize __pyx_mstate_global->__pyx_n_s_normalize
+#define __pyx_n_u_npi_Deva __pyx_mstate_global->__pyx_n_u_npi_Deva
+#define __pyx_n_u_or __pyx_mstate_global->__pyx_n_u_or
+#define __pyx_n_u_ory __pyx_mstate_global->__pyx_n_u_ory
+#define __pyx_n_u_ory_Orya __pyx_mstate_global->__pyx_n_u_ory_Orya
+#define __pyx_n_u_pa __pyx_mstate_global->__pyx_n_u_pa
+#define __pyx_n_u_pan_Guru __pyx_mstate_global->__pyx_n_u_pan_Guru
+#define __pyx_n_s_postprocess_batch __pyx_mstate_global->__pyx_n_s_postprocess_batch
+#define __pyx_n_s_preprocess_batch __pyx_mstate_global->__pyx_n_s_preprocess_batch
+#define __pyx_n_s_put __pyx_mstate_global->__pyx_n_s_put
+#define __pyx_n_s_pyx_state __pyx_mstate_global->__pyx_n_s_pyx_state
+#define __pyx_n_s_pyx_vtable __pyx_mstate_global->__pyx_n_s_pyx_vtable
+#define __pyx_n_s_queue __pyx_mstate_global->__pyx_n_s_queue
+#define __pyx_kp_u_r __pyx_mstate_global->__pyx_kp_u_r
+#define __pyx_n_s_range __pyx_mstate_global->__pyx_n_s_range
+#define __pyx_n_s_re __pyx_mstate_global->__pyx_n_s_re
+#define __pyx_n_s_reduce __pyx_mstate_global->__pyx_n_s_reduce
+#define __pyx_n_s_reduce_cython __pyx_mstate_global->__pyx_n_s_reduce_cython
+#define __pyx_n_s_reduce_ex __pyx_mstate_global->__pyx_n_s_reduce_ex
+#define __pyx_n_s_regex __pyx_mstate_global->__pyx_n_s_regex
+#define __pyx_n_s_replace __pyx_mstate_global->__pyx_n_s_replace
+#define __pyx_kp_u_s __pyx_mstate_global->__pyx_kp_u_s
+#define __pyx_kp_u_s_2 __pyx_mstate_global->__pyx_kp_u_s_2
+#define __pyx_kp_u_s_3 __pyx_mstate_global->__pyx_kp_u_s_3
+#define __pyx_kp_u_s_s __pyx_mstate_global->__pyx_kp_u_s_s
+#define __pyx_kp_u_s_s_2 __pyx_mstate_global->__pyx_kp_u_s_s_2
+#define __pyx_n_s_sacremoses __pyx_mstate_global->__pyx_n_s_sacremoses
+#define __pyx_n_u_san_Deva __pyx_mstate_global->__pyx_n_u_san_Deva
+#define __pyx_n_u_sat_Olck __pyx_mstate_global->__pyx_n_u_sat_Olck
+#define __pyx_n_s_self __pyx_mstate_global->__pyx_n_s_self
+#define __pyx_n_s_sents __pyx_mstate_global->__pyx_n_s_sents
+#define __pyx_n_s_setstate __pyx_mstate_global->__pyx_n_s_setstate
+#define __pyx_n_s_setstate_cython __pyx_mstate_global->__pyx_n_s_setstate_cython
+#define __pyx_n_u_snd_Arab __pyx_mstate_global->__pyx_n_u_snd_Arab
+#define __pyx_n_u_snd_Deva __pyx_mstate_global->__pyx_n_u_snd_Deva
+#define __pyx_n_s_spec __pyx_mstate_global->__pyx_n_s_spec
+#define __pyx_n_s_split __pyx_mstate_global->__pyx_n_s_split
+#define __pyx_n_s_src_lang __pyx_mstate_global->__pyx_n_s_src_lang
+#define __pyx_kp_s_stringsource __pyx_mstate_global->__pyx_kp_s_stringsource
+#define __pyx_n_s_strip __pyx_mstate_global->__pyx_n_s_strip
+#define __pyx_n_s_sub __pyx_mstate_global->__pyx_n_s_sub
+#define __pyx_n_u_ta __pyx_mstate_global->__pyx_n_u_ta
+#define __pyx_n_u_tam_Taml __pyx_mstate_global->__pyx_n_u_tam_Taml
+#define __pyx_n_u_te __pyx_mstate_global->__pyx_n_u_te
+#define __pyx_n_u_tel_Telu __pyx_mstate_global->__pyx_n_u_tel_Telu
+#define __pyx_n_s_test __pyx_mstate_global->__pyx_n_s_test
+#define __pyx_n_s_tgt_lang __pyx_mstate_global->__pyx_n_s_tgt_lang
+#define __pyx_n_s_tokenize __pyx_mstate_global->__pyx_n_s_tokenize
+#define __pyx_n_s_total __pyx_mstate_global->__pyx_n_s_total
+#define __pyx_n_s_tqdm __pyx_mstate_global->__pyx_n_s_tqdm
+#define __pyx_n_s_translate __pyx_mstate_global->__pyx_n_s_translate
+#define __pyx_n_s_transliterate __pyx_mstate_global->__pyx_n_s_transliterate
+#define __pyx_n_s_trivial_detokenize __pyx_mstate_global->__pyx_n_s_trivial_detokenize
+#define __pyx_n_s_trivial_tokenize __pyx_mstate_global->__pyx_n_s_trivial_tokenize
+#define __pyx_n_s_typing __pyx_mstate_global->__pyx_n_s_typing
+#define __pyx_n_s_unit __pyx_mstate_global->__pyx_n_s_unit
+#define __pyx_n_u_unr_Deva __pyx_mstate_global->__pyx_n_u_unr_Deva
+#define __pyx_n_u_ur __pyx_mstate_global->__pyx_n_u_ur
+#define __pyx_n_u_urd_Arab __pyx_mstate_global->__pyx_n_u_urd_Arab
+#define __pyx_n_s_visualize __pyx_mstate_global->__pyx_n_s_visualize
+#define __pyx_int_0 __pyx_mstate_global->__pyx_int_0
+#define __pyx_tuple__150 __pyx_mstate_global->__pyx_tuple__150
+#define __pyx_tuple__152 __pyx_mstate_global->__pyx_tuple__152
+#define __pyx_tuple__158 __pyx_mstate_global->__pyx_tuple__158
+#define __pyx_tuple__161 __pyx_mstate_global->__pyx_tuple__161
+#define __pyx_tuple__164 __pyx_mstate_global->__pyx_tuple__164
+#define __pyx_tuple__167 __pyx_mstate_global->__pyx_tuple__167
+#define __pyx_tuple__170 __pyx_mstate_global->__pyx_tuple__170
+#define __pyx_tuple__172 __pyx_mstate_global->__pyx_tuple__172
+#define __pyx_tuple__174 __pyx_mstate_global->__pyx_tuple__174
+#define __pyx_tuple__175 __pyx_mstate_global->__pyx_tuple__175
+#define __pyx_tuple__177 __pyx_mstate_global->__pyx_tuple__177
+#define __pyx_tuple__178 __pyx_mstate_global->__pyx_tuple__178
+#define __pyx_tuple__180 __pyx_mstate_global->__pyx_tuple__180
+#define __pyx_codeobj__173 __pyx_mstate_global->__pyx_codeobj__173
+#define __pyx_codeobj__176 __pyx_mstate_global->__pyx_codeobj__176
+#define __pyx_codeobj__179 __pyx_mstate_global->__pyx_codeobj__179
+#define __pyx_codeobj__181 __pyx_mstate_global->__pyx_codeobj__181
+/* #### Code section: module_code ### */
+
+/* "IndicTransToolkit/processor.pyx":50
+ * cdef object _xliterator
+ *
+ * def __cinit__(self, bint inference=True): # <<<<<<<<<<<<<<
+ * """
+ * Constructor for IndicProcessor. Initializes all necessary components.
+ */
+
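+/* The wrapper below unpacks the Python-level call IndicProcessor(inference=True):
+ * it accepts at most one positional or keyword argument named "inference",
+ * coerces it to a C int with __Pyx_PyObject_IsTrue, and falls back to the
+ * default of 1 (True) from the .pyx signature when the argument is omitted,
+ * before dispatching to the actual __cinit__ implementation. */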
+/* Python wrapper */
+static int __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static int __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ int __pyx_v_inference;
+ CYTHON_UNUSED Py_ssize_t __pyx_nargs;
+ CYTHON_UNUSED PyObject *const *__pyx_kwvalues;
+ PyObject* values[1] = {0};
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0);
+ #if CYTHON_ASSUME_SAFE_MACROS
+ __pyx_nargs = PyTuple_GET_SIZE(__pyx_args);
+ #else
+ __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return -1;
+ #endif
+ __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs);
+ {
+ PyObject **__pyx_pyargnames[] = {&__pyx_n_s_inference,0};
+ if (__pyx_kwds) {
+ Py_ssize_t kw_args;
+ switch (__pyx_nargs) {
+ case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0);
+ CYTHON_FALLTHROUGH;
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = __Pyx_NumKwargs_VARARGS(__pyx_kwds);
+ switch (__pyx_nargs) {
+ case 0:
+ if (kw_args > 0) {
+ PyObject* value = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_inference);
+ if (value) { values[0] = __Pyx_Arg_NewRef_VARARGS(value); kw_args--; }
+ else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 50, __pyx_L3_error)
+ }
+ }
+ if (unlikely(kw_args > 0)) {
+ const Py_ssize_t kwd_pos_args = __pyx_nargs;
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__cinit__") < 0)) __PYX_ERR(0, 50, __pyx_L3_error)
+ }
+ } else {
+ switch (__pyx_nargs) {
+ case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0);
+ CYTHON_FALLTHROUGH;
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ }
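+  /* Coerce the parsed argument to a C boolean, or apply the default. */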
+ if (values[0]) {
+ __pyx_v_inference = __Pyx_PyObject_IsTrue(values[0]); if (unlikely((__pyx_v_inference == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 50, __pyx_L3_error)
+ } else {
+ __pyx_v_inference = ((int)1);
+ }
+ }
+ goto __pyx_L6_skip;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 0, 1, __pyx_nargs); __PYX_ERR(0, 50, __pyx_L3_error)
+ __pyx_L6_skip:;
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L3_error:;
+ {
+ Py_ssize_t __pyx_temp;
+ for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
+ __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]);
+ }
+ }
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return -1;
+ __pyx_L4_argument_unpacking_done:;
+ __pyx_r = __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor___cinit__(((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self), __pyx_v_inference);
+
+ /* function exit code */
+ {
+ Py_ssize_t __pyx_temp;
+ for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
+ __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]);
+ }
+ }
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
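+
+/* A note on the wrapper above: it implements the Python-level calling
+ * convention of __cinit__(self, bint inference=True) -- zero or one
+ * positional argument, or the keyword "inference", defaulting to True.
+ * A minimal usage sketch (assuming the built module exposes IndicProcessor):
+ *
+ * from IndicTransToolkit.processor import IndicProcessor
+ * ip = IndicProcessor()                 # inference defaults to True
+ * ip = IndicProcessor(False)            # one positional argument
+ * ip = IndicProcessor(inference=True)   # keyword form; extras -> TypeError
+ */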
+
+/* "IndicTransToolkit/processor.pyx":197
+ * (re.compile(r"n"), "n "),
+ * (re.compile(r"C"), " C"),
+ * (re.compile(r"[?!;]"), lambda m: m.group(0).strip()), # <<<<<<<<<<<<<<
+ * (re.compile(r","), ", "),
+ * ]
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9__cinit___lambda(PyObject *__pyx_self,
+#if CYTHON_METH_FASTCALL
+PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
+#else
+PyObject *__pyx_args, PyObject *__pyx_kwds
+#endif
+); /*proto*/
+static PyMethodDef __pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_9__cinit___lambda = {"lambda", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9__cinit___lambda, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0};
+static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9__cinit___lambda(PyObject *__pyx_self,
+#if CYTHON_METH_FASTCALL
+PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
+#else
+PyObject *__pyx_args, PyObject *__pyx_kwds
+#endif
+) {
+ PyObject *__pyx_v_m = 0;
+ #if !CYTHON_METH_FASTCALL
+ CYTHON_UNUSED Py_ssize_t __pyx_nargs;
+ #endif
+ CYTHON_UNUSED PyObject *const *__pyx_kwvalues;
+ PyObject* values[1] = {0};
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("lambda (wrapper)", 0);
+ #if !CYTHON_METH_FASTCALL
+ #if CYTHON_ASSUME_SAFE_MACROS
+ __pyx_nargs = PyTuple_GET_SIZE(__pyx_args);
+ #else
+ __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL;
+ #endif
+ #endif
+ __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs);
+ {
+ PyObject **__pyx_pyargnames[] = {&__pyx_n_s_m,0};
+ if (__pyx_kwds) {
+ Py_ssize_t kw_args;
+ switch (__pyx_nargs) {
+ case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0);
+ CYTHON_FALLTHROUGH;
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds);
+ switch (__pyx_nargs) {
+ case 0:
+ if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_m)) != 0)) {
+ (void)__Pyx_Arg_NewRef_FASTCALL(values[0]);
+ kw_args--;
+ }
+ else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 197, __pyx_L3_error)
+ else goto __pyx_L5_argtuple_error;
+ }
+ if (unlikely(kw_args > 0)) {
+ const Py_ssize_t kwd_pos_args = __pyx_nargs;
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "lambda") < 0)) __PYX_ERR(0, 197, __pyx_L3_error)
+ }
+ } else if (unlikely(__pyx_nargs != 1)) {
+ goto __pyx_L5_argtuple_error;
+ } else {
+ values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0);
+ }
+ __pyx_v_m = values[0];
+ }
+ goto __pyx_L6_skip;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("lambda", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 197, __pyx_L3_error)
+ __pyx_L6_skip:;
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L3_error:;
+ {
+ Py_ssize_t __pyx_temp;
+ for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
+ __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]);
+ }
+ }
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.__cinit__.lambda", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ __pyx_L4_argument_unpacking_done:;
+ __pyx_r = __pyx_lambda_funcdef_lambda(__pyx_self, __pyx_v_m);
+
+ /* function exit code */
+ {
+ Py_ssize_t __pyx_temp;
+ for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
+ __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]);
+ }
+ }
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_lambda_funcdef_lambda(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_m) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ unsigned int __pyx_t_5;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("lambda", 1);
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_m, __pyx_n_s_group); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 197, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_4 = NULL;
+ __pyx_t_5 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_3))) {
+ __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3);
+ if (likely(__pyx_t_4)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
+ __Pyx_INCREF(__pyx_t_4);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_3, function);
+ __pyx_t_5 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_4, __pyx_int_0};
+ __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_3, __pyx_callargs+1-__pyx_t_5, 1+__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
+ if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 197, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ }
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_strip); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 197, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_t_2 = NULL;
+ __pyx_t_5 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_3))) {
+ __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3);
+ if (likely(__pyx_t_2)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
+ __Pyx_INCREF(__pyx_t_2);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_3, function);
+ __pyx_t_5 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_2, NULL};
+ __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_3, __pyx_callargs+1-__pyx_t_5, 0+__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
+ if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 197, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ }
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.__cinit__.lambda", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
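+
+/* __pyx_lambda_funcdef_lambda is the compiled body of
+ * lambda m: m.group(0).strip() -- fetch the whole match, then strip it.
+ * Used as a re.sub replacement it rewrites each match to its
+ * whitespace-trimmed self; a sketch with a hypothetical pattern that can
+ * actually capture surrounding whitespace:
+ *
+ * import re
+ * re.sub(r"\s*[?!;]", lambda m: m.group(0).strip(), "wait !")  # -> 'wait!'
+ */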
+
+/* "IndicTransToolkit/processor.pyx":50
+ * cdef object _xliterator
+ *
+ * def __cinit__(self, bint inference=True): # <<<<<<<<<<<<<<
+ * """
+ * Constructor for IndicProcessor. Initializes all necessary components.
+ */
+
+static int __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor___cinit__(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, int __pyx_v_inference) {
+ PyObject *__pyx_v_digits_dict = 0;
+ PyObject *__pyx_v_k = NULL;
+ PyObject *__pyx_v_v = NULL;
+ long __pyx_v_c;
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ Py_ssize_t __pyx_t_2;
+ Py_ssize_t __pyx_t_3;
+ int __pyx_t_4;
+ PyObject *__pyx_t_5 = NULL;
+ PyObject *__pyx_t_6 = NULL;
+ int __pyx_t_7;
+ long __pyx_t_8;
+ long __pyx_t_9;
+ long __pyx_t_10;
+ unsigned int __pyx_t_11;
+ PyObject *__pyx_t_12 = NULL;
+ PyObject *__pyx_t_13 = NULL;
+ PyObject *__pyx_t_14 = NULL;
+ PyObject *__pyx_t_15 = NULL;
+ PyObject *__pyx_t_16 = NULL;
+ PyObject *__pyx_t_17 = NULL;
+ PyObject *__pyx_t_18 = NULL;
+ PyObject *__pyx_t_19 = NULL;
+ PyObject *__pyx_t_20 = NULL;
+ PyObject *__pyx_t_21 = NULL;
+ PyObject *__pyx_t_22 = NULL;
+ PyObject *__pyx_t_23 = NULL;
+ PyObject *__pyx_t_24 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__cinit__", 1);
+
+ /* "IndicTransToolkit/processor.pyx":54
+ * Constructor for IndicProcessor. Initializes all necessary components.
+ * """
+ * self.inference = inference # <<<<<<<<<<<<<<
+ *
+ * ##############################
+ */
+ __pyx_v_self->inference = __pyx_v_inference;
+
+ /* "IndicTransToolkit/processor.pyx":60
+ * ##############################
+ * self._flores_codes = {
+ * "asm_Beng": "as", # <<<<<<<<<<<<<<
+ * "awa_Deva": "hi",
+ * "ben_Beng": "bn",
+ */
+ __pyx_t_1 = __Pyx_PyDict_NewPresized(34); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 60, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_asm_Beng, __pyx_n_u_as) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_awa_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_ben_Beng, __pyx_n_u_bn) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_bho_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_brx_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_doi_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_eng_Latn, __pyx_n_u_en) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_gom_Deva, __pyx_n_u_kK) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_gon_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_guj_Gujr, __pyx_n_u_gu) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_hin_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_hne_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_kan_Knda, __pyx_n_u_kn) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_kas_Arab, __pyx_n_u_ur) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_kas_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_kha_Latn, __pyx_n_u_en) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_lus_Latn, __pyx_n_u_en) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_mag_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_mai_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_mal_Mlym, __pyx_n_u_ml) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_mar_Deva, __pyx_n_u_mr) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_mni_Beng, __pyx_n_u_bn) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_mni_Mtei, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_npi_Deva, __pyx_n_u_ne) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_ory_Orya, __pyx_n_u_or) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_pan_Guru, __pyx_n_u_pa) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_san_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_sat_Olck, __pyx_n_u_or) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_snd_Arab, __pyx_n_u_ur) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_snd_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_tam_Taml, __pyx_n_u_ta) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_tel_Telu, __pyx_n_u_te) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_urd_Arab, __pyx_n_u_ur) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_unr_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error)
+
+ /* "IndicTransToolkit/processor.pyx":59
+ * # FLORES -> ISO CODES
+ * ##############################
+ * self._flores_codes = { # <<<<<<<<<<<<<<
+ * "asm_Beng": "as",
+ * "awa_Deva": "hi",
+ */
+ __Pyx_GIVEREF(__pyx_t_1);
+ __Pyx_GOTREF(__pyx_v_self->_flores_codes);
+ __Pyx_DECREF(__pyx_v_self->_flores_codes);
+ __pyx_v_self->_flores_codes = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
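+
+/* _flores_codes maps FLORES-200 tags to the two-letter codes used by the
+ * Indic NLP Library; tags without a dedicated code map to a related one
+ * (awa_Deva/bho_Deva/... -> "hi", kha_Latn/lus_Latn -> "en", and
+ * gom_Deva -> "kK", that library's Konkani code). Inside the .pyx this is
+ * an ordinary dict lookup, e.g.:
+ *
+ * self._flores_codes["asm_Beng"]   # -> "as"
+ */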
+
+ /* "IndicTransToolkit/processor.pyx":99
+ * # INDIC DIGIT TRANSLATION (str.translate)
+ * ##############################
+ * self._digits_translation_table = {} # <<<<<<<<<<<<<<
+ * cdef dict digits_dict = {
+ * "\u09e6": "0", "\u0ae6": "0", "\u0ce6": "0", "\u0966": "0",
+ */
+ __pyx_t_1 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 99, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __Pyx_GOTREF(__pyx_v_self->_digits_translation_table);
+ __Pyx_DECREF(__pyx_v_self->_digits_translation_table);
+ __pyx_v_self->_digits_translation_table = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":101
+ * self._digits_translation_table = {}
+ * cdef dict digits_dict = {
+ * "\u09e6": "0", "\u0ae6": "0", "\u0ce6": "0", "\u0966": "0", # <<<<<<<<<<<<<<
+ * "\u0660": "0", "\uabf0": "0", "\u0b66": "0", "\u0a66": "0",
+ * "\u1c50": "0", "\u06f0": "0",
+ */
+ __pyx_t_1 = __Pyx_PyDict_NewPresized(100); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 101, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__2, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__3, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__4, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__5, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__6, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__7, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__8, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__9, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__10, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__11, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__12, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__13, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__14, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__15, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__16, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__17, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__18, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__19, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__20, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__21, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__22, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__23, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__24, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__25, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__26, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__27, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__28, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__29, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__30, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__31, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__32, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__33, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__34, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__35, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__36, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__37, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__38, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__39, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__40, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__41, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__42, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__43, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__44, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__45, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__46, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__47, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__48, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__49, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__50, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__51, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__52, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__53, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__54, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__55, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__56, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__57, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__58, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__59, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__60, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__61, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__62, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__63, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__64, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__65, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__66, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__67, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__68, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__69, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__70, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__71, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__72, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__73, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__74, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__75, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__76, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__77, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__78, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__79, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__80, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__81, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__82, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__83, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__84, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__85, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__86, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__87, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__88, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__89, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__90, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__91, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__92, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__93, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__94, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__95, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__96, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__97, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__98, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__99, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__100, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error)
+ __pyx_v_digits_dict = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":141
+ * "\u1c59": "9", "\u0c6f": "9",
+ * }
+ * for k, v in digits_dict.items(): # <<<<<<<<<<<<<<
+ * self._digits_translation_table[ord(k)] = v
+ *
+ */
+ __pyx_t_2 = 0;
+ __pyx_t_5 = __Pyx_dict_iterator(__pyx_v_digits_dict, 1, __pyx_n_s_items, (&__pyx_t_3), (&__pyx_t_4)); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 141, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_1);
+ __pyx_t_1 = __pyx_t_5;
+ __pyx_t_5 = 0;
+ while (1) {
+ __pyx_t_7 = __Pyx_dict_iter_next(__pyx_t_1, __pyx_t_3, &__pyx_t_2, &__pyx_t_5, &__pyx_t_6, NULL, __pyx_t_4);
+ if (unlikely(__pyx_t_7 == 0)) break;
+ if (unlikely(__pyx_t_7 == -1)) __PYX_ERR(0, 141, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_XDECREF_SET(__pyx_v_k, __pyx_t_5);
+ __pyx_t_5 = 0;
+ __Pyx_XDECREF_SET(__pyx_v_v, __pyx_t_6);
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":142
+ * }
+ * for k, v in digits_dict.items():
+ * self._digits_translation_table[ord(k)] = v # <<<<<<<<<<<<<<
+ *
+ * # Also map ASCII '0'-'9'
+ */
+ if (unlikely(__pyx_v_self->_digits_translation_table == Py_None)) {
+ PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
+ __PYX_ERR(0, 142, __pyx_L1_error)
+ }
+ __pyx_t_8 = __Pyx_PyObject_Ord(__pyx_v_k); if (unlikely(__pyx_t_8 == ((long)(long)(Py_UCS4)-1))) __PYX_ERR(0, 142, __pyx_L1_error)
+ __pyx_t_6 = __Pyx_PyInt_From_long(__pyx_t_8); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 142, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ if (unlikely((PyDict_SetItem(__pyx_v_self->_digits_translation_table, __pyx_t_6, __pyx_v_v) < 0))) __PYX_ERR(0, 142, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ }
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":145
+ *
+ * # Also map ASCII '0'-'9'
+ * for c in range(ord('0'), ord('9') + 1): # <<<<<<<<<<<<<<
+ * self._digits_translation_table[c] = chr(c)
+ *
+ */
+ __pyx_t_8 = (57 + 1);
+ __pyx_t_9 = __pyx_t_8;
+ for (__pyx_t_10 = 48; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
+ __pyx_v_c = __pyx_t_10;
+
+ /* "IndicTransToolkit/processor.pyx":146
+ * # Also map ASCII '0'-'9'
+ * for c in range(ord('0'), ord('9') + 1):
+ * self._digits_translation_table[c] = chr(c) # <<<<<<<<<<<<<<
+ *
+ * ##############################
+ */
+ __pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_c); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 146, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_6 = __Pyx_PyObject_CallOneArg(__pyx_builtin_chr, __pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 146, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ if (unlikely(__pyx_v_self->_digits_translation_table == Py_None)) {
+ PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
+ __PYX_ERR(0, 146, __pyx_L1_error)
+ }
+ __pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_c); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 146, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ if (unlikely((PyDict_SetItem(__pyx_v_self->_digits_translation_table, __pyx_t_1, __pyx_t_6) < 0))) __PYX_ERR(0, 146, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ }
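+
+/* The two loops above build a str.translate table: every Indic digit from
+ * the ten script blocks in digits_dict maps to its ASCII value, and ASCII
+ * '0'-'9' map to themselves. Equivalent Python (sketch):
+ *
+ * table = {ord(k): v for k, v in digits_dict.items()}
+ * table.update({c: chr(c) for c in range(ord("0"), ord("9") + 1)})
+ * "\u0968\u0966\u0968\u096a".translate(table)   # Devanagari digits -> "2024"
+ */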
+
+ /* "IndicTransToolkit/processor.pyx":151
+ * # PLACEHOLDER MAP QUEUE
+ * ##############################
+ * self._placeholder_entity_maps = Queue() # <<<<<<<<<<<<<<
+ *
+ * ##############################
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_Queue); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 151, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_1))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_1);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_1, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, NULL};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_11, 0+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 151, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ }
+ __Pyx_GIVEREF(__pyx_t_6);
+ __Pyx_GOTREF(__pyx_v_self->_placeholder_entity_maps);
+ __Pyx_DECREF(__pyx_v_self->_placeholder_entity_maps);
+ __pyx_v_self->_placeholder_entity_maps = __pyx_t_6;
+ __pyx_t_6 = 0;
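+
+/* _placeholder_entity_maps is a Queue() (presumably Python's queue.Queue,
+ * bound at module import time). The attribute name suggests each
+ * preprocessed batch enqueues its placeholder -> original-entity dict so a
+ * later postprocessing step can consume the maps in FIFO order; a
+ * hypothetical sketch:
+ *
+ * self._placeholder_entity_maps.put({"<ID1>": "https://example.com"})
+ * mapping = self._placeholder_entity_maps.get()   # first in, first out
+ */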
+
+ /* "IndicTransToolkit/processor.pyx":156
+ * # MOSES (as Python objects)
+ * ##############################
+ * self._en_tok = MosesTokenizer(lang="en") # <<<<<<<<<<<<<<
+ * self._en_normalizer = MosesPunctNormalizer()
+ * self._en_detok = MosesDetokenizer(lang="en")
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_MosesTokenizer); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 156, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __pyx_t_1 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 156, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_lang, __pyx_n_u_en) < 0) __PYX_ERR(0, 156, __pyx_L1_error)
+ __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_empty_tuple, __pyx_t_1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 156, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __Pyx_GIVEREF(__pyx_t_5);
+ __Pyx_GOTREF(__pyx_v_self->_en_tok);
+ __Pyx_DECREF(__pyx_v_self->_en_tok);
+ __pyx_v_self->_en_tok = __pyx_t_5;
+ __pyx_t_5 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":157
+ * ##############################
+ * self._en_tok = MosesTokenizer(lang="en")
+ * self._en_normalizer = MosesPunctNormalizer() # <<<<<<<<<<<<<<
+ * self._en_detok = MosesDetokenizer(lang="en")
+ *
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_MosesPunctNormalizer); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 157, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_6 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_1))) {
+ __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_1);
+ if (likely(__pyx_t_6)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);
+ __Pyx_INCREF(__pyx_t_6);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_1, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_6, NULL};
+ __pyx_t_5 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_11, 0+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
+ if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 157, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ }
+ __Pyx_GIVEREF(__pyx_t_5);
+ __Pyx_GOTREF(__pyx_v_self->_en_normalizer);
+ __Pyx_DECREF(__pyx_v_self->_en_normalizer);
+ __pyx_v_self->_en_normalizer = __pyx_t_5;
+ __pyx_t_5 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":158
+ * self._en_tok = MosesTokenizer(lang="en")
+ * self._en_normalizer = MosesPunctNormalizer()
+ * self._en_detok = MosesDetokenizer(lang="en") # <<<<<<<<<<<<<<
+ *
+ * ##############################
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_MosesDetokenizer); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 158, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_1 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 158, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_lang, __pyx_n_u_en) < 0) __PYX_ERR(0, 158, __pyx_L1_error)
+ __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_5, __pyx_empty_tuple, __pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 158, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __Pyx_GIVEREF(__pyx_t_6);
+ __Pyx_GOTREF(__pyx_v_self->_en_detok);
+ __Pyx_DECREF(__pyx_v_self->_en_detok);
+ __pyx_v_self->_en_detok = __pyx_t_6;
+ __pyx_t_6 = 0;
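+
+/* The three Moses helpers are plain Python objects (the names match the
+ * public API of sacremoses). Assuming that is the library bound at module
+ * level, the English-side behaviour looks like:
+ *
+ * from sacremoses import MosesTokenizer, MosesDetokenizer
+ * tok = MosesTokenizer(lang="en")
+ * pieces = tok.tokenize("Hello, world!")           # ['Hello', ',', 'world', '!']
+ * MosesDetokenizer(lang="en").detokenize(pieces)   # 'Hello, world!'
+ */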
+
+ /* "IndicTransToolkit/processor.pyx":163
+ * # TRANSLITERATOR (Python object)
+ * ##############################
+ * self._xliterator = UnicodeIndicTransliterator() # <<<<<<<<<<<<<<
+ *
+ * ##############################
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_UnicodeIndicTransliterator); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 163, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_1))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_1);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_1, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, NULL};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_11, 0+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 163, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ }
+ __Pyx_GIVEREF(__pyx_t_6);
+ __Pyx_GOTREF(__pyx_v_self->_xliterator);
+ __Pyx_DECREF(__pyx_v_self->_xliterator);
+ __pyx_v_self->_xliterator = __pyx_t_6;
+ __pyx_t_6 = 0;
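+
+/* UnicodeIndicTransliterator is the Indic NLP Library's rule-based
+ * script-to-script converter. Assuming the usual indicnlp binding, it is
+ * driven through a static method:
+ *
+ * from indicnlp.transliterate.unicode_transliterate import (
+ *     UnicodeIndicTransliterator,
+ * )
+ * UnicodeIndicTransliterator.transliterate("\u0928\u092e\u0938\u094d\u0924\u0947", "hi", "pa")
+ */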
+
+ /* "IndicTransToolkit/processor.pyx":168
+ * # Precompiled Patterns
+ * ##############################
+ * self._MULTISPACE_REGEX = re.compile(r"[ ]{2,}") # <<<<<<<<<<<<<<
+ * self._DIGIT_SPACE_PERCENT = re.compile(r"(\d) %")
+ * self._DOUBLE_QUOT_PUNC = re.compile(r"\"([,\.]+)")
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_re); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 168, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_compile); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 168, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_5))) {
+ __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_5);
+ if (likely(__pyx_t_1)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
+ __Pyx_INCREF(__pyx_t_1);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_5, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_1, __pyx_kp_u_2_2};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 168, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ }
+ __Pyx_GIVEREF(__pyx_t_6);
+ __Pyx_GOTREF(__pyx_v_self->_MULTISPACE_REGEX);
+ __Pyx_DECREF(__pyx_v_self->_MULTISPACE_REGEX);
+ __pyx_v_self->_MULTISPACE_REGEX = __pyx_t_6;
+ __pyx_t_6 = 0;
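+
+/* This block and the ones that follow compile every pattern exactly once in
+ * __cinit__, so per-sentence preprocessing only pays for .sub()/.search()
+ * instead of re-parsing each regex on every call. Effect of this first
+ * pattern (sketch):
+ *
+ * self._MULTISPACE_REGEX.sub(" ", "a  b   c")   # -> 'a b c'
+ */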
+
+ /* "IndicTransToolkit/processor.pyx":169
+ * ##############################
+ * self._MULTISPACE_REGEX = re.compile(r"[ ]{2,}")
+ * self._DIGIT_SPACE_PERCENT = re.compile(r"(\d) %") # <<<<<<<<<<<<<<
+ * self._DOUBLE_QUOT_PUNC = re.compile(r"\"([,\.]+)")
+ * self._DIGIT_NBSP_DIGIT = re.compile(r"(\d)(\d)")
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 169, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 169, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_1))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_1);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_1, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_d};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 169, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ }
+ __Pyx_GIVEREF(__pyx_t_6);
+ __Pyx_GOTREF(__pyx_v_self->_DIGIT_SPACE_PERCENT);
+ __Pyx_DECREF(__pyx_v_self->_DIGIT_SPACE_PERCENT);
+ __pyx_v_self->_DIGIT_SPACE_PERCENT = __pyx_t_6;
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":170
+ * self._MULTISPACE_REGEX = re.compile(r"[ ]{2,}")
+ * self._DIGIT_SPACE_PERCENT = re.compile(r"(\d) %")
+ * self._DOUBLE_QUOT_PUNC = re.compile(r"\"([,\.]+)") # <<<<<<<<<<<<<<
+ * self._DIGIT_NBSP_DIGIT = re.compile(r"(\d)(\d)")
+ * self._END_BRACKET_SPACE_PUNC_REGEX = re.compile(r"\) ([\.!:?;,])")
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_re); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 170, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_compile); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 170, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_5))) {
+ __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_5);
+ if (likely(__pyx_t_1)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
+ __Pyx_INCREF(__pyx_t_1);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_5, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_1, __pyx_kp_u__101};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 170, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ }
+ __Pyx_GIVEREF(__pyx_t_6);
+ __Pyx_GOTREF(__pyx_v_self->_DOUBLE_QUOT_PUNC);
+ __Pyx_DECREF(__pyx_v_self->_DOUBLE_QUOT_PUNC);
+ __pyx_v_self->_DOUBLE_QUOT_PUNC = __pyx_t_6;
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":171
+ * self._DIGIT_SPACE_PERCENT = re.compile(r"(\d) %")
+ * self._DOUBLE_QUOT_PUNC = re.compile(r"\"([,\.]+)")
+ * self._DIGIT_NBSP_DIGIT = re.compile(r"(\d)(\d)") # <<<<<<<<<<<<<<
+ * self._END_BRACKET_SPACE_PUNC_REGEX = re.compile(r"\) ([\.!:?;,])")
+ *
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 171, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 171, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_1))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_1);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_1, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_d_d};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 171, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ }
+ __Pyx_GIVEREF(__pyx_t_6);
+ __Pyx_GOTREF(__pyx_v_self->_DIGIT_NBSP_DIGIT);
+ __Pyx_DECREF(__pyx_v_self->_DIGIT_NBSP_DIGIT);
+ __pyx_v_self->_DIGIT_NBSP_DIGIT = __pyx_t_6;
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":172
+ * self._DOUBLE_QUOT_PUNC = re.compile(r"\"([,\.]+)")
+ * self._DIGIT_NBSP_DIGIT = re.compile(r"(\d)(\d)")
+ * self._END_BRACKET_SPACE_PUNC_REGEX = re.compile(r"\) ([\.!:?;,])") # <<<<<<<<<<<<<<
+ *
+ * self._URL_PATTERN = re.compile(
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_re); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 172, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_compile); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 172, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_5))) {
+ __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_5);
+ if (likely(__pyx_t_1)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
+ __Pyx_INCREF(__pyx_t_1);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_5, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_1, __pyx_kp_u__102};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 172, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ }
+ __Pyx_GIVEREF(__pyx_t_6);
+ __Pyx_GOTREF(__pyx_v_self->_END_BRACKET_SPACE_PUNC_REGEX);
+ __Pyx_DECREF(__pyx_v_self->_END_BRACKET_SPACE_PUNC_REGEX);
+ __pyx_v_self->_END_BRACKET_SPACE_PUNC_REGEX = __pyx_t_6;
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":174
+ * self._END_BRACKET_SPACE_PUNC_REGEX = re.compile(r"\) ([\.!:?;,])")
+ *
+ * self._URL_PATTERN = re.compile( # <<<<<<<<<<<<<<
+ * r"\b(?_URL_PATTERN);
+ __Pyx_DECREF(__pyx_v_self->_URL_PATTERN);
+ __pyx_v_self->_URL_PATTERN = __pyx_t_6;
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":177
+ * r"\b(?_NUMERAL_PATTERN);
+ __Pyx_DECREF(__pyx_v_self->_NUMERAL_PATTERN);
+ __pyx_v_self->_NUMERAL_PATTERN = __pyx_t_6;
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":180
+ * r"(~?\d+\.?\d*\s?%?\s?-?\s?~?\d+\.?\d*\s?%|~?\d+%|\d+[-\/.,:']\d+[-\/.,:'+]\d+(?:\.\d+)?|\d+[-\/.:'+]\d+(?:\.\d+)?)"
+ * )
+ * self._EMAIL_PATTERN = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}") # <<<<<<<<<<<<<<
+ * self._OTHER_PATTERN = re.compile(r"[A-Za-z0-9]*[#|@]\w+")
+ *
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 180, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 180, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_1))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_1);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_1, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 180, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ }
+ __Pyx_GIVEREF(__pyx_t_6);
+ __Pyx_GOTREF(__pyx_v_self->_EMAIL_PATTERN);
+ __Pyx_DECREF(__pyx_v_self->_EMAIL_PATTERN);
+ __pyx_v_self->_EMAIL_PATTERN = __pyx_t_6;
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":181
+ * )
+ * self._EMAIL_PATTERN = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}")
+ * self._OTHER_PATTERN = re.compile(r"[A-Za-z0-9]*[#|@]\w+") # <<<<<<<<<<<<<<
+ *
+ * # Combined punctuation replacements
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_re); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 181, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_compile); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 181, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_5))) {
+ __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_5);
+ if (likely(__pyx_t_1)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
+ __Pyx_INCREF(__pyx_t_1);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_5, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_1, __pyx_kp_u_A_Za_z0_9_w};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 181, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ }
+ __Pyx_GIVEREF(__pyx_t_6);
+ __Pyx_GOTREF(__pyx_v_self->_OTHER_PATTERN);
+ __Pyx_DECREF(__pyx_v_self->_OTHER_PATTERN);
+ __pyx_v_self->_OTHER_PATTERN = __pyx_t_6;
+ __pyx_t_6 = 0;
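+
+/* _URL_PATTERN, _EMAIL_PATTERN and _OTHER_PATTERN (hashtag/mention-like
+ * tokens) appear to feed the placeholder substitution whose maps are queued
+ * in _placeholder_entity_maps above. Sketch of the email pattern alone:
+ *
+ * m = self._EMAIL_PATTERN.search("write to a.b@example.org today")
+ * m.group(0)   # -> 'a.b@example.org'
+ */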
+
+ /* "IndicTransToolkit/processor.pyx":185
+ * # Combined punctuation replacements
+ * self._PUNC_REPLACEMENTS = [
+ * (re.compile(r"\r"), ""), # <<<<<<<<<<<<<<
+ * (re.compile(r"\(\s*"), "("),
+ * (re.compile(r"\s*\)"), ")"),
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 185, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 185, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_1))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_1);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_1, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_r};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 185, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ }
+ __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 185, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_6);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_6)) __PYX_ERR(0, 185, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__103);
+ __Pyx_GIVEREF(__pyx_kp_u__103);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_kp_u__103)) __PYX_ERR(0, 185, __pyx_L1_error);
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":186
+ * self._PUNC_REPLACEMENTS = [
+ * (re.compile(r"\r"), ""),
+ * (re.compile(r"\(\s*"), "("), # <<<<<<<<<<<<<<
+ * (re.compile(r"\s*\)"), ")"),
+ * (re.compile(r"\s:\s?"), ":"),
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 186, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 186, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_12);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_12))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_12);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_12);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_12, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_s};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_12, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 186, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0;
+ }
+ __pyx_t_12 = PyTuple_New(2); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 186, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_12);
+ __Pyx_GIVEREF(__pyx_t_6);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_12, 0, __pyx_t_6)) __PYX_ERR(0, 186, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__104);
+ __Pyx_GIVEREF(__pyx_kp_u__104);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_12, 1, __pyx_kp_u__104)) __PYX_ERR(0, 186, __pyx_L1_error);
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":187
+ * (re.compile(r"\r"), ""),
+ * (re.compile(r"\(\s*"), "("),
+ * (re.compile(r"\s*\)"), ")"), # <<<<<<<<<<<<<<
+ * (re.compile(r"\s:\s?"), ":"),
+ * (re.compile(r"\s;\s?"), ";"),
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 187, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 187, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_13);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_13))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_13);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_13, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_s_2};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_13, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 187, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
+ }
+ __pyx_t_13 = PyTuple_New(2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 187, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_13);
+ __Pyx_GIVEREF(__pyx_t_6);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_6)) __PYX_ERR(0, 187, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__105);
+ __Pyx_GIVEREF(__pyx_kp_u__105);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_13, 1, __pyx_kp_u__105)) __PYX_ERR(0, 187, __pyx_L1_error);
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":188
+ * (re.compile(r"\(\s*"), "("),
+ * (re.compile(r"\s*\)"), ")"),
+ * (re.compile(r"\s:\s?"), ":"), # <<<<<<<<<<<<<<
+ * (re.compile(r"\s;\s?"), ";"),
+ * (re.compile(r"[`]"), "'"),
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 188, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 188, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_14);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_14))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_14);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_14);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_14, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_s_s};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_14, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 188, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0;
+ }
+ __pyx_t_14 = PyTuple_New(2); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 188, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_14);
+ __Pyx_GIVEREF(__pyx_t_6);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_t_6)) __PYX_ERR(0, 188, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__106);
+ __Pyx_GIVEREF(__pyx_kp_u__106);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 1, __pyx_kp_u__106)) __PYX_ERR(0, 188, __pyx_L1_error);
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":189
+ * (re.compile(r"\s*\)"), ")"),
+ * (re.compile(r"\s:\s?"), ":"),
+ * (re.compile(r"\s;\s?"), ";"), # <<<<<<<<<<<<<<
+ * (re.compile(r"[`]"), "'"),
+ * (re.compile(r"[]"), '"'),
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 189, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_15 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_15)) __PYX_ERR(0, 189, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_15);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_15))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_15);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_15);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_15, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_s_s_2};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_15, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 189, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0;
+ }
+ __pyx_t_15 = PyTuple_New(2); if (unlikely(!__pyx_t_15)) __PYX_ERR(0, 189, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_15);
+ __Pyx_GIVEREF(__pyx_t_6);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_6)) __PYX_ERR(0, 189, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__107);
+ __Pyx_GIVEREF(__pyx_kp_u__107);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_15, 1, __pyx_kp_u__107)) __PYX_ERR(0, 189, __pyx_L1_error);
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":190
+ * (re.compile(r"\s:\s?"), ":"),
+ * (re.compile(r"\s;\s?"), ";"),
+ * (re.compile(r"[`]"), "'"), # <<<<<<<<<<<<<<
+ * (re.compile(r"[]"), '"'),
+ * (re.compile(r"[]"), "-"),
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 190, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 190, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_16);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_16))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_16);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_16);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_16, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u__108};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_16, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 190, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0;
+ }
+ __pyx_t_16 = PyTuple_New(2); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 190, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_16);
+ __Pyx_GIVEREF(__pyx_t_6);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_6)) __PYX_ERR(0, 190, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__109);
+ __Pyx_GIVEREF(__pyx_kp_u__109);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_16, 1, __pyx_kp_u__109)) __PYX_ERR(0, 190, __pyx_L1_error);
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":191
+ * (re.compile(r"\s;\s?"), ";"),
+ * (re.compile(r"[`]"), "'"),
+ * (re.compile(r"[]"), '"'), # <<<<<<<<<<<<<<
+ * (re.compile(r"[]"), "-"),
+ * (re.compile(r"\.\.\."), "..."),
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 191, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 191, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_17);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_17))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_17);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_17, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u__110};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_17, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 191, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0;
+ }
+ __pyx_t_17 = PyTuple_New(2); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 191, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_17);
+ __Pyx_GIVEREF(__pyx_t_6);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_17, 0, __pyx_t_6)) __PYX_ERR(0, 191, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__111);
+ __Pyx_GIVEREF(__pyx_kp_u__111);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_17, 1, __pyx_kp_u__111)) __PYX_ERR(0, 191, __pyx_L1_error);
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":192
+ * (re.compile(r"[`]"), "'"),
+ * (re.compile(r"[]"), '"'),
+ * (re.compile(r"[]"), "-"), # <<<<<<<<<<<<<<
+ * (re.compile(r"\.\.\."), "..."),
+ * (re.compile(r"%"), "%"),
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 192, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 192, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_18);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_18))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_18);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_18);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_18, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u__112};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_18, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 192, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0;
+ }
+ __pyx_t_18 = PyTuple_New(2); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 192, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_18);
+ __Pyx_GIVEREF(__pyx_t_6);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_18, 0, __pyx_t_6)) __PYX_ERR(0, 192, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__113);
+ __Pyx_GIVEREF(__pyx_kp_u__113);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_18, 1, __pyx_kp_u__113)) __PYX_ERR(0, 192, __pyx_L1_error);
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":193
+ * (re.compile(r"[]"), '"'),
+ * (re.compile(r"[]"), "-"),
+ * (re.compile(r"\.\.\."), "..."), # <<<<<<<<<<<<<<
+ * (re.compile(r"%"), "%"),
+ * (re.compile(r"n"), "n "),
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 193, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 193, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_19);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_19))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_19);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_19);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_19, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u__114};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_19, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 193, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0;
+ }
+ __pyx_t_19 = PyTuple_New(2); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 193, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_19);
+ __Pyx_GIVEREF(__pyx_t_6);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_19, 0, __pyx_t_6)) __PYX_ERR(0, 193, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__115);
+ __Pyx_GIVEREF(__pyx_kp_u__115);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_19, 1, __pyx_kp_u__115)) __PYX_ERR(0, 193, __pyx_L1_error);
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":194
+ * (re.compile(r"[]"), "-"),
+ * (re.compile(r"\.\.\."), "..."),
+ * (re.compile(r"%"), "%"), # <<<<<<<<<<<<<<
+ * (re.compile(r"n"), "n "),
+ * (re.compile(r"C"), " C"),
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 194, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_20 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_20)) __PYX_ERR(0, 194, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_20);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_20))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_20);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_20);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_20, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u__116};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_20, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 194, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_20); __pyx_t_20 = 0;
+ }
+ __pyx_t_20 = PyTuple_New(2); if (unlikely(!__pyx_t_20)) __PYX_ERR(0, 194, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_20);
+ __Pyx_GIVEREF(__pyx_t_6);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_20, 0, __pyx_t_6)) __PYX_ERR(0, 194, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__117);
+ __Pyx_GIVEREF(__pyx_kp_u__117);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_20, 1, __pyx_kp_u__117)) __PYX_ERR(0, 194, __pyx_L1_error);
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":195
+ * (re.compile(r"\.\.\."), "..."),
+ * (re.compile(r"%"), "%"),
+ * (re.compile(r"n"), "n "), # <<<<<<<<<<<<<<
+ * (re.compile(r"C"), " C"),
+ * (re.compile(r"[?!;]"), lambda m: m.group(0).strip()),
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 195, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_21 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 195, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_21);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_21))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_21);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_21);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_21, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_n};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_21, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 195, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_21); __pyx_t_21 = 0;
+ }
+ __pyx_t_21 = PyTuple_New(2); if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 195, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_21);
+ __Pyx_GIVEREF(__pyx_t_6);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_21, 0, __pyx_t_6)) __PYX_ERR(0, 195, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u_n_2);
+ __Pyx_GIVEREF(__pyx_kp_u_n_2);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_21, 1, __pyx_kp_u_n_2)) __PYX_ERR(0, 195, __pyx_L1_error);
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":196
+ * (re.compile(r"%"), "%"),
+ * (re.compile(r"n"), "n "),
+ * (re.compile(r"C"), " C"), # <<<<<<<<<<<<<<
+ * (re.compile(r"[?!;]"), lambda m: m.group(0).strip()),
+ * (re.compile(r","), ", "),
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 196, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_22 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_22)) __PYX_ERR(0, 196, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_22);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_22))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_22);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_22);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_22, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_C};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_22, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 196, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_22); __pyx_t_22 = 0;
+ }
+ __pyx_t_22 = PyTuple_New(2); if (unlikely(!__pyx_t_22)) __PYX_ERR(0, 196, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_22);
+ __Pyx_GIVEREF(__pyx_t_6);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_22, 0, __pyx_t_6)) __PYX_ERR(0, 196, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u_C_2);
+ __Pyx_GIVEREF(__pyx_kp_u_C_2);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_22, 1, __pyx_kp_u_C_2)) __PYX_ERR(0, 196, __pyx_L1_error);
+ __pyx_t_6 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":197
+ * (re.compile(r"n"), "n "),
+ * (re.compile(r"C"), " C"),
+ * (re.compile(r"[?!;]"), lambda m: m.group(0).strip()), # <<<<<<<<<<<<<<
+ * (re.compile(r","), ", "),
+ * ]
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 197, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_23 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_23)) __PYX_ERR(0, 197, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_23);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_23))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_23);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_23);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_23, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u__118};
+ __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_23, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 197, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_23); __pyx_t_23 = 0;
+ }
+ __pyx_t_23 = __Pyx_CyFunction_New(&__pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_9__cinit___lambda, 0, __pyx_n_s_cinit___locals_lambda, NULL, __pyx_n_s_IndicTransToolkit_processor, __pyx_d, NULL); if (unlikely(!__pyx_t_23)) __PYX_ERR(0, 197, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_23);
+ __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 197, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_6);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_6)) __PYX_ERR(0, 197, __pyx_L1_error);
+ __Pyx_GIVEREF(__pyx_t_23);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_23)) __PYX_ERR(0, 197, __pyx_L1_error);
+ __pyx_t_6 = 0;
+ __pyx_t_23 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":198
+ * (re.compile(r"C"), " C"),
+ * (re.compile(r"[?!;]"), lambda m: m.group(0).strip()),
+ * (re.compile(r","), ", "), # <<<<<<<<<<<<<<
+ * ]
+ *
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_re); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 198, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __pyx_t_24 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_compile); if (unlikely(!__pyx_t_24)) __PYX_ERR(0, 198, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_24);
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ __pyx_t_6 = NULL;
+ __pyx_t_11 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_24))) {
+ __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_24);
+ if (likely(__pyx_t_6)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_24);
+ __Pyx_INCREF(__pyx_t_6);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_24, function);
+ __pyx_t_11 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_6, __pyx_kp_u__119};
+ __pyx_t_23 = __Pyx_PyObject_FastCall(__pyx_t_24, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11);
+ __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
+ if (unlikely(!__pyx_t_23)) __PYX_ERR(0, 198, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_23);
+ __Pyx_DECREF(__pyx_t_24); __pyx_t_24 = 0;
+ }
+ __pyx_t_24 = PyTuple_New(2); if (unlikely(!__pyx_t_24)) __PYX_ERR(0, 198, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_24);
+ __Pyx_GIVEREF(__pyx_t_23);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_24, 0, __pyx_t_23)) __PYX_ERR(0, 198, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__120);
+ __Pyx_GIVEREF(__pyx_kp_u__120);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_24, 1, __pyx_kp_u__120)) __PYX_ERR(0, 198, __pyx_L1_error);
+ __pyx_t_23 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":184
+ *
+ * # Combined punctuation replacements
+ * self._PUNC_REPLACEMENTS = [ # <<<<<<<<<<<<<<
+ * (re.compile(r"\r"), ""),
+ * (re.compile(r"\(\s*"), "("),
+ */
+ __pyx_t_23 = PyList_New(14); if (unlikely(!__pyx_t_23)) __PYX_ERR(0, 184, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_23);
+ __Pyx_GIVEREF(__pyx_t_1);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 0, __pyx_t_1)) __PYX_ERR(0, 184, __pyx_L1_error);
+ __Pyx_GIVEREF(__pyx_t_12);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 1, __pyx_t_12)) __PYX_ERR(0, 184, __pyx_L1_error);
+ __Pyx_GIVEREF(__pyx_t_13);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 2, __pyx_t_13)) __PYX_ERR(0, 184, __pyx_L1_error);
+ __Pyx_GIVEREF(__pyx_t_14);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 3, __pyx_t_14)) __PYX_ERR(0, 184, __pyx_L1_error);
+ __Pyx_GIVEREF(__pyx_t_15);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 4, __pyx_t_15)) __PYX_ERR(0, 184, __pyx_L1_error);
+ __Pyx_GIVEREF(__pyx_t_16);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 5, __pyx_t_16)) __PYX_ERR(0, 184, __pyx_L1_error);
+ __Pyx_GIVEREF(__pyx_t_17);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 6, __pyx_t_17)) __PYX_ERR(0, 184, __pyx_L1_error);
+ __Pyx_GIVEREF(__pyx_t_18);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 7, __pyx_t_18)) __PYX_ERR(0, 184, __pyx_L1_error);
+ __Pyx_GIVEREF(__pyx_t_19);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 8, __pyx_t_19)) __PYX_ERR(0, 184, __pyx_L1_error);
+ __Pyx_GIVEREF(__pyx_t_20);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 9, __pyx_t_20)) __PYX_ERR(0, 184, __pyx_L1_error);
+ __Pyx_GIVEREF(__pyx_t_21);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 10, __pyx_t_21)) __PYX_ERR(0, 184, __pyx_L1_error);
+ __Pyx_GIVEREF(__pyx_t_22);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 11, __pyx_t_22)) __PYX_ERR(0, 184, __pyx_L1_error);
+ __Pyx_GIVEREF(__pyx_t_5);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 12, __pyx_t_5)) __PYX_ERR(0, 184, __pyx_L1_error);
+ __Pyx_GIVEREF(__pyx_t_24);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 13, __pyx_t_24)) __PYX_ERR(0, 184, __pyx_L1_error);
+ __pyx_t_1 = 0;
+ __pyx_t_12 = 0;
+ __pyx_t_13 = 0;
+ __pyx_t_14 = 0;
+ __pyx_t_15 = 0;
+ __pyx_t_16 = 0;
+ __pyx_t_17 = 0;
+ __pyx_t_18 = 0;
+ __pyx_t_19 = 0;
+ __pyx_t_20 = 0;
+ __pyx_t_21 = 0;
+ __pyx_t_22 = 0;
+ __pyx_t_5 = 0;
+ __pyx_t_24 = 0;
+ __Pyx_GIVEREF(__pyx_t_23);
+ __Pyx_GOTREF(__pyx_v_self->_PUNC_REPLACEMENTS);
+ __Pyx_DECREF(__pyx_v_self->_PUNC_REPLACEMENTS);
+ __pyx_v_self->_PUNC_REPLACEMENTS = ((PyObject*)__pyx_t_23);
+ __pyx_t_23 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":201
+ * ]
+ *
+ * self._INDIC_FAILURE_CASES = [ # <<<<<<<<<<<<<<
+ * " ",
+ * "",
+ */
+ __pyx_t_23 = PyList_New(18); if (unlikely(!__pyx_t_23)) __PYX_ERR(0, 201, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_23);
+ __Pyx_INCREF(__pyx_kp_u__121);
+ __Pyx_GIVEREF(__pyx_kp_u__121);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 0, __pyx_kp_u__121)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__122);
+ __Pyx_GIVEREF(__pyx_kp_u__122);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 1, __pyx_kp_u__122)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__123);
+ __Pyx_GIVEREF(__pyx_kp_u__123);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 2, __pyx_kp_u__123)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__124);
+ __Pyx_GIVEREF(__pyx_kp_u__124);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 3, __pyx_kp_u__124)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__125);
+ __Pyx_GIVEREF(__pyx_kp_u__125);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 4, __pyx_kp_u__125)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__126);
+ __Pyx_GIVEREF(__pyx_kp_u__126);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 5, __pyx_kp_u__126)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__127);
+ __Pyx_GIVEREF(__pyx_kp_u__127);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 6, __pyx_kp_u__127)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__128);
+ __Pyx_GIVEREF(__pyx_kp_u__128);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 7, __pyx_kp_u__128)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__129);
+ __Pyx_GIVEREF(__pyx_kp_u__129);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 8, __pyx_kp_u__129)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__130);
+ __Pyx_GIVEREF(__pyx_kp_u__130);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 9, __pyx_kp_u__130)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__131);
+ __Pyx_GIVEREF(__pyx_kp_u__131);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 10, __pyx_kp_u__131)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__132);
+ __Pyx_GIVEREF(__pyx_kp_u__132);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 11, __pyx_kp_u__132)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__133);
+ __Pyx_GIVEREF(__pyx_kp_u__133);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 12, __pyx_kp_u__133)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__134);
+ __Pyx_GIVEREF(__pyx_kp_u__134);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 13, __pyx_kp_u__134)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__135);
+ __Pyx_GIVEREF(__pyx_kp_u__135);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 14, __pyx_kp_u__135)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__136);
+ __Pyx_GIVEREF(__pyx_kp_u__136);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 15, __pyx_kp_u__136)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_kp_u__137);
+ __Pyx_GIVEREF(__pyx_kp_u__137);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 16, __pyx_kp_u__137)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_n_u__138);
+ __Pyx_GIVEREF(__pyx_n_u__138);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 17, __pyx_n_u__138)) __PYX_ERR(0, 201, __pyx_L1_error);
+ __Pyx_GIVEREF(__pyx_t_23);
+ __Pyx_GOTREF(__pyx_v_self->_INDIC_FAILURE_CASES);
+ __Pyx_DECREF(__pyx_v_self->_INDIC_FAILURE_CASES);
+ __pyx_v_self->_INDIC_FAILURE_CASES = ((PyObject*)__pyx_t_23);
+ __pyx_t_23 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":50
+ * cdef object _xliterator
+ *
+ * def __cinit__(self, bint inference=True): # <<<<<<<<<<<<<<
+ * """
+ * Constructor for IndicProcessor. Initializes all necessary components.
+ */
+
+ /* function exit code */
+ __pyx_r = 0;
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_12);
+ __Pyx_XDECREF(__pyx_t_13);
+ __Pyx_XDECREF(__pyx_t_14);
+ __Pyx_XDECREF(__pyx_t_15);
+ __Pyx_XDECREF(__pyx_t_16);
+ __Pyx_XDECREF(__pyx_t_17);
+ __Pyx_XDECREF(__pyx_t_18);
+ __Pyx_XDECREF(__pyx_t_19);
+ __Pyx_XDECREF(__pyx_t_20);
+ __Pyx_XDECREF(__pyx_t_21);
+ __Pyx_XDECREF(__pyx_t_22);
+ __Pyx_XDECREF(__pyx_t_23);
+ __Pyx_XDECREF(__pyx_t_24);
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = -1;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_digits_dict);
+ __Pyx_XDECREF(__pyx_v_k);
+ __Pyx_XDECREF(__pyx_v_v);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
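+ /* Reference sketch (not Cython output): the fourteen (pattern, replacement)
+ * pairs assembled above behave like an ordinary Python list of compiled
+ * regexes. A minimal stand-alone illustration, assuming only the stdlib
+ * `re` module; the pair names and the sample string are illustrative, not
+ * part of the toolkit:
+ *
+ * import re
+ * pairs = [
+ *     (re.compile(r"\(\s*"), "("),                   # "( foo" -> "(foo"
+ *     (re.compile(r"[„“”«»]"), '"'),                 # curly quotes -> "
+ *     (re.compile(r"\u00a0[?!;]"), lambda m: m.group(0).strip()),
+ * ]
+ * s = "He said« hello »\u00a0!"
+ * for pat, repl in pairs:
+ *     s = pat.sub(repl, s)
+ * # s == 'He said" hello "!'
+ */
+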
+/* "IndicTransToolkit/processor.pyx":223
+ *
+ * # Internal Method: Apply punctuation replacements
+ * cdef str _apply_punc_replacements(self, str text, list replacements) except *: # <<<<<<<<<<<<<<
+ * """
+ * Apply a list of (pattern, replacement) in sequence to text.
+ */
+
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__apply_punc_replacements(CYTHON_UNUSED struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text, PyObject *__pyx_v_replacements) {
+ int __pyx_v_i;
+ PyObject *__pyx_v_pair = 0;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ Py_ssize_t __pyx_t_1;
+ Py_ssize_t __pyx_t_2;
+ int __pyx_t_3;
+ PyObject *__pyx_t_4 = NULL;
+ PyObject *__pyx_t_5 = NULL;
+ PyObject *__pyx_t_6 = NULL;
+ unsigned int __pyx_t_7;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("_apply_punc_replacements", 0);
+ __Pyx_INCREF(__pyx_v_text);
+
+ /* "IndicTransToolkit/processor.pyx":229
+ * cdef int i
+ * cdef tuple pair
+ * for i in range(len(replacements)): # <<<<<<<<<<<<<<
+ * pair = replacements[i]
+ * text = pair[0].sub(pair[1], text)
+ */
+ if (unlikely(__pyx_v_replacements == Py_None)) {
+ PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
+ __PYX_ERR(0, 229, __pyx_L1_error)
+ }
+ __pyx_t_1 = __Pyx_PyList_GET_SIZE(__pyx_v_replacements); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(0, 229, __pyx_L1_error)
+ __pyx_t_2 = __pyx_t_1;
+ for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
+ __pyx_v_i = __pyx_t_3;
+
+ /* "IndicTransToolkit/processor.pyx":230
+ * cdef tuple pair
+ * for i in range(len(replacements)):
+ * pair = replacements[i] # <<<<<<<<<<<<<<
+ * text = pair[0].sub(pair[1], text)
+ * return text
+ */
+ if (unlikely(__pyx_v_replacements == Py_None)) {
+ PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
+ __PYX_ERR(0, 230, __pyx_L1_error)
+ }
+ if (!(likely(PyTuple_CheckExact(PyList_GET_ITEM(__pyx_v_replacements, __pyx_v_i)))||((PyList_GET_ITEM(__pyx_v_replacements, __pyx_v_i)) == Py_None) || __Pyx_RaiseUnexpectedTypeError("tuple", PyList_GET_ITEM(__pyx_v_replacements, __pyx_v_i)))) __PYX_ERR(0, 230, __pyx_L1_error)
+ __pyx_t_4 = PyList_GET_ITEM(__pyx_v_replacements, __pyx_v_i);
+ __Pyx_INCREF(__pyx_t_4);
+ __Pyx_XDECREF_SET(__pyx_v_pair, ((PyObject*)__pyx_t_4));
+ __pyx_t_4 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":231
+ * for i in range(len(replacements)):
+ * pair = replacements[i]
+ * text = pair[0].sub(pair[1], text) # <<<<<<<<<<<<<<
+ * return text
+ *
+ */
+ if (unlikely(__pyx_v_pair == Py_None)) {
+ PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
+ __PYX_ERR(0, 231, __pyx_L1_error)
+ }
+ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(PyTuple_GET_ITEM(__pyx_v_pair, 0), __pyx_n_s_sub); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 231, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ if (unlikely(__pyx_v_pair == Py_None)) {
+ PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
+ __PYX_ERR(0, 231, __pyx_L1_error)
+ }
+ __pyx_t_6 = NULL;
+ __pyx_t_7 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_5))) {
+ __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5);
+ if (likely(__pyx_t_6)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
+ __Pyx_INCREF(__pyx_t_6);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_5, function);
+ __pyx_t_7 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[3] = {__pyx_t_6, PyTuple_GET_ITEM(__pyx_v_pair, 1), __pyx_v_text};
+ __pyx_t_4 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_7, 2+__pyx_t_7);
+ __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
+ if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 231, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ }
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_4))) __PYX_ERR(0, 231, __pyx_L1_error)
+ __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_4));
+ __pyx_t_4 = 0;
+ }
+
+ /* "IndicTransToolkit/processor.pyx":232
+ * pair = replacements[i]
+ * text = pair[0].sub(pair[1], text)
+ * return text # <<<<<<<<<<<<<<
+ *
+ * # Internal Method: Punctuation Normalization
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(__pyx_v_text);
+ __pyx_r = __pyx_v_text;
+ goto __pyx_L0;
+
+ /* "IndicTransToolkit/processor.pyx":223
+ *
+ * # Internal Method: Apply punctuation replacements
+ * cdef str _apply_punc_replacements(self, str text, list replacements) except *: # <<<<<<<<<<<<<<
+ * """
+ * Apply a list of (pattern, replacement) in sequence to text.
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_6);
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor._apply_punc_replacements", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_pair);
+ __Pyx_XDECREF(__pyx_v_text);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
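+ /* Reference sketch (not Cython output): the generated loop above is the
+ * compiled form of a short sequential regex pass. A pure-Python equivalent,
+ * assuming each list entry is a (compiled_pattern, replacement) tuple as
+ * built in __cinit__:
+ *
+ * def apply_punc_replacements(text: str, replacements: list) -> str:
+ *     for pattern, repl in replacements:  # applied strictly in list order
+ *         text = pattern.sub(repl, text)  # repl may be a str or a callable
+ *     return text
+ */
+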
+/* "IndicTransToolkit/processor.pyx":235
+ *
+ * # Internal Method: Punctuation Normalization
+ * cdef str _punc_norm(self, str text) except *: # <<<<<<<<<<<<<<
+ * """
+ * Consolidate punctuation normalization in fewer passes.
+ */
+
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__punc_norm(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ unsigned int __pyx_t_4;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("_punc_norm", 0);
+ __Pyx_INCREF(__pyx_v_text);
+
+ /* "IndicTransToolkit/processor.pyx":240
+ * """
+ * # 1) Apply replacements
+ * text = self._apply_punc_replacements(text, self._PUNC_REPLACEMENTS) # <<<<<<<<<<<<<<
+ *
+ * # 2) Additional patterns
+ */
+ __pyx_t_1 = __pyx_v_self->_PUNC_REPLACEMENTS;
+ __Pyx_INCREF(__pyx_t_1);
+ __pyx_t_2 = ((struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self->__pyx_vtab)->_apply_punc_replacements(__pyx_v_self, __pyx_v_text, ((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 240, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_2));
+ __pyx_t_2 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":243
+ *
+ * # 2) Additional patterns
+ * text = self._MULTISPACE_REGEX.sub(" ", text) # <<<<<<<<<<<<<<
+ * text = self._END_BRACKET_SPACE_PUNC_REGEX.sub(r")\1", text)
+ * text = self._DIGIT_SPACE_PERCENT.sub(r"\1%", text)
+ */
+ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_MULTISPACE_REGEX, __pyx_n_s_sub); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 243, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_3 = NULL;
+ __pyx_t_4 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_1))) {
+ __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_1);
+ if (likely(__pyx_t_3)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);
+ __Pyx_INCREF(__pyx_t_3);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_1, function);
+ __pyx_t_4 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[3] = {__pyx_t_3, __pyx_kp_u__139, __pyx_v_text};
+ __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_4, 2+__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 243, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ }
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 243, __pyx_L1_error)
+ __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_2));
+ __pyx_t_2 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":244
+ * # 2) Additional patterns
+ * text = self._MULTISPACE_REGEX.sub(" ", text)
+ * text = self._END_BRACKET_SPACE_PUNC_REGEX.sub(r")\1", text) # <<<<<<<<<<<<<<
+ * text = self._DIGIT_SPACE_PERCENT.sub(r"\1%", text)
+ * text = self._DOUBLE_QUOT_PUNC.sub(r'\1"', text)
+ */
+ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_END_BRACKET_SPACE_PUNC_REGEX, __pyx_n_s_sub); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 244, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_3 = NULL;
+ __pyx_t_4 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_1))) {
+ __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_1);
+ if (likely(__pyx_t_3)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);
+ __Pyx_INCREF(__pyx_t_3);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_1, function);
+ __pyx_t_4 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[3] = {__pyx_t_3, __pyx_kp_u_1_2, __pyx_v_text};
+ __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_4, 2+__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 244, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ }
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 244, __pyx_L1_error)
+ __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_2));
+ __pyx_t_2 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":245
+ * text = self._MULTISPACE_REGEX.sub(" ", text)
+ * text = self._END_BRACKET_SPACE_PUNC_REGEX.sub(r")\1", text)
+ * text = self._DIGIT_SPACE_PERCENT.sub(r"\1%", text) # <<<<<<<<<<<<<<
+ * text = self._DOUBLE_QUOT_PUNC.sub(r'\1"', text)
+ * text = self._DIGIT_NBSP_DIGIT.sub(r"\1.\2", text)
+ */
+ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_DIGIT_SPACE_PERCENT, __pyx_n_s_sub); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 245, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_3 = NULL;
+ __pyx_t_4 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_1))) {
+ __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_1);
+ if (likely(__pyx_t_3)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);
+ __Pyx_INCREF(__pyx_t_3);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_1, function);
+ __pyx_t_4 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[3] = {__pyx_t_3, __pyx_kp_u_1_3, __pyx_v_text};
+ __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_4, 2+__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 245, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ }
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 245, __pyx_L1_error)
+ __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_2));
+ __pyx_t_2 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":246
+ * text = self._END_BRACKET_SPACE_PUNC_REGEX.sub(r")\1", text)
+ * text = self._DIGIT_SPACE_PERCENT.sub(r"\1%", text)
+ * text = self._DOUBLE_QUOT_PUNC.sub(r'\1"', text) # <<<<<<<<<<<<<<
+ * text = self._DIGIT_NBSP_DIGIT.sub(r"\1.\2", text)
+ * return text.strip()
+ */
+ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_DOUBLE_QUOT_PUNC, __pyx_n_s_sub); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 246, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_3 = NULL;
+ __pyx_t_4 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_1))) {
+ __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_1);
+ if (likely(__pyx_t_3)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);
+ __Pyx_INCREF(__pyx_t_3);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_1, function);
+ __pyx_t_4 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[3] = {__pyx_t_3, __pyx_kp_u_1_4, __pyx_v_text};
+ __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_4, 2+__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 246, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ }
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 246, __pyx_L1_error)
+ __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_2));
+ __pyx_t_2 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":247
+ * text = self._DIGIT_SPACE_PERCENT.sub(r"\1%", text)
+ * text = self._DOUBLE_QUOT_PUNC.sub(r'\1"', text)
+ * text = self._DIGIT_NBSP_DIGIT.sub(r"\1.\2", text) # <<<<<<<<<<<<<<
+ * return text.strip()
+ *
+ */
+ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_DIGIT_NBSP_DIGIT, __pyx_n_s_sub); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 247, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_3 = NULL;
+ __pyx_t_4 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_1))) {
+ __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_1);
+ if (likely(__pyx_t_3)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);
+ __Pyx_INCREF(__pyx_t_3);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_1, function);
+ __pyx_t_4 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[3] = {__pyx_t_3, __pyx_kp_u_1_2_2, __pyx_v_text};
+ __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_4, 2+__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 247, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ }
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 247, __pyx_L1_error)
+ __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_2));
+ __pyx_t_2 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":248
+ * text = self._DOUBLE_QUOT_PUNC.sub(r'\1"', text)
+ * text = self._DIGIT_NBSP_DIGIT.sub(r"\1.\2", text)
+ * return text.strip() # <<<<<<<<<<<<<<
+ *
+ * # Internal Method: Wrap Text with Placeholders
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_2 = __Pyx_CallUnboundCMethod0(&__pyx_umethod_PyUnicode_Type_strip, __pyx_v_text); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 248, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 248, __pyx_L1_error)
+ __pyx_r = ((PyObject*)__pyx_t_2);
+ __pyx_t_2 = 0;
+ goto __pyx_L0;
+
+ /* "IndicTransToolkit/processor.pyx":235
+ *
+ * # Internal Method: Punctuation Normalization
+ * cdef str _punc_norm(self, str text) except *: # <<<<<<<<<<<<<<
+ * """
+ * Consolidate punctuation normalization in fewer passes.
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor._punc_norm", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_text);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
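+ /* Reference sketch (not Cython output): _punc_norm above chains the
+ * replacement list with five fixed regex passes, mirroring the quoted .pyx
+ * lines 240-248. The regex attributes are initialized earlier in __cinit__;
+ * the inline comments are illustrative readings, not part of the source:
+ *
+ * def punc_norm(self, text: str) -> str:
+ *     text = self._apply_punc_replacements(text, self._PUNC_REPLACEMENTS)
+ *     text = self._MULTISPACE_REGEX.sub(" ", text)                 # collapse space runs
+ *     text = self._END_BRACKET_SPACE_PUNC_REGEX.sub(r")\1", text)  # e.g. ") ." -> ")."
+ *     text = self._DIGIT_SPACE_PERCENT.sub(r"\1%", text)           # e.g. "50 %" -> "50%"
+ *     text = self._DOUBLE_QUOT_PUNC.sub(r'\1"', text)
+ *     text = self._DIGIT_NBSP_DIGIT.sub(r"\1.\2", text)            # digit, nbsp, digit
+ *     return text.strip()
+ */
+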
+/* "IndicTransToolkit/processor.pyx":251
+ *
+ * # Internal Method: Wrap Text with Placeholders
+ * cdef str _wrap_with_placeholders(self, str text) except *: # <<<<<<<<<<<<<<
+ * """
+ * Wrap substrings with matched patterns in the text with placeholders.
+ */
+
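+ /* Reference sketch (not Cython output): the function below can be read as
+ * this pure-Python outline. It scans the text with the email/URL/numeral/other
+ * patterns, assigns each distinct match a serial placeholder, and registers
+ * many spacing/bracket variants (including Indic-script spellings of "ID")
+ * so that noisy model outputs can still be mapped back in postprocessing.
+ * Outline only; variant registration and the final substitution happen in
+ * the generated code further down:
+ *
+ * def wrap_with_placeholders(self, text: str) -> str:
+ *     serial_no = 1
+ *     placeholder_entity_map = {}
+ *     for pattern in (self._EMAIL_PATTERN, self._URL_PATTERN,
+ *                     self._NUMERAL_PATTERN, self._OTHER_PATTERN):
+ *         for match in set(pattern.findall(text)):
+ *             base = f"<ID{serial_no}>"
+ *             placeholder_entity_map[base] = match  # plus variants like "< ID1 >"
+ *             serial_no += 1
+ *     ...
+ */
+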
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__wrap_with_placeholders(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text) {
+ int __pyx_v_serial_no;
+ PyObject *__pyx_v_placeholder_entity_map = 0;
+ PyObject *__pyx_v_patterns = 0;
+ PyObject *__pyx_v_pattern = 0;
+ PyObject *__pyx_v_matches = 0;
+ PyObject *__pyx_v_match = 0;
+ PyObject *__pyx_v_base_placeholder = 0;
+ int __pyx_v_i;
+ PyObject *__pyx_v_indic_case = NULL;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ Py_ssize_t __pyx_t_2;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ PyObject *__pyx_t_5 = NULL;
+ unsigned int __pyx_t_6;
+ Py_ssize_t __pyx_t_7;
+ Py_ssize_t __pyx_t_8;
+ int __pyx_t_9;
+ int __pyx_t_10;
+ int __pyx_t_11;
+ Py_ssize_t __pyx_t_12;
+ Py_UCS4 __pyx_t_13;
+ Py_ssize_t __pyx_t_14;
+ Py_ssize_t __pyx_t_15;
+ PyObject *__pyx_t_16 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("_wrap_with_placeholders", 0);
+ __Pyx_INCREF(__pyx_v_text);
+
+ /* "IndicTransToolkit/processor.pyx":256
+ * Store the placeholder map in the queue for retrieval in postprocessing.
+ * """
+ * cdef int serial_no = 1 # <<<<<<<<<<<<<<
+ * cdef dict placeholder_entity_map = {}
+ * cdef list patterns = [
+ */
+ __pyx_v_serial_no = 1;
+
+ /* "IndicTransToolkit/processor.pyx":257
+ * """
+ * cdef int serial_no = 1
+ * cdef dict placeholder_entity_map = {} # <<<<<<<<<<<<<<
+ * cdef list patterns = [
+ * self._EMAIL_PATTERN,
+ */
+ __pyx_t_1 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 257, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_v_placeholder_entity_map = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":258
+ * cdef int serial_no = 1
+ * cdef dict placeholder_entity_map = {}
+ * cdef list patterns = [ # <<<<<<<<<<<<<<
+ * self._EMAIL_PATTERN,
+ * self._URL_PATTERN,
+ */
+ __pyx_t_1 = PyList_New(4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 258, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_INCREF(__pyx_v_self->_EMAIL_PATTERN);
+ __Pyx_GIVEREF(__pyx_v_self->_EMAIL_PATTERN);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_1, 0, __pyx_v_self->_EMAIL_PATTERN)) __PYX_ERR(0, 258, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_v_self->_URL_PATTERN);
+ __Pyx_GIVEREF(__pyx_v_self->_URL_PATTERN);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_1, 1, __pyx_v_self->_URL_PATTERN)) __PYX_ERR(0, 258, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_v_self->_NUMERAL_PATTERN);
+ __Pyx_GIVEREF(__pyx_v_self->_NUMERAL_PATTERN);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_1, 2, __pyx_v_self->_NUMERAL_PATTERN)) __PYX_ERR(0, 258, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_v_self->_OTHER_PATTERN);
+ __Pyx_GIVEREF(__pyx_v_self->_OTHER_PATTERN);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_1, 3, __pyx_v_self->_OTHER_PATTERN)) __PYX_ERR(0, 258, __pyx_L1_error);
+ __pyx_v_patterns = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":270
+ * cdef int i
+ *
+ * for pattern in patterns: # <<<<<<<<<<<<<<
+ * matches = set(pattern.findall(text))
+ * for match in matches:
+ */
+ __pyx_t_1 = __pyx_v_patterns; __Pyx_INCREF(__pyx_t_1);
+ __pyx_t_2 = 0;
+ for (;;) {
+ {
+ Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_1);
+ #if !CYTHON_ASSUME_SAFE_MACROS
+ if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 270, __pyx_L1_error)
+ #endif
+ if (__pyx_t_2 >= __pyx_temp) break;
+ }
+ #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+ __pyx_t_3 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++; if (unlikely((0 < 0))) __PYX_ERR(0, 270, __pyx_L1_error)
+ #else
+ __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 270, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ #endif
+ __Pyx_XDECREF_SET(__pyx_v_pattern, __pyx_t_3);
+ __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":271
+ *
+ * for pattern in patterns:
+ * matches = set(pattern.findall(text)) # <<<<<<<<<<<<<<
+ * for match in matches:
+ * # Additional checks
+ */
+ __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_pattern, __pyx_n_s_findall); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 271, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_5 = NULL;
+ __pyx_t_6 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_4))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_4, function);
+ __pyx_t_6 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_v_text};
+ __pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 1+__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 271, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ }
+ __pyx_t_4 = PySet_New(__pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 271, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_XDECREF_SET(__pyx_v_matches, ((PyObject*)__pyx_t_4));
+ __pyx_t_4 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":272
+ * for pattern in patterns:
+ * matches = set(pattern.findall(text))
+ * for match in matches: # <<<<<<<<<<<<<<
+ * # Additional checks
+ * if pattern is self._URL_PATTERN:
+ */
+ __pyx_t_7 = 0;
+ __pyx_t_3 = __Pyx_set_iterator(__pyx_v_matches, 1, (&__pyx_t_8), (&__pyx_t_9)); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 272, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ __pyx_t_4 = __pyx_t_3;
+ __pyx_t_3 = 0;
+ while (1) {
+ __pyx_t_10 = __Pyx_set_iter_next(__pyx_t_4, __pyx_t_8, &__pyx_t_7, &__pyx_t_3, __pyx_t_9);
+ if (unlikely(__pyx_t_10 == 0)) break;
+ if (unlikely(__pyx_t_10 == -1)) __PYX_ERR(0, 272, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_3))) __PYX_ERR(0, 272, __pyx_L1_error)
+ __Pyx_XDECREF_SET(__pyx_v_match, ((PyObject*)__pyx_t_3));
+ __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":274
+ * for match in matches:
+ * # Additional checks
+ * if pattern is self._URL_PATTERN: # <<<<<<<<<<<<<<
+ * if len(match.replace(".", "")) < 4:
+ * continue
+ */
+ __pyx_t_11 = (__pyx_v_pattern == __pyx_v_self->_URL_PATTERN);
+ if (__pyx_t_11) {
+
+ /* "IndicTransToolkit/processor.pyx":275
+ * # Additional checks
+ * if pattern is self._URL_PATTERN:
+ * if len(match.replace(".", "")) < 4: # <<<<<<<<<<<<<<
+ * continue
+ * if pattern is self._NUMERAL_PATTERN:
+ */
+ if (unlikely(__pyx_v_match == Py_None)) {
+ PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "replace");
+ __PYX_ERR(0, 275, __pyx_L1_error)
+ }
+ __pyx_t_3 = PyUnicode_Replace(__pyx_v_match, __pyx_kp_u__140, __pyx_kp_u__103, -1L); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 275, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_12 = __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); if (unlikely(__pyx_t_12 == ((Py_ssize_t)-1))) __PYX_ERR(0, 275, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_11 = (__pyx_t_12 < 4);
+ if (__pyx_t_11) {
+
+ /* "IndicTransToolkit/processor.pyx":276
+ * if pattern is self._URL_PATTERN:
+ * if len(match.replace(".", "")) < 4:
+ * continue # <<<<<<<<<<<<<<
+ * if pattern is self._NUMERAL_PATTERN:
+ * if len(match.replace(" ", "").replace(".", "").replace(":", "")) < 4:
+ */
+ goto __pyx_L5_continue;
+
+ /* "IndicTransToolkit/processor.pyx":275
+ * # Additional checks
+ * if pattern is self._URL_PATTERN:
+ * if len(match.replace(".", "")) < 4: # <<<<<<<<<<<<<<
+ * continue
+ * if pattern is self._NUMERAL_PATTERN:
+ */
+ }
+
+ /* "IndicTransToolkit/processor.pyx":274
+ * for match in matches:
+ * # Additional checks
+ * if pattern is self._URL_PATTERN: # <<<<<<<<<<<<<<
+ * if len(match.replace(".", "")) < 4:
+ * continue
+ */
+ }
+
+ /* "IndicTransToolkit/processor.pyx":277
+ * if len(match.replace(".", "")) < 4:
+ * continue
+ * if pattern is self._NUMERAL_PATTERN: # <<<<<<<<<<<<<<
+ * if len(match.replace(" ", "").replace(".", "").replace(":", "")) < 4:
+ * continue
+ */
+ __pyx_t_11 = (__pyx_v_pattern == __pyx_v_self->_NUMERAL_PATTERN);
+ if (__pyx_t_11) {
+
+ /* "IndicTransToolkit/processor.pyx":278
+ * continue
+ * if pattern is self._NUMERAL_PATTERN:
+ * if len(match.replace(" ", "").replace(".", "").replace(":", "")) < 4: # <<<<<<<<<<<<<<
+ * continue
+ *
+ */
+ if (unlikely(__pyx_v_match == Py_None)) {
+ PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "replace");
+ __PYX_ERR(0, 278, __pyx_L1_error)
+ }
+ __pyx_t_3 = PyUnicode_Replace(__pyx_v_match, __pyx_kp_u__139, __pyx_kp_u__103, -1L); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 278, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_5 = PyUnicode_Replace(((PyObject*)__pyx_t_3), __pyx_kp_u__140, __pyx_kp_u__103, -1L); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 278, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = PyUnicode_Replace(((PyObject*)__pyx_t_5), __pyx_kp_u__106, __pyx_kp_u__103, -1L); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 278, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_12 = __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); if (unlikely(__pyx_t_12 == ((Py_ssize_t)-1))) __PYX_ERR(0, 278, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_11 = (__pyx_t_12 < 4);
+ if (__pyx_t_11) {
+
+ /* "IndicTransToolkit/processor.pyx":279
+ * if pattern is self._NUMERAL_PATTERN:
+ * if len(match.replace(" ", "").replace(".", "").replace(":", "")) < 4:
+ * continue # <<<<<<<<<<<<<<
+ *
+ * base_placeholder = f""
+ */
+ goto __pyx_L5_continue;
+
+ /* "IndicTransToolkit/processor.pyx":278
+ * continue
+ * if pattern is self._NUMERAL_PATTERN:
+ * if len(match.replace(" ", "").replace(".", "").replace(":", "")) < 4: # <<<<<<<<<<<<<<
+ * continue
+ *
+ */
+ }
+
+ /* "IndicTransToolkit/processor.pyx":277
+ * if len(match.replace(".", "")) < 4:
+ * continue
+ * if pattern is self._NUMERAL_PATTERN: # <<<<<<<<<<<<<<
+ * if len(match.replace(" ", "").replace(".", "").replace(":", "")) < 4:
+ * continue
+ */
+ }
+
+ /* "IndicTransToolkit/processor.pyx":281
+ * continue
+ *
+ * base_placeholder = f"" # <<<<<<<<<<<<<<
+ * # Map various placeholder formats to the matched text
+ * placeholder_entity_map[f""] = match
+ */
+ __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 281, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_12 = 0;
+ __pyx_t_13 = 127;
+ __Pyx_INCREF(__pyx_kp_u_ID);
+ __pyx_t_12 += 3;
+ __Pyx_GIVEREF(__pyx_kp_u_ID);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_kp_u_ID);
+ __pyx_t_5 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 281, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_12 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_5);
+ __pyx_t_5 = 0;
+ __Pyx_INCREF(__pyx_kp_u__141);
+ __pyx_t_12 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__141);
+ PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_kp_u__141);
+ __pyx_t_5 = __Pyx_PyUnicode_Join(__pyx_t_3, 3, __pyx_t_12, __pyx_t_13); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 281, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_XDECREF_SET(__pyx_v_base_placeholder, ((PyObject*)__pyx_t_5));
+ __pyx_t_5 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":283
+ * base_placeholder = f""
+ * # Map various placeholder formats to the matched text
+ * placeholder_entity_map[f""] = match # <<<<<<<<<<<<<<
+ * placeholder_entity_map[f"< ID{serial_no} >"] = match
+ * placeholder_entity_map[f"[ID{serial_no}]"] = match
+ */
+ __pyx_t_5 = PyTuple_New(3); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 283, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_12 = 0;
+ __pyx_t_13 = 127;
+ __Pyx_INCREF(__pyx_kp_u_ID);
+ __pyx_t_12 += 3;
+ __Pyx_GIVEREF(__pyx_kp_u_ID);
+ PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u_ID);
+ __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 283, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_12 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3);
+ __pyx_t_3 = 0;
+ __Pyx_INCREF(__pyx_kp_u__141);
+ __pyx_t_12 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__141);
+ PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_kp_u__141);
+ __pyx_t_3 = __Pyx_PyUnicode_Join(__pyx_t_5, 3, __pyx_t_12, __pyx_t_13); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 283, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_3, __pyx_v_match) < 0))) __PYX_ERR(0, 283, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":284
+ * # Map various placeholder formats to the matched text
+ * placeholder_entity_map[f""] = match
+ * placeholder_entity_map[f"< ID{serial_no} >"] = match # <<<<<<<<<<<<<<
+ * placeholder_entity_map[f"[ID{serial_no}]"] = match
+ * placeholder_entity_map[f"[ ID{serial_no} ]"] = match
+ */
+ __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 284, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_12 = 0;
+ __pyx_t_13 = 127;
+ __Pyx_INCREF(__pyx_kp_u_ID_2);
+ __pyx_t_12 += 4;
+ __Pyx_GIVEREF(__pyx_kp_u_ID_2);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_kp_u_ID_2);
+ __pyx_t_5 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 284, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_12 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_5);
+ __pyx_t_5 = 0;
+ __Pyx_INCREF(__pyx_kp_u__142);
+ __pyx_t_12 += 2;
+ __Pyx_GIVEREF(__pyx_kp_u__142);
+ PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_kp_u__142);
+ __pyx_t_5 = __Pyx_PyUnicode_Join(__pyx_t_3, 3, __pyx_t_12, __pyx_t_13); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 284, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_5, __pyx_v_match) < 0))) __PYX_ERR(0, 284, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":285
+ * placeholder_entity_map[f""] = match
+ * placeholder_entity_map[f"< ID{serial_no} >"] = match
+ * placeholder_entity_map[f"[ID{serial_no}]"] = match # <<<<<<<<<<<<<<
+ * placeholder_entity_map[f"[ ID{serial_no} ]"] = match
+ * placeholder_entity_map[f"[ID {serial_no}]"] = match
+ */
+ __pyx_t_5 = PyTuple_New(3); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 285, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_12 = 0;
+ __pyx_t_13 = 127;
+ __Pyx_INCREF(__pyx_kp_u_ID_3);
+ __pyx_t_12 += 3;
+ __Pyx_GIVEREF(__pyx_kp_u_ID_3);
+ PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u_ID_3);
+ __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 285, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_12 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3);
+ __pyx_t_3 = 0;
+ __Pyx_INCREF(__pyx_kp_u__143);
+ __pyx_t_12 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__143);
+ PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_kp_u__143);
+ __pyx_t_3 = __Pyx_PyUnicode_Join(__pyx_t_5, 3, __pyx_t_12, __pyx_t_13); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 285, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_3, __pyx_v_match) < 0))) __PYX_ERR(0, 285, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":286
+ * placeholder_entity_map[f"< ID{serial_no} >"] = match
+ * placeholder_entity_map[f"[ID{serial_no}]"] = match
+ * placeholder_entity_map[f"[ ID{serial_no} ]"] = match # <<<<<<<<<<<<<<
+ * placeholder_entity_map[f"[ID {serial_no}]"] = match
+ * placeholder_entity_map[f""] = match
+ */
+ __pyx_t_5 = __pyx_v_self->_INDIC_FAILURE_CASES;
+ __Pyx_INCREF(__pyx_t_5);
+ if (unlikely(__pyx_t_5 == Py_None)) {
+ PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
+ __PYX_ERR(0, 293, __pyx_L1_error)
+ }
+ __pyx_t_12 = __Pyx_PyList_GET_SIZE(__pyx_t_5); if (unlikely(__pyx_t_12 == ((Py_ssize_t)-1))) __PYX_ERR(0, 293, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_14 = __pyx_t_12;
+ for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_14; __pyx_t_10+=1) {
+ __pyx_v_i = __pyx_t_10;
+
+ /* "IndicTransToolkit/processor.pyx":294
+ * # Handle Indic failure cases
+ * for i in range(len(self._INDIC_FAILURE_CASES)):
+ * indic_case = self._INDIC_FAILURE_CASES[i] # <<<<<<<<<<<<<<
+ * placeholder_entity_map[f"<{indic_case}{serial_no}>"] = match
+ * placeholder_entity_map[f"< {indic_case}{serial_no} >"] = match
+ */
+ if (unlikely(__pyx_v_self->_INDIC_FAILURE_CASES == Py_None)) {
+ PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
+ __PYX_ERR(0, 294, __pyx_L1_error)
+ }
+ __pyx_t_5 = PyList_GET_ITEM(__pyx_v_self->_INDIC_FAILURE_CASES, __pyx_v_i);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_XDECREF_SET(__pyx_v_indic_case, __pyx_t_5);
+ __pyx_t_5 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":295
+ * for i in range(len(self._INDIC_FAILURE_CASES)):
+ * indic_case = self._INDIC_FAILURE_CASES[i]
+ * placeholder_entity_map[f"<{indic_case}{serial_no}>"] = match # <<<<<<<<<<<<<<
+ * placeholder_entity_map[f"< {indic_case}{serial_no} >"] = match
+ * placeholder_entity_map[f"< {indic_case} {serial_no} >"] = match
+ */
+ __pyx_t_5 = PyTuple_New(4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 295, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_15 = 0;
+ __pyx_t_13 = 127;
+ __Pyx_INCREF(__pyx_kp_u__145);
+ __pyx_t_15 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__145);
+ PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u__145);
+ __pyx_t_3 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 295, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) : __pyx_t_13;
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3);
+ __pyx_t_3 = 0;
+ __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 295, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_t_3);
+ __pyx_t_3 = 0;
+ __Pyx_INCREF(__pyx_kp_u__141);
+ __pyx_t_15 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__141);
+ PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_kp_u__141);
+ __pyx_t_3 = __Pyx_PyUnicode_Join(__pyx_t_5, 4, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 295, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_3, __pyx_v_match) < 0))) __PYX_ERR(0, 295, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":296
+ * indic_case = self._INDIC_FAILURE_CASES[i]
+ * placeholder_entity_map[f"<{indic_case}{serial_no}>"] = match
+ * placeholder_entity_map[f"< {indic_case}{serial_no} >"] = match # <<<<<<<<<<<<<<
+ * placeholder_entity_map[f"< {indic_case} {serial_no} >"] = match
+ * placeholder_entity_map[f"<{indic_case} {serial_no}]"] = match
+ */
+ __pyx_t_3 = PyTuple_New(4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 296, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_15 = 0;
+ __pyx_t_13 = 127;
+ __Pyx_INCREF(__pyx_kp_u__146);
+ __pyx_t_15 += 2;
+ __Pyx_GIVEREF(__pyx_kp_u__146);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_kp_u__146);
+ __pyx_t_5 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 296, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) : __pyx_t_13;
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_5);
+ __pyx_t_5 = 0;
+ __pyx_t_5 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 296, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_5);
+ __pyx_t_5 = 0;
+ __Pyx_INCREF(__pyx_kp_u__142);
+ __pyx_t_15 += 2;
+ __Pyx_GIVEREF(__pyx_kp_u__142);
+ PyTuple_SET_ITEM(__pyx_t_3, 3, __pyx_kp_u__142);
+ __pyx_t_5 = __Pyx_PyUnicode_Join(__pyx_t_3, 4, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 296, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_5, __pyx_v_match) < 0))) __PYX_ERR(0, 296, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":297
+ * placeholder_entity_map[f"<{indic_case}{serial_no}>"] = match
+ * placeholder_entity_map[f"< {indic_case}{serial_no} >"] = match
+ * placeholder_entity_map[f"< {indic_case} {serial_no} >"] = match # <<<<<<<<<<<<<<
+ * placeholder_entity_map[f"<{indic_case} {serial_no}]"] = match
+ * placeholder_entity_map[f"< {indic_case} {serial_no} ]"] = match
+ */
+ __pyx_t_5 = PyTuple_New(5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 297, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_15 = 0;
+ __pyx_t_13 = 127;
+ __Pyx_INCREF(__pyx_kp_u__146);
+ __pyx_t_15 += 2;
+ __Pyx_GIVEREF(__pyx_kp_u__146);
+ PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u__146);
+ __pyx_t_3 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 297, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) : __pyx_t_13;
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3);
+ __pyx_t_3 = 0;
+ __Pyx_INCREF(__pyx_kp_u__139);
+ __pyx_t_15 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__139);
+ PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_kp_u__139);
+ __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 297, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_t_3);
+ __pyx_t_3 = 0;
+ __Pyx_INCREF(__pyx_kp_u__142);
+ __pyx_t_15 += 2;
+ __Pyx_GIVEREF(__pyx_kp_u__142);
+ PyTuple_SET_ITEM(__pyx_t_5, 4, __pyx_kp_u__142);
+ __pyx_t_3 = __Pyx_PyUnicode_Join(__pyx_t_5, 5, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 297, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_3, __pyx_v_match) < 0))) __PYX_ERR(0, 297, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":298
+ * placeholder_entity_map[f"< {indic_case}{serial_no} >"] = match
+ * placeholder_entity_map[f"< {indic_case} {serial_no} >"] = match
+ * placeholder_entity_map[f"<{indic_case} {serial_no}]"] = match # <<<<<<<<<<<<<<
+ * placeholder_entity_map[f"< {indic_case} {serial_no} ]"] = match
+ * placeholder_entity_map[f"[{indic_case}{serial_no}]"] = match
+ */
+ __pyx_t_3 = PyTuple_New(5); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 298, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_15 = 0;
+ __pyx_t_13 = 127;
+ __Pyx_INCREF(__pyx_kp_u__145);
+ __pyx_t_15 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__145);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_kp_u__145);
+ __pyx_t_5 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 298, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) : __pyx_t_13;
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_5);
+ __pyx_t_5 = 0;
+ __Pyx_INCREF(__pyx_kp_u__139);
+ __pyx_t_15 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__139);
+ PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_kp_u__139);
+ __pyx_t_5 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 298, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_3, 3, __pyx_t_5);
+ __pyx_t_5 = 0;
+ __Pyx_INCREF(__pyx_kp_u__143);
+ __pyx_t_15 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__143);
+ PyTuple_SET_ITEM(__pyx_t_3, 4, __pyx_kp_u__143);
+ __pyx_t_5 = __Pyx_PyUnicode_Join(__pyx_t_3, 5, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 298, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_5, __pyx_v_match) < 0))) __PYX_ERR(0, 298, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":299
+ * placeholder_entity_map[f"< {indic_case} {serial_no} >"] = match
+ * placeholder_entity_map[f"<{indic_case} {serial_no}]"] = match
+ * placeholder_entity_map[f"< {indic_case} {serial_no} ]"] = match # <<<<<<<<<<<<<<
+ * placeholder_entity_map[f"[{indic_case}{serial_no}]"] = match
+ * placeholder_entity_map[f"[{indic_case} {serial_no}]"] = match
+ */
+ __pyx_t_5 = PyTuple_New(5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 299, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_15 = 0;
+ __pyx_t_13 = 127;
+ __Pyx_INCREF(__pyx_kp_u__146);
+ __pyx_t_15 += 2;
+ __Pyx_GIVEREF(__pyx_kp_u__146);
+ PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u__146);
+ __pyx_t_3 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 299, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) : __pyx_t_13;
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3);
+ __pyx_t_3 = 0;
+ __Pyx_INCREF(__pyx_kp_u__139);
+ __pyx_t_15 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__139);
+ PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_kp_u__139);
+ __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 299, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_t_3);
+ __pyx_t_3 = 0;
+ __Pyx_INCREF(__pyx_kp_u__144);
+ __pyx_t_15 += 2;
+ __Pyx_GIVEREF(__pyx_kp_u__144);
+ PyTuple_SET_ITEM(__pyx_t_5, 4, __pyx_kp_u__144);
+ __pyx_t_3 = __Pyx_PyUnicode_Join(__pyx_t_5, 5, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 299, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_3, __pyx_v_match) < 0))) __PYX_ERR(0, 299, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":300
+ * placeholder_entity_map[f"<{indic_case} {serial_no}]"] = match
+ * placeholder_entity_map[f"< {indic_case} {serial_no} ]"] = match
+ * placeholder_entity_map[f"[{indic_case}{serial_no}]"] = match # <<<<<<<<<<<<<<
+ * placeholder_entity_map[f"[{indic_case} {serial_no}]"] = match
+ * placeholder_entity_map[f"[ {indic_case}{serial_no} ]"] = match
+ */
+ __pyx_t_3 = PyTuple_New(4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 300, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_15 = 0;
+ __pyx_t_13 = 127;
+ __Pyx_INCREF(__pyx_kp_u__147);
+ __pyx_t_15 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__147);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_kp_u__147);
+ __pyx_t_5 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 300, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) : __pyx_t_13;
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_5);
+ __pyx_t_5 = 0;
+ __pyx_t_5 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 300, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_5);
+ __pyx_t_5 = 0;
+ __Pyx_INCREF(__pyx_kp_u__143);
+ __pyx_t_15 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__143);
+ PyTuple_SET_ITEM(__pyx_t_3, 3, __pyx_kp_u__143);
+ __pyx_t_5 = __Pyx_PyUnicode_Join(__pyx_t_3, 4, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 300, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_5, __pyx_v_match) < 0))) __PYX_ERR(0, 300, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":301
+ * placeholder_entity_map[f"< {indic_case} {serial_no} ]"] = match
+ * placeholder_entity_map[f"[{indic_case}{serial_no}]"] = match
+ * placeholder_entity_map[f"[{indic_case} {serial_no}]"] = match # <<<<<<<<<<<<<<
+ * placeholder_entity_map[f"[ {indic_case}{serial_no} ]"] = match
+ * placeholder_entity_map[f"[ {indic_case} {serial_no} ]"] = match
+ */
+ __pyx_t_5 = PyTuple_New(5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 301, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_15 = 0;
+ __pyx_t_13 = 127;
+ __Pyx_INCREF(__pyx_kp_u__147);
+ __pyx_t_15 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__147);
+ PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u__147);
+ __pyx_t_3 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 301, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) : __pyx_t_13;
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3);
+ __pyx_t_3 = 0;
+ __Pyx_INCREF(__pyx_kp_u__139);
+ __pyx_t_15 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__139);
+ PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_kp_u__139);
+ __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 301, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_t_3);
+ __pyx_t_3 = 0;
+ __Pyx_INCREF(__pyx_kp_u__143);
+ __pyx_t_15 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__143);
+ PyTuple_SET_ITEM(__pyx_t_5, 4, __pyx_kp_u__143);
+ __pyx_t_3 = __Pyx_PyUnicode_Join(__pyx_t_5, 5, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 301, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_3, __pyx_v_match) < 0))) __PYX_ERR(0, 301, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":302
+ * placeholder_entity_map[f"[{indic_case}{serial_no}]"] = match
+ * placeholder_entity_map[f"[{indic_case} {serial_no}]"] = match
+ * placeholder_entity_map[f"[ {indic_case}{serial_no} ]"] = match # <<<<<<<<<<<<<<
+ * placeholder_entity_map[f"[ {indic_case} {serial_no} ]"] = match
+ * placeholder_entity_map[f"{indic_case} {serial_no}"] = match
+ */
+ __pyx_t_3 = PyTuple_New(4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 302, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_15 = 0;
+ __pyx_t_13 = 127;
+ __Pyx_INCREF(__pyx_kp_u__148);
+ __pyx_t_15 += 2;
+ __Pyx_GIVEREF(__pyx_kp_u__148);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_kp_u__148);
+ __pyx_t_5 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 302, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) : __pyx_t_13;
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_5);
+ __pyx_t_5 = 0;
+ __pyx_t_5 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 302, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_5);
+ __pyx_t_5 = 0;
+ __Pyx_INCREF(__pyx_kp_u__144);
+ __pyx_t_15 += 2;
+ __Pyx_GIVEREF(__pyx_kp_u__144);
+ PyTuple_SET_ITEM(__pyx_t_3, 3, __pyx_kp_u__144);
+ __pyx_t_5 = __Pyx_PyUnicode_Join(__pyx_t_3, 4, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 302, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_5, __pyx_v_match) < 0))) __PYX_ERR(0, 302, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":303
+ * placeholder_entity_map[f"[{indic_case} {serial_no}]"] = match
+ * placeholder_entity_map[f"[ {indic_case}{serial_no} ]"] = match
+ * placeholder_entity_map[f"[ {indic_case} {serial_no} ]"] = match # <<<<<<<<<<<<<<
+ * placeholder_entity_map[f"{indic_case} {serial_no}"] = match
+ * placeholder_entity_map[f"{indic_case}{serial_no}"] = match
+ */
+ __pyx_t_5 = PyTuple_New(5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 303, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_15 = 0;
+ __pyx_t_13 = 127;
+ __Pyx_INCREF(__pyx_kp_u__148);
+ __pyx_t_15 += 2;
+ __Pyx_GIVEREF(__pyx_kp_u__148);
+ PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u__148);
+ __pyx_t_3 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 303, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) : __pyx_t_13;
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3);
+ __pyx_t_3 = 0;
+ __Pyx_INCREF(__pyx_kp_u__139);
+ __pyx_t_15 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__139);
+ PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_kp_u__139);
+ __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 303, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_t_3);
+ __pyx_t_3 = 0;
+ __Pyx_INCREF(__pyx_kp_u__144);
+ __pyx_t_15 += 2;
+ __Pyx_GIVEREF(__pyx_kp_u__144);
+ PyTuple_SET_ITEM(__pyx_t_5, 4, __pyx_kp_u__144);
+ __pyx_t_3 = __Pyx_PyUnicode_Join(__pyx_t_5, 5, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 303, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_3, __pyx_v_match) < 0))) __PYX_ERR(0, 303, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":304
+ * placeholder_entity_map[f"[ {indic_case}{serial_no} ]"] = match
+ * placeholder_entity_map[f"[ {indic_case} {serial_no} ]"] = match
+ * placeholder_entity_map[f"{indic_case} {serial_no}"] = match # <<<<<<<<<<<<<<
+ * placeholder_entity_map[f"{indic_case}{serial_no}"] = match
+ *
+ */
+ __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 304, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_15 = 0;
+ __pyx_t_13 = 127;
+ __pyx_t_5 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 304, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) : __pyx_t_13;
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_5);
+ __pyx_t_5 = 0;
+ __Pyx_INCREF(__pyx_kp_u__139);
+ __pyx_t_15 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__139);
+ PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_kp_u__139);
+ __pyx_t_5 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 304, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_5);
+ __pyx_t_5 = 0;
+ __pyx_t_5 = __Pyx_PyUnicode_Join(__pyx_t_3, 3, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 304, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_5, __pyx_v_match) < 0))) __PYX_ERR(0, 304, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":305
+ * placeholder_entity_map[f"[ {indic_case} {serial_no} ]"] = match
+ * placeholder_entity_map[f"{indic_case} {serial_no}"] = match
+ * placeholder_entity_map[f"{indic_case}{serial_no}"] = match # <<<<<<<<<<<<<<
+ *
+ * # Replace the match with the base placeholder
+ */
+ __pyx_t_5 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 305, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 305, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_16 = __Pyx_PyUnicode_ConcatInPlace(__pyx_t_5, __pyx_t_3); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 305, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_16);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_16, __pyx_v_match) < 0))) __PYX_ERR(0, 305, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0;
+ }
+
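+ /* The assignments above register every spacing/bracket variant the decoder
+ * might emit for a single placeholder. A minimal pure-Python sketch of the
+ * same registration, assuming only the processor.pyx lines quoted in this
+ * file (the loop form is illustrative, not the generated control flow):
+ *
+ * for key in (f"<{indic_case}{serial_no}>", f"< {indic_case}{serial_no} >",
+ *             f"< {indic_case} {serial_no} >", f"<{indic_case} {serial_no}]",
+ *             f"< {indic_case} {serial_no} ]", f"[{indic_case}{serial_no}]",
+ *             f"[{indic_case} {serial_no}]", f"[ {indic_case}{serial_no} ]",
+ *             f"[ {indic_case} {serial_no} ]", f"{indic_case} {serial_no}",
+ *             f"{indic_case}{serial_no}"):
+ *     placeholder_entity_map[key] = match
+ */
+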
+ /* "IndicTransToolkit/processor.pyx":308
+ *
+ * # Replace the match with the base placeholder
+ * text = text.replace(match, base_placeholder) # <<<<<<<<<<<<<<
+ * serial_no += 1
+ *
+ */
+ if (unlikely(__pyx_v_text == Py_None)) {
+ PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "replace");
+ __PYX_ERR(0, 308, __pyx_L1_error)
+ }
+ __pyx_t_16 = PyUnicode_Replace(__pyx_v_text, __pyx_v_match, __pyx_v_base_placeholder, -1L); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 308, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_16);
+ __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_16));
+ __pyx_t_16 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":309
+ * # Replace the match with the base placeholder
+ * text = text.replace(match, base_placeholder)
+ * serial_no += 1 # <<<<<<<<<<<<<<
+ *
+ * # Clean up any remaining placeholder artifacts
+ */
+ __pyx_v_serial_no = (__pyx_v_serial_no + 1);
+ __pyx_L5_continue:;
+ }
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":270
+ * cdef int i
+ *
+ * for pattern in patterns: # <<<<<<<<<<<<<<
+ * matches = set(pattern.findall(text))
+ * for match in matches:
+ */
+ }
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":312
+ *
+ * # Clean up any remaining placeholder artifacts
+ * text = re.sub(r"\s+", " ", text).replace(">/", ">").replace("]/", "]") # <<<<<<<<<<<<<<
+ * self._placeholder_entity_maps.put(placeholder_entity_map)
+ * return text
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_re); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 312, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_sub); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 312, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_16);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __pyx_t_4 = NULL;
+ __pyx_t_6 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_16))) {
+ __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_16);
+ if (likely(__pyx_t_4)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_16);
+ __Pyx_INCREF(__pyx_t_4);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_16, function);
+ __pyx_t_6 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[4] = {__pyx_t_4, __pyx_kp_u_s_3, __pyx_kp_u__139, __pyx_v_text};
+ __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_16, __pyx_callargs+1-__pyx_t_6, 3+__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
+ if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 312, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0;
+ }
+ __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_replace); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 312, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_16);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_16, __pyx_tuple__150, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 312, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0;
+ __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_replace); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 312, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_16);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_16, __pyx_tuple__152, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 312, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0;
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_1))) __PYX_ERR(0, 312, __pyx_L1_error)
+ __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_1));
+ __pyx_t_1 = 0;
+
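+ /* Effect of the cleanup chain above, shown as a doctest-style sketch; the
+ * input string is illustrative only, assuming the quoted processor.pyx
+ * line 312:
+ *
+ * >>> re.sub(r"\s+", " ", "some  text <ID 1>/ here").replace(">/", ">").replace("]/", "]")
+ * 'some text <ID 1> here'
+ */
+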
+ /* "IndicTransToolkit/processor.pyx":313
+ * # Clean up any remaining placeholder artifacts
+ * text = re.sub(r"\s+", " ", text).replace(">/", ">").replace("]/", "]")
+ * self._placeholder_entity_maps.put(placeholder_entity_map) # <<<<<<<<<<<<<<
+ * return text
+ *
+ */
+ __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_placeholder_entity_maps, __pyx_n_s_put); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 313, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_16);
+ __pyx_t_4 = NULL;
+ __pyx_t_6 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_16))) {
+ __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_16);
+ if (likely(__pyx_t_4)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_16);
+ __Pyx_INCREF(__pyx_t_4);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_16, function);
+ __pyx_t_6 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_4, __pyx_v_placeholder_entity_map};
+ __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_16, __pyx_callargs+1-__pyx_t_6, 1+__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
+ if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 313, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0;
+ }
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":314
+ * text = re.sub(r"\s+", " ", text).replace(">/", ">").replace("]/", "]")
+ * self._placeholder_entity_maps.put(placeholder_entity_map)
+ * return text # <<<<<<<<<<<<<<
+ *
+ * # Internal Method: Normalize Text
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(__pyx_v_text);
+ __pyx_r = __pyx_v_text;
+ goto __pyx_L0;
+
+ /* "IndicTransToolkit/processor.pyx":251
+ *
+ * # Internal Method: Wrap Text with Placeholders
+ * cdef str _wrap_with_placeholders(self, str text) except *: # <<<<<<<<<<<<<<
+ * """
+ * Wrap substrings with matched patterns in the text with placeholders.
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_16);
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor._wrap_with_placeholders", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_placeholder_entity_map);
+ __Pyx_XDECREF(__pyx_v_patterns);
+ __Pyx_XDECREF(__pyx_v_pattern);
+ __Pyx_XDECREF(__pyx_v_matches);
+ __Pyx_XDECREF(__pyx_v_match);
+ __Pyx_XDECREF(__pyx_v_base_placeholder);
+ __Pyx_XDECREF(__pyx_v_indic_case);
+ __Pyx_XDECREF(__pyx_v_text);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
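+
+ /* For orientation, _wrap_with_placeholders restated as a compact Python
+ * sketch. It is assembled only from the processor.pyx fragments quoted in
+ * this file; the "<IDn>" base-placeholder format shown here is an assumption
+ * for illustration:
+ *
+ * def _wrap_with_placeholders_sketch(self, text):
+ *     placeholder_entity_map, serial_no = {}, 1
+ *     for pattern in patterns:
+ *         for match in set(pattern.findall(text)):
+ *             base_placeholder = f"<ID{serial_no}>"   # assumed format
+ *             placeholder_entity_map[base_placeholder] = match
+ *             # ...plus the spacing/bracket variants registered above...
+ *             text = text.replace(match, base_placeholder)
+ *             serial_no += 1
+ *     text = re.sub(r"\s+", " ", text).replace(">/", ">").replace("]/", "]")
+ *     self._placeholder_entity_maps.put(placeholder_entity_map)
+ *     return text
+ */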
+
+/* "IndicTransToolkit/processor.pyx":317
+ *
+ * # Internal Method: Normalize Text
+ * cdef str _normalize(self, str text) except *: # <<<<<<<<<<<<<<
+ * """
+ * Normalizes numerals and optionally wraps placeholders.
+ */
+
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__normalize(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("_normalize", 0);
+ __Pyx_INCREF(__pyx_v_text);
+
+ /* "IndicTransToolkit/processor.pyx":322
+ * """
+ * # Single-pass digit translation
+ * text = text.translate(self._digits_translation_table) # <<<<<<<<<<<<<<
+ *
+ * if self.inference:
+ */
+ __pyx_t_1 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PyUnicode_Type_translate, __pyx_v_text, __pyx_v_self->_digits_translation_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 322, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_1))) __PYX_ERR(0, 322, __pyx_L1_error)
+ __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_1));
+ __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":324
+ * text = text.translate(self._digits_translation_table)
+ *
+ * if self.inference: # <<<<<<<<<<<<<<
+ * text = self._wrap_with_placeholders(text)
+ * return text
+ */
+ if (__pyx_v_self->inference) {
+
+ /* "IndicTransToolkit/processor.pyx":325
+ *
+ * if self.inference:
+ * text = self._wrap_with_placeholders(text) # <<<<<<<<<<<<<<
+ * return text
+ *
+ */
+ __pyx_t_1 = ((struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self->__pyx_vtab)->_wrap_with_placeholders(__pyx_v_self, __pyx_v_text); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 325, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_1));
+ __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":324
+ * text = text.translate(self._digits_translation_table)
+ *
+ * if self.inference: # <<<<<<<<<<<<<<
+ * text = self._wrap_with_placeholders(text)
+ * return text
+ */
+ }
+
+ /* "IndicTransToolkit/processor.pyx":326
+ * if self.inference:
+ * text = self._wrap_with_placeholders(text)
+ * return text # <<<<<<<<<<<<<<
+ *
+ * # Internal Method: Indic Tokenize and Transliterate
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(__pyx_v_text);
+ __pyx_r = __pyx_v_text;
+ goto __pyx_L0;
+
+ /* "IndicTransToolkit/processor.pyx":317
+ *
+ * # Internal Method: Normalize Text
+ * cdef str _normalize(self, str text) except *: # <<<<<<<<<<<<<<
+ * """
+ * Normalizes numerals and optionally wraps placeholders.
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor._normalize", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_text);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
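+
+ /* _normalize at the Python level — a one-screen sketch taken directly from
+ * the processor.pyx lines quoted above (317-326):
+ *
+ * def _normalize(self, text):
+ *     # single-pass digit translation (native numerals -> a common form)
+ *     text = text.translate(self._digits_translation_table)
+ *     if self.inference:
+ *         text = self._wrap_with_placeholders(text)
+ *     return text
+ */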
+
+/* "IndicTransToolkit/processor.pyx":329
+ *
+ * # Internal Method: Indic Tokenize and Transliterate
+ * cdef str _do_indic_tokenize_and_transliterate( # <<<<<<<<<<<<<<
+ * self,
+ * str sentence,
+ */
+
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__do_indic_tokenize_and_transliterate(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sentence, PyObject *__pyx_v_normalizer, PyObject *__pyx_v_iso_lang, int __pyx_v_transliterate) {
+ PyObject *__pyx_v_normed = 0;
+ PyObject *__pyx_v_tokens = 0;
+ PyObject *__pyx_v_joined = 0;
+ PyObject *__pyx_v_xlated = 0;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ unsigned int __pyx_t_5;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("_do_indic_tokenize_and_transliterate", 1);
+
+ /* "IndicTransToolkit/processor.pyx":344
+ * cdef str xlated
+ *
+ * normed = normalizer.normalize(sentence.strip()) # <<<<<<<<<<<<<<
+ * tokens = indic_tokenize.trivial_tokenize(normed, iso_lang)
+ * joined = " ".join(tokens)
+ */
+ __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_normalizer, __pyx_n_s_normalize); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 344, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_3 = __Pyx_CallUnboundCMethod0(&__pyx_umethod_PyUnicode_Type_strip, __pyx_v_sentence); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 344, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_4 = NULL;
+ __pyx_t_5 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_2))) {
+ __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_2);
+ if (likely(__pyx_t_4)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
+ __Pyx_INCREF(__pyx_t_4);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_2, function);
+ __pyx_t_5 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_4, __pyx_t_3};
+ __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_2, __pyx_callargs+1-__pyx_t_5, 1+__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 344, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ }
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_1))) __PYX_ERR(0, 344, __pyx_L1_error)
+ __pyx_v_normed = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":345
+ *
+ * normed = normalizer.normalize(sentence.strip())
+ * tokens = indic_tokenize.trivial_tokenize(normed, iso_lang) # <<<<<<<<<<<<<<
+ * joined = " ".join(tokens)
+ * xlated = joined
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_indic_tokenize); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 345, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_trivial_tokenize); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 345, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_t_2 = NULL;
+ __pyx_t_5 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_3))) {
+ __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3);
+ if (likely(__pyx_t_2)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
+ __Pyx_INCREF(__pyx_t_2);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_3, function);
+ __pyx_t_5 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[3] = {__pyx_t_2, __pyx_v_normed, __pyx_v_iso_lang};
+ __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_3, __pyx_callargs+1-__pyx_t_5, 2+__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
+ if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 345, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ }
+ if (!(likely(PyList_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("list", __pyx_t_1))) __PYX_ERR(0, 345, __pyx_L1_error)
+ __pyx_v_tokens = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":346
+ * normed = normalizer.normalize(sentence.strip())
+ * tokens = indic_tokenize.trivial_tokenize(normed, iso_lang)
+ * joined = " ".join(tokens) # <<<<<<<<<<<<<<
+ * xlated = joined
+ * if transliterate:
+ */
+ __pyx_t_1 = PyUnicode_Join(__pyx_kp_u__139, __pyx_v_tokens); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 346, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_v_joined = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":347
+ * tokens = indic_tokenize.trivial_tokenize(normed, iso_lang)
+ * joined = " ".join(tokens)
+ * xlated = joined # <<<<<<<<<<<<<<
+ * if transliterate:
+ * xlated = self._xliterator.transliterate(joined, iso_lang, "hi")
+ */
+ __Pyx_INCREF(__pyx_v_joined);
+ __pyx_v_xlated = __pyx_v_joined;
+
+ /* "IndicTransToolkit/processor.pyx":348
+ * joined = " ".join(tokens)
+ * xlated = joined
+ * if transliterate: # <<<<<<<<<<<<<<
+ * xlated = self._xliterator.transliterate(joined, iso_lang, "hi")
+ * xlated = xlated.replace(" ् ", "्")
+ */
+ if (__pyx_v_transliterate) {
+
+ /* "IndicTransToolkit/processor.pyx":349
+ * xlated = joined
+ * if transliterate:
+ * xlated = self._xliterator.transliterate(joined, iso_lang, "hi") # <<<<<<<<<<<<<<
+ * xlated = xlated.replace(" ् ", "्")
+ * return xlated
+ */
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_xliterator, __pyx_n_s_transliterate); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 349, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_2 = NULL;
+ __pyx_t_5 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_3))) {
+ __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3);
+ if (likely(__pyx_t_2)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
+ __Pyx_INCREF(__pyx_t_2);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_3, function);
+ __pyx_t_5 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[4] = {__pyx_t_2, __pyx_v_joined, __pyx_v_iso_lang, __pyx_n_u_hi};
+ __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_3, __pyx_callargs+1-__pyx_t_5, 3+__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
+ if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 349, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ }
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_1))) __PYX_ERR(0, 349, __pyx_L1_error)
+ __Pyx_DECREF_SET(__pyx_v_xlated, ((PyObject*)__pyx_t_1));
+ __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":350
+ * if transliterate:
+ * xlated = self._xliterator.transliterate(joined, iso_lang, "hi")
+ * xlated = xlated.replace(" ् ", "्") # <<<<<<<<<<<<<<
+ * return xlated
+ *
+ */
+ if (unlikely(__pyx_v_xlated == Py_None)) {
+ PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "replace");
+ __PYX_ERR(0, 350, __pyx_L1_error)
+ }
+ __pyx_t_1 = PyUnicode_Replace(__pyx_v_xlated, __pyx_kp_u__153, __pyx_kp_u__154, -1L); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 350, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF_SET(__pyx_v_xlated, ((PyObject*)__pyx_t_1));
+ __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":348
+ * joined = " ".join(tokens)
+ * xlated = joined
+ * if transliterate: # <<<<<<<<<<<<<<
+ * xlated = self._xliterator.transliterate(joined, iso_lang, "hi")
+ * xlated = xlated.replace(" ् ", "्")
+ */
+ }
+
+ /* "IndicTransToolkit/processor.pyx":351
+ * xlated = self._xliterator.transliterate(joined, iso_lang, "hi")
+ * xlated = xlated.replace(" ् ", "्")
+ * return xlated # <<<<<<<<<<<<<<
+ *
+ * # Internal Method: Preprocess a Single Sentence
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(__pyx_v_xlated);
+ __pyx_r = __pyx_v_xlated;
+ goto __pyx_L0;
+
+ /* "IndicTransToolkit/processor.pyx":329
+ *
+ * # Internal Method: Indic Tokenize and Transliterate
+ * cdef str _do_indic_tokenize_and_transliterate( # <<<<<<<<<<<<<<
+ * self,
+ * str sentence,
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor._do_indic_tokenize_and_transliterate", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_normed);
+ __Pyx_XDECREF(__pyx_v_tokens);
+ __Pyx_XDECREF(__pyx_v_joined);
+ __Pyx_XDECREF(__pyx_v_xlated);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
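+
+ /* The Indic tokenize/transliterate helper, restated as a Python sketch from
+ * the quoted processor.pyx lines 329-351 (the halant-spacing fix in the last
+ * step is reconstructed and may differ in the exact literals):
+ *
+ * def _do_indic_tokenize_and_transliterate(self, sentence, normalizer, iso_lang, transliterate):
+ *     normed = normalizer.normalize(sentence.strip())
+ *     joined = " ".join(indic_tokenize.trivial_tokenize(normed, iso_lang))
+ *     if not transliterate:
+ *         return joined
+ *     # map into Devanagari ("hi") so one script feeds the shared model
+ *     xlated = self._xliterator.transliterate(joined, iso_lang, "hi")
+ *     return xlated.replace(" \u094d ", "\u094d")   # reconstructed literals
+ */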
+
+/* "IndicTransToolkit/processor.pyx":354
+ *
+ * # Internal Method: Preprocess a Single Sentence
+ * cdef str _preprocess( # <<<<<<<<<<<<<<
+ * self,
+ * str sent,
+ */
+
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__preprocess(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sent, PyObject *__pyx_v_src_lang, PyObject *__pyx_v_tgt_lang, PyObject *__pyx_v_normalizer, int __pyx_v_is_target) {
+ PyObject *__pyx_v_iso_lang = 0;
+ PyObject *__pyx_v_script_part = 0;
+ int __pyx_v_do_transliterate;
+ PyObject *__pyx_v_e_strip = 0;
+ PyObject *__pyx_v_e_norm = 0;
+ PyObject *__pyx_v_e_tokens = 0;
+ PyObject *__pyx_v_processed_sent = 0;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ int __pyx_t_4;
+ int __pyx_t_5;
+ PyObject *__pyx_t_6 = NULL;
+ unsigned int __pyx_t_7;
+ PyObject *__pyx_t_8 = NULL;
+ Py_ssize_t __pyx_t_9;
+ Py_UCS4 __pyx_t_10;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("_preprocess", 0);
+ __Pyx_INCREF(__pyx_v_sent);
+
+ /* "IndicTransToolkit/processor.pyx":366
+ * tokenization, transliteration, and adding language tags if necessary.
+ * """
+ * cdef str iso_lang = self._flores_codes.get(src_lang, "hi") # <<<<<<<<<<<<<<
+ * cdef str script_part = src_lang.split("_")[1]
+ * cdef bint do_transliterate = True
+ */
+ if (unlikely(__pyx_v_self->_flores_codes == Py_None)) {
+ PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "get");
+ __PYX_ERR(0, 366, __pyx_L1_error)
+ }
+ __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_self->_flores_codes, __pyx_v_src_lang, __pyx_n_u_hi); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 366, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_1))) __PYX_ERR(0, 366, __pyx_L1_error)
+ __pyx_v_iso_lang = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":367
+ * """
+ * cdef str iso_lang = self._flores_codes.get(src_lang, "hi")
+ * cdef str script_part = src_lang.split("_")[1] # <<<<<<<<<<<<<<
+ * cdef bint do_transliterate = True
+ * cdef str e_strip
+ */
+ if (unlikely(__pyx_v_src_lang == Py_None)) {
+ PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "split");
+ __PYX_ERR(0, 367, __pyx_L1_error)
+ }
+ __pyx_t_1 = PyUnicode_Split(__pyx_v_src_lang, __Pyx_NoneAsNull(__pyx_n_u__155), -1L); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 367, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = __Pyx_GetItemInt_List(__pyx_t_1, 1, long, 1, __Pyx_PyInt_From_long, 1, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 367, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 367, __pyx_L1_error)
+ __pyx_v_script_part = ((PyObject*)__pyx_t_2);
+ __pyx_t_2 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":368
+ * cdef str iso_lang = self._flores_codes.get(src_lang, "hi")
+ * cdef str script_part = src_lang.split("_")[1]
+ * cdef bint do_transliterate = True # <<<<<<<<<<<<<<
+ * cdef str e_strip
+ * cdef str e_norm
+ */
+ __pyx_v_do_transliterate = 1;
+
+ /* "IndicTransToolkit/processor.pyx":375
+ *
+ * # 1) Punctuation normalization
+ * sent = self._punc_norm(sent) # <<<<<<<<<<<<<<
+ *
+ * # 2) Numerals & placeholders
+ */
+ __pyx_t_2 = ((struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self->__pyx_vtab)->_punc_norm(__pyx_v_self, __pyx_v_sent); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 375, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF_SET(__pyx_v_sent, ((PyObject*)__pyx_t_2));
+ __pyx_t_2 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":378
+ *
+ * # 2) Numerals & placeholders
+ * sent = self._normalize(sent) # <<<<<<<<<<<<<<
+ *
+ * if script_part in ["Arab", "Aran", "Olck", "Mtei", "Latn"]:
+ */
+ __pyx_t_2 = ((struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self->__pyx_vtab)->_normalize(__pyx_v_self, __pyx_v_sent); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 378, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF_SET(__pyx_v_sent, ((PyObject*)__pyx_t_2));
+ __pyx_t_2 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":380
+ * sent = self._normalize(sent)
+ *
+ * if script_part in ["Arab", "Aran", "Olck", "Mtei", "Latn"]: # <<<<<<<<<<<<<<
+ * do_transliterate = False
+ *
+ */
+ __Pyx_INCREF(__pyx_v_script_part);
+ __pyx_t_3 = __pyx_v_script_part;
+ __pyx_t_5 = (__Pyx_PyUnicode_Equals(__pyx_t_3, __pyx_n_u_Arab, Py_EQ)); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 380, __pyx_L1_error)
+ if (!__pyx_t_5) {
+ } else {
+ __pyx_t_4 = __pyx_t_5;
+ goto __pyx_L4_bool_binop_done;
+ }
+ __pyx_t_5 = (__Pyx_PyUnicode_Equals(__pyx_t_3, __pyx_n_u_Aran, Py_EQ)); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 380, __pyx_L1_error)
+ if (!__pyx_t_5) {
+ } else {
+ __pyx_t_4 = __pyx_t_5;
+ goto __pyx_L4_bool_binop_done;
+ }
+ __pyx_t_5 = (__Pyx_PyUnicode_Equals(__pyx_t_3, __pyx_n_u_Olck, Py_EQ)); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 380, __pyx_L1_error)
+ if (!__pyx_t_5) {
+ } else {
+ __pyx_t_4 = __pyx_t_5;
+ goto __pyx_L4_bool_binop_done;
+ }
+ __pyx_t_5 = (__Pyx_PyUnicode_Equals(__pyx_t_3, __pyx_n_u_Mtei, Py_EQ)); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 380, __pyx_L1_error)
+ if (!__pyx_t_5) {
+ } else {
+ __pyx_t_4 = __pyx_t_5;
+ goto __pyx_L4_bool_binop_done;
+ }
+ __pyx_t_5 = (__Pyx_PyUnicode_Equals(__pyx_t_3, __pyx_n_u_Latn, Py_EQ)); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 380, __pyx_L1_error)
+ __pyx_t_4 = __pyx_t_5;
+ __pyx_L4_bool_binop_done:;
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_5 = __pyx_t_4;
+ if (__pyx_t_5) {
+
+ /* "IndicTransToolkit/processor.pyx":381
+ *
+ * if script_part in ["Arab", "Aran", "Olck", "Mtei", "Latn"]:
+ * do_transliterate = False # <<<<<<<<<<<<<<
+ *
+ * if iso_lang == "en":
+ */
+ __pyx_v_do_transliterate = 0;
+
+ /* "IndicTransToolkit/processor.pyx":380
+ * sent = self._normalize(sent)
+ *
+ * if script_part in ["Arab", "Aran", "Olck", "Mtei", "Latn"]: # <<<<<<<<<<<<<<
+ * do_transliterate = False
+ *
+ */
+ }
+
+ /* "IndicTransToolkit/processor.pyx":383
+ * do_transliterate = False
+ *
+ * if iso_lang == "en": # <<<<<<<<<<<<<<
+ * # English path
+ * e_strip = sent.strip()
+ */
+ __pyx_t_5 = (__Pyx_PyUnicode_Equals(__pyx_v_iso_lang, __pyx_n_u_en, Py_EQ)); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 383, __pyx_L1_error)
+ if (__pyx_t_5) {
+
+ /* "IndicTransToolkit/processor.pyx":385
+ * if iso_lang == "en":
+ * # English path
+ * e_strip = sent.strip() # <<<<<<<<<<<<<<
+ * e_norm = self._en_normalizer.normalize(e_strip)
+ * e_tokens = self._en_tok.tokenize(e_norm, escape=False)
+ */
+ __pyx_t_2 = __Pyx_CallUnboundCMethod0(&__pyx_umethod_PyUnicode_Type_strip, __pyx_v_sent); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 385, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 385, __pyx_L1_error)
+ __pyx_v_e_strip = ((PyObject*)__pyx_t_2);
+ __pyx_t_2 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":386
+ * # English path
+ * e_strip = sent.strip()
+ * e_norm = self._en_normalizer.normalize(e_strip) # <<<<<<<<<<<<<<
+ * e_tokens = self._en_tok.tokenize(e_norm, escape=False)
+ * processed_sent = " ".join(e_tokens)
+ */
+ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_en_normalizer, __pyx_n_s_normalize); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 386, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_6 = NULL;
+ __pyx_t_7 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_1))) {
+ __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_1);
+ if (likely(__pyx_t_6)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);
+ __Pyx_INCREF(__pyx_t_6);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_1, function);
+ __pyx_t_7 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_6, __pyx_v_e_strip};
+ __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_7, 1+__pyx_t_7);
+ __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
+ if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 386, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ }
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 386, __pyx_L1_error)
+ __pyx_v_e_norm = ((PyObject*)__pyx_t_2);
+ __pyx_t_2 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":387
+ * e_strip = sent.strip()
+ * e_norm = self._en_normalizer.normalize(e_strip)
+ * e_tokens = self._en_tok.tokenize(e_norm, escape=False) # <<<<<<<<<<<<<<
+ * processed_sent = " ".join(e_tokens)
+ * else:
+ */
+ __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_en_tok, __pyx_n_s_tokenize); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 387, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 387, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_INCREF(__pyx_v_e_norm);
+ __Pyx_GIVEREF(__pyx_v_e_norm);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_e_norm)) __PYX_ERR(0, 387, __pyx_L1_error);
+ __pyx_t_6 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 387, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ if (PyDict_SetItem(__pyx_t_6, __pyx_n_s_escape, Py_False) < 0) __PYX_ERR(0, 387, __pyx_L1_error)
+ __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_1, __pyx_t_6); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 387, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_8);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ if (!(likely(PyList_CheckExact(__pyx_t_8))||((__pyx_t_8) == Py_None) || __Pyx_RaiseUnexpectedTypeError("list", __pyx_t_8))) __PYX_ERR(0, 387, __pyx_L1_error)
+ __pyx_v_e_tokens = ((PyObject*)__pyx_t_8);
+ __pyx_t_8 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":388
+ * e_norm = self._en_normalizer.normalize(e_strip)
+ * e_tokens = self._en_tok.tokenize(e_norm, escape=False)
+ * processed_sent = " ".join(e_tokens) # <<<<<<<<<<<<<<
+ * else:
+ * # Indic path
+ */
+ __pyx_t_8 = PyUnicode_Join(__pyx_kp_u__139, __pyx_v_e_tokens); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 388, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_8);
+ __pyx_v_processed_sent = ((PyObject*)__pyx_t_8);
+ __pyx_t_8 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":383
+ * do_transliterate = False
+ *
+ * if iso_lang == "en": # <<<<<<<<<<<<<<
+ * # English path
+ * e_strip = sent.strip()
+ */
+ goto __pyx_L9;
+ }
+
+ /* "IndicTransToolkit/processor.pyx":391
+ * else:
+ * # Indic path
+ * processed_sent = self._do_indic_tokenize_and_transliterate(sent, normalizer, iso_lang, do_transliterate) # <<<<<<<<<<<<<<
+ *
+ * processed_sent = processed_sent.strip()
+ */
+ /*else*/ {
+ __pyx_t_8 = ((struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self->__pyx_vtab)->_do_indic_tokenize_and_transliterate(__pyx_v_self, __pyx_v_sent, __pyx_v_normalizer, __pyx_v_iso_lang, __pyx_v_do_transliterate); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 391, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_8);
+ __pyx_v_processed_sent = ((PyObject*)__pyx_t_8);
+ __pyx_t_8 = 0;
+ }
+ __pyx_L9:;
+
+ /* "IndicTransToolkit/processor.pyx":393
+ * processed_sent = self._do_indic_tokenize_and_transliterate(sent, normalizer, iso_lang, do_transliterate)
+ *
+ * processed_sent = processed_sent.strip() # <<<<<<<<<<<<<<
+ * if not is_target:
+ * return f"{src_lang} {tgt_lang} {processed_sent}"
+ */
+ __pyx_t_8 = __Pyx_CallUnboundCMethod0(&__pyx_umethod_PyUnicode_Type_strip, __pyx_v_processed_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 393, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_8);
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_8))||((__pyx_t_8) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_8))) __PYX_ERR(0, 393, __pyx_L1_error)
+ __Pyx_DECREF_SET(__pyx_v_processed_sent, ((PyObject*)__pyx_t_8));
+ __pyx_t_8 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":394
+ *
+ * processed_sent = processed_sent.strip()
+ * if not is_target: # <<<<<<<<<<<<<<
+ * return f"{src_lang} {tgt_lang} {processed_sent}"
+ * else:
+ */
+ __pyx_t_5 = (!__pyx_v_is_target);
+ if (__pyx_t_5) {
+
+ /* "IndicTransToolkit/processor.pyx":395
+ * processed_sent = processed_sent.strip()
+ * if not is_target:
+ * return f"{src_lang} {tgt_lang} {processed_sent}" # <<<<<<<<<<<<<<
+ * else:
+ * return processed_sent
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_8 = PyTuple_New(5); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 395, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_8);
+ __pyx_t_9 = 0;
+ __pyx_t_10 = 127;
+ __pyx_t_6 = __Pyx_PyUnicode_Unicode(__pyx_v_src_lang); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 395, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __pyx_t_10 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_6) > __pyx_t_10) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_6) : __pyx_t_10;
+ __pyx_t_9 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_6);
+ __Pyx_GIVEREF(__pyx_t_6);
+ PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_6);
+ __pyx_t_6 = 0;
+ __Pyx_INCREF(__pyx_kp_u__139);
+ __pyx_t_9 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__139);
+ PyTuple_SET_ITEM(__pyx_t_8, 1, __pyx_kp_u__139);
+ __pyx_t_6 = __Pyx_PyUnicode_Unicode(__pyx_v_tgt_lang); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 395, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __pyx_t_10 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_6) > __pyx_t_10) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_6) : __pyx_t_10;
+ __pyx_t_9 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_6);
+ __Pyx_GIVEREF(__pyx_t_6);
+ PyTuple_SET_ITEM(__pyx_t_8, 2, __pyx_t_6);
+ __pyx_t_6 = 0;
+ __Pyx_INCREF(__pyx_kp_u__139);
+ __pyx_t_9 += 1;
+ __Pyx_GIVEREF(__pyx_kp_u__139);
+ PyTuple_SET_ITEM(__pyx_t_8, 3, __pyx_kp_u__139);
+ __pyx_t_6 = __Pyx_PyUnicode_Unicode(__pyx_v_processed_sent); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 395, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __pyx_t_10 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_6) > __pyx_t_10) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_6) : __pyx_t_10;
+ __pyx_t_9 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_6);
+ __Pyx_GIVEREF(__pyx_t_6);
+ PyTuple_SET_ITEM(__pyx_t_8, 4, __pyx_t_6);
+ __pyx_t_6 = 0;
+ __pyx_t_6 = __Pyx_PyUnicode_Join(__pyx_t_8, 5, __pyx_t_9, __pyx_t_10); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 395, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+ __pyx_r = ((PyObject*)__pyx_t_6);
+ __pyx_t_6 = 0;
+ goto __pyx_L0;
+
+ /* "IndicTransToolkit/processor.pyx":394
+ *
+ * processed_sent = processed_sent.strip()
+ * if not is_target: # <<<<<<<<<<<<<<
+ * return f"{src_lang} {tgt_lang} {processed_sent}"
+ * else:
+ */
+ }
+
+ /* "IndicTransToolkit/processor.pyx":397
+ * return f"{src_lang} {tgt_lang} {processed_sent}"
+ * else:
+ * return processed_sent # <<<<<<<<<<<<<<
+ *
+ * # Internal Method: Postprocess a Single Sentence
+ */
+ /*else*/ {
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(__pyx_v_processed_sent);
+ __pyx_r = __pyx_v_processed_sent;
+ goto __pyx_L0;
+ }
+
+ /* "IndicTransToolkit/processor.pyx":354
+ *
+ * # Internal Method: Preprocess a Single Sentence
+ * cdef str _preprocess( # <<<<<<<<<<<<<<
+ * self,
+ * str sent,
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_8);
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor._preprocess", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_iso_lang);
+ __Pyx_XDECREF(__pyx_v_script_part);
+ __Pyx_XDECREF(__pyx_v_e_strip);
+ __Pyx_XDECREF(__pyx_v_e_norm);
+ __Pyx_XDECREF(__pyx_v_e_tokens);
+ __Pyx_XDECREF(__pyx_v_processed_sent);
+ __Pyx_XDECREF(__pyx_v_sent);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
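+
+ /* _preprocess as a Python sketch, assembled from the processor.pyx lines
+ * quoted above (354-397); no new logic is introduced here:
+ *
+ * def _preprocess(self, sent, src_lang, tgt_lang, normalizer, is_target):
+ *     iso_lang = self._flores_codes.get(src_lang, "hi")
+ *     script_part = src_lang.split("_")[1]
+ *     do_transliterate = script_part not in ["Arab", "Aran", "Olck", "Mtei", "Latn"]
+ *     sent = self._normalize(self._punc_norm(sent))
+ *     if iso_lang == "en":
+ *         tokens = self._en_tok.tokenize(self._en_normalizer.normalize(sent.strip()), escape=False)
+ *         processed_sent = " ".join(tokens)
+ *     else:
+ *         processed_sent = self._do_indic_tokenize_and_transliterate(sent, normalizer, iso_lang, do_transliterate)
+ *     processed_sent = processed_sent.strip()
+ *     # source side gets "src_lang tgt_lang sentence" tags; target side stays bare
+ *     return processed_sent if is_target else f"{src_lang} {tgt_lang} {processed_sent}"
+ */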
+
+/* "IndicTransToolkit/processor.pyx":400
+ *
+ * # Internal Method: Postprocess a Single Sentence
+ * cdef str _postprocess(self, object sent, str lang) except *: # <<<<<<<<<<<<<<
+ * """
+ * Postprocess a single sentence:
+ */
+
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__postprocess(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sent, PyObject *__pyx_v_lang) {
+ PyObject *__pyx_v_placeholder_entity_map = 0;
+ PyObject *__pyx_v_lang_code = 0;
+ PyObject *__pyx_v_script_code = 0;
+ PyObject *__pyx_v_iso_lang = 0;
+ PyObject *__pyx_v_k = 0;
+ PyObject *__pyx_v_v = 0;
+ PyObject *__pyx_v_xlated = 0;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ int __pyx_t_1;
+ int __pyx_t_2;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ PyObject *__pyx_t_5 = NULL;
+ unsigned int __pyx_t_6;
+ PyObject *__pyx_t_7 = NULL;
+ Py_ssize_t __pyx_t_8;
+ Py_ssize_t __pyx_t_9;
+ int __pyx_t_10;
+ int __pyx_t_11;
+ PyObject *__pyx_t_12 = NULL;
+ PyObject *__pyx_t_13 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("_postprocess", 0);
+ __Pyx_INCREF(__pyx_v_sent);
+
+ /* "IndicTransToolkit/processor.pyx":417
+ *
+ * # Unwrap if sent is a tuple or list
+ * if isinstance(sent, (tuple, list)): # <<<<<<<<<<<<<<
+ * sent = sent[0]
+ *
+ */
+ __pyx_t_2 = PyTuple_Check(__pyx_v_sent);
+ if (!__pyx_t_2) {
+ } else {
+ __pyx_t_1 = __pyx_t_2;
+ goto __pyx_L4_bool_binop_done;
+ }
+ __pyx_t_2 = PyList_Check(__pyx_v_sent);
+ __pyx_t_1 = __pyx_t_2;
+ __pyx_L4_bool_binop_done:;
+ if (__pyx_t_1) {
+
+ /* "IndicTransToolkit/processor.pyx":418
+ * # Unwrap if sent is a tuple or list
+ * if isinstance(sent, (tuple, list)):
+ * sent = sent[0] # <<<<<<<<<<<<<<
+ *
+ * placeholder_entity_map = self._placeholder_entity_maps.get()
+ */
+ __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_sent, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 418, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF_SET(__pyx_v_sent, __pyx_t_3);
+ __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":417
+ *
+ * # Unwrap if sent is a tuple or list
+ * if isinstance(sent, (tuple, list)): # <<<<<<<<<<<<<<
+ * sent = sent[0]
+ *
+ */
+ }
+
+ /* "IndicTransToolkit/processor.pyx":420
+ * sent = sent[0]
+ *
+ * placeholder_entity_map = self._placeholder_entity_maps.get() # <<<<<<<<<<<<<<
+ * lang_code, script_code = lang.split("_", 1)
+ * iso_lang = self._flores_codes.get(lang, "hi")
+ */
+ __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_placeholder_entity_maps, __pyx_n_s_get); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 420, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_5 = NULL;
+ __pyx_t_6 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_4))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_4, function);
+ __pyx_t_6 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_5, NULL};
+ __pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 0+__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 420, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ }
+ if (!(likely(PyDict_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None) || __Pyx_RaiseUnexpectedTypeError("dict", __pyx_t_3))) __PYX_ERR(0, 420, __pyx_L1_error)
+ __pyx_v_placeholder_entity_map = ((PyObject*)__pyx_t_3);
+ __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":421
+ *
+ * placeholder_entity_map = self._placeholder_entity_maps.get()
+ * lang_code, script_code = lang.split("_", 1) # <<<<<<<<<<<<<<
+ * iso_lang = self._flores_codes.get(lang, "hi")
+ *
+ */
+ if (unlikely(__pyx_v_lang == Py_None)) {
+ PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "split");
+ __PYX_ERR(0, 421, __pyx_L1_error)
+ }
+ __pyx_t_3 = PyUnicode_Split(__pyx_v_lang, __Pyx_NoneAsNull(__pyx_n_u__155), 1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 421, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ if (1) {
+ PyObject* sequence = __pyx_t_3;
+ Py_ssize_t size = __Pyx_PySequence_SIZE(sequence);
+ if (unlikely(size != 2)) {
+ if (size > 2) __Pyx_RaiseTooManyValuesError(2);
+ else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size);
+ __PYX_ERR(0, 421, __pyx_L1_error)
+ }
+ #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+ __pyx_t_4 = PyList_GET_ITEM(sequence, 0);
+ __pyx_t_5 = PyList_GET_ITEM(sequence, 1);
+ __Pyx_INCREF(__pyx_t_4);
+ __Pyx_INCREF(__pyx_t_5);
+ #else
+ __pyx_t_4 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 421, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_5 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 421, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ #endif
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ }
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_4))) __PYX_ERR(0, 421, __pyx_L1_error)
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_5))||((__pyx_t_5) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_5))) __PYX_ERR(0, 421, __pyx_L1_error)
+ __pyx_v_lang_code = ((PyObject*)__pyx_t_4);
+ __pyx_t_4 = 0;
+ __pyx_v_script_code = ((PyObject*)__pyx_t_5);
+ __pyx_t_5 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":422
+ * placeholder_entity_map = self._placeholder_entity_maps.get()
+ * lang_code, script_code = lang.split("_", 1)
+ * iso_lang = self._flores_codes.get(lang, "hi") # <<<<<<<<<<<<<<
+ *
+ * # Fix for Perso-Arabic scripts
+ */
+ if (unlikely(__pyx_v_self->_flores_codes == Py_None)) {
+ PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "get");
+ __PYX_ERR(0, 422, __pyx_L1_error)
+ }
+ __pyx_t_3 = __Pyx_PyDict_GetItemDefault(__pyx_v_self->_flores_codes, __pyx_v_lang, __pyx_n_u_hi); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 422, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_3))) __PYX_ERR(0, 422, __pyx_L1_error)
+ __pyx_v_iso_lang = ((PyObject*)__pyx_t_3);
+ __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":425
+ *
+ * # Fix for Perso-Arabic scripts
+ * if script_code in ["Arab", "Aran"]: # <<<<<<<<<<<<<<
+ * sent = (
+ * sent.replace(" ؟", "؟")
+ */
+ __Pyx_INCREF(__pyx_v_script_code);
+ __pyx_t_7 = __pyx_v_script_code;
+ __pyx_t_2 = (__Pyx_PyUnicode_Equals(__pyx_t_7, __pyx_n_u_Arab, Py_EQ)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 425, __pyx_L1_error)
+ if (!__pyx_t_2) {
+ } else {
+ __pyx_t_1 = __pyx_t_2;
+ goto __pyx_L7_bool_binop_done;
+ }
+ __pyx_t_2 = (__Pyx_PyUnicode_Equals(__pyx_t_7, __pyx_n_u_Aran, Py_EQ)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 425, __pyx_L1_error)
+ __pyx_t_1 = __pyx_t_2;
+ __pyx_L7_bool_binop_done:;
+ __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+ __pyx_t_2 = __pyx_t_1;
+ if (__pyx_t_2) {
+
+ /* "IndicTransToolkit/processor.pyx":427
+ * if script_code in ["Arab", "Aran"]:
+ * sent = (
+ * sent.replace(" ؟", "؟") # <<<<<<<<<<<<<<
+ * .replace(" ۔", "۔")
+ * .replace(" ،", "،")
+ */
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_sent, __pyx_n_s_replace); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 427, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_tuple__158, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 427, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":428
+ * sent = (
+ * sent.replace(" ؟", "؟")
+ * .replace(" ۔", "۔") # <<<<<<<<<<<<<<
+ * .replace(" ،", "،")
+ * .replace("ٮ۪", "ؠ")
+ */
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_replace); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 428, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_tuple__161, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 428, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":429
+ * sent.replace(" ؟", "؟")
+ * .replace(" ۔", "۔")
+ * .replace(" ،", "،") # <<<<<<<<<<<<<<
+ * .replace("ٮ۪", "ؠ")
+ * )
+ */
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_replace); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_tuple__164, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 429, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":430
+ * .replace(" ۔", "۔")
+ * .replace(" ،", "،")
+ * .replace("ٮ۪", "ؠ") # <<<<<<<<<<<<<<
+ * )
+ *
+ */
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_replace); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_tuple__167, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 430, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_DECREF_SET(__pyx_v_sent, __pyx_t_5);
+ __pyx_t_5 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":425
+ *
+ * # Fix for Perso-Arabic scripts
+ * if script_code in ["Arab", "Aran"]: # <<<<<<<<<<<<<<
+ * sent = (
+ * sent.replace(" ؟", "؟")
+ */
+ }
+
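+ /* Illustrative note (not Cython output): the Arab/Aran branch above removes
+ * the space that Urdu-style tokenization leaves before punctuation and
+ * normalizes one letter form. A sketch of the chained replaces, assuming the
+ * replacement pairs of the upstream IndicTransToolkit source (the characters
+ * were stripped from the quoted comments in this file):
+ *
+ *     sent = (sent.replace(" ؟", "؟").replace(" ۔", "۔")
+ *                 .replace(" ،", "،").replace("ٮ۪", "ؠ"))
+ */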
+ /* "IndicTransToolkit/processor.pyx":434
+ *
+ * # Oriya fix
+ * if lang_code == "ory": # <<<<<<<<<<<<<<
+ * sent = sent.replace("ଯ଼", "ୟ")
+ *
+ */
+ __pyx_t_2 = (__Pyx_PyUnicode_Equals(__pyx_v_lang_code, __pyx_n_u_ory, Py_EQ)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 434, __pyx_L1_error)
+ if (__pyx_t_2) {
+
+ /* "IndicTransToolkit/processor.pyx":435
+ * # Oriya fix
+ * if lang_code == "ory":
+ * sent = sent.replace("ଯ଼", "ୟ") # <<<<<<<<<<<<<<
+ *
+ * # Restore placeholders
+ */
+ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_sent, __pyx_n_s_replace); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 435, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_5, __pyx_tuple__170, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 435, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __Pyx_DECREF_SET(__pyx_v_sent, __pyx_t_3);
+ __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":434
+ *
+ * # Oriya fix
+ * if lang_code == "ory": # <<<<<<<<<<<<<<
+ * sent = sent.replace("ଯ଼", "ୟ")
+ *
+ */
+ }
+
+ /* "IndicTransToolkit/processor.pyx":438
+ *
+ * # Restore placeholders
+ * for k, v in placeholder_entity_map.items(): # <<<<<<<<<<<<<<
+ * sent = sent.replace(k, v)
+ *
+ */
+ __pyx_t_8 = 0;
+ if (unlikely(__pyx_v_placeholder_entity_map == Py_None)) {
+ PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "items");
+ __PYX_ERR(0, 438, __pyx_L1_error)
+ }
+ __pyx_t_5 = __Pyx_dict_iterator(__pyx_v_placeholder_entity_map, 1, __pyx_n_s_items, (&__pyx_t_9), (&__pyx_t_10)); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 438, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_3);
+ __pyx_t_3 = __pyx_t_5;
+ __pyx_t_5 = 0;
+ while (1) {
+ __pyx_t_11 = __Pyx_dict_iter_next(__pyx_t_3, __pyx_t_9, &__pyx_t_8, &__pyx_t_5, &__pyx_t_4, NULL, __pyx_t_10);
+ if (unlikely(__pyx_t_11 == 0)) break;
+ if (unlikely(__pyx_t_11 == -1)) __PYX_ERR(0, 438, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_GOTREF(__pyx_t_4);
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_5))||((__pyx_t_5) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_5))) __PYX_ERR(0, 438, __pyx_L1_error)
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_4))) __PYX_ERR(0, 438, __pyx_L1_error)
+ __Pyx_XDECREF_SET(__pyx_v_k, ((PyObject*)__pyx_t_5));
+ __pyx_t_5 = 0;
+ __Pyx_XDECREF_SET(__pyx_v_v, ((PyObject*)__pyx_t_4));
+ __pyx_t_4 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":439
+ * # Restore placeholders
+ * for k, v in placeholder_entity_map.items():
+ * sent = sent.replace(k, v) # <<<<<<<<<<<<<<
+ *
+ * # Detokenize
+ */
+ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_sent, __pyx_n_s_replace); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 439, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_12 = NULL;
+ __pyx_t_6 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_5))) {
+ __pyx_t_12 = PyMethod_GET_SELF(__pyx_t_5);
+ if (likely(__pyx_t_12)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
+ __Pyx_INCREF(__pyx_t_12);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_5, function);
+ __pyx_t_6 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[3] = {__pyx_t_12, __pyx_v_k, __pyx_v_v};
+ __pyx_t_4 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_6, 2+__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_12); __pyx_t_12 = 0;
+ if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 439, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ }
+ __Pyx_DECREF_SET(__pyx_v_sent, __pyx_t_4);
+ __pyx_t_4 = 0;
+ }
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
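+ /* Illustrative note (not Cython output): the loop above substitutes every
+ * placeholder token in the map popped from _placeholder_entity_maps back into
+ * the sentence. A worked example with a hypothetical map entry:
+ *
+ *     placeholder_entity_map = {"<ID1>": "COVID-19"}
+ *     sent = "<ID1> cases rose".replace("<ID1>", "COVID-19")
+ *     # -> "COVID-19 cases rose"
+ */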
+ /* "IndicTransToolkit/processor.pyx":442
+ *
+ * # Detokenize
+ * if lang == "eng_Latn": # <<<<<<<<<<<<<<
+ * return self._en_detok.detokenize(sent.split(" "))
+ * else:
+ */
+ __pyx_t_2 = (__Pyx_PyUnicode_Equals(__pyx_v_lang, __pyx_n_u_eng_Latn, Py_EQ)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 442, __pyx_L1_error)
+ if (__pyx_t_2) {
+
+ /* "IndicTransToolkit/processor.pyx":443
+ * # Detokenize
+ * if lang == "eng_Latn":
+ * return self._en_detok.detokenize(sent.split(" ")) # <<<<<<<<<<<<<<
+ * else:
+ * xlated = self._xliterator.transliterate(sent, "hi", iso_lang)
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_en_detok, __pyx_n_s_detokenize); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 443, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_v_sent, __pyx_n_s_split); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 443, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_12);
+ __pyx_t_13 = NULL;
+ __pyx_t_6 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_12))) {
+ __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_12);
+ if (likely(__pyx_t_13)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_12);
+ __Pyx_INCREF(__pyx_t_13);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_12, function);
+ __pyx_t_6 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_13, __pyx_kp_u__139};
+ __pyx_t_5 = __Pyx_PyObject_FastCall(__pyx_t_12, __pyx_callargs+1-__pyx_t_6, 1+__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0;
+ if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 443, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0;
+ }
+ __pyx_t_12 = NULL;
+ __pyx_t_6 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_4))) {
+ __pyx_t_12 = PyMethod_GET_SELF(__pyx_t_4);
+ if (likely(__pyx_t_12)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
+ __Pyx_INCREF(__pyx_t_12);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_4, function);
+ __pyx_t_6 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_12, __pyx_t_5};
+ __pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 1+__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_12); __pyx_t_12 = 0;
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 443, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ }
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_3))) __PYX_ERR(0, 443, __pyx_L1_error)
+ __pyx_r = ((PyObject*)__pyx_t_3);
+ __pyx_t_3 = 0;
+ goto __pyx_L0;
+
+ /* "IndicTransToolkit/processor.pyx":442
+ *
+ * # Detokenize
+ * if lang == "eng_Latn": # <<<<<<<<<<<<<<
+ * return self._en_detok.detokenize(sent.split(" "))
+ * else:
+ */
+ }
+
+ /* "IndicTransToolkit/processor.pyx":445
+ * return self._en_detok.detokenize(sent.split(" "))
+ * else:
+ * xlated = self._xliterator.transliterate(sent, "hi", iso_lang) # <<<<<<<<<<<<<<
+ * return indic_detokenize.trivial_detokenize(xlated, iso_lang)
+ *
+ */
+ /*else*/ {
+ __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_xliterator, __pyx_n_s_transliterate); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 445, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_5 = NULL;
+ __pyx_t_6 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_4))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_4, function);
+ __pyx_t_6 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[4] = {__pyx_t_5, __pyx_v_sent, __pyx_n_u_hi, __pyx_v_iso_lang};
+ __pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 3+__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 445, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ }
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_3))) __PYX_ERR(0, 445, __pyx_L1_error)
+ __pyx_v_xlated = ((PyObject*)__pyx_t_3);
+ __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":446
+ * else:
+ * xlated = self._xliterator.transliterate(sent, "hi", iso_lang)
+ * return indic_detokenize.trivial_detokenize(xlated, iso_lang) # <<<<<<<<<<<<<<
+ *
+ * # Exposed Method: Preprocess a Batch of Sentences
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_indic_detokenize); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 446, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_trivial_detokenize); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 446, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __pyx_t_4 = NULL;
+ __pyx_t_6 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_5))) {
+ __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_5);
+ if (likely(__pyx_t_4)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
+ __Pyx_INCREF(__pyx_t_4);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_5, function);
+ __pyx_t_6 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[3] = {__pyx_t_4, __pyx_v_xlated, __pyx_v_iso_lang};
+ __pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_6, 2+__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
+ if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 446, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ }
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_3))) __PYX_ERR(0, 446, __pyx_L1_error)
+ __pyx_r = ((PyObject*)__pyx_t_3);
+ __pyx_t_3 = 0;
+ goto __pyx_L0;
+ }
+
+ /* "IndicTransToolkit/processor.pyx":400
+ *
+ * # Internal Method: Postprocess a Single Sentence
+ * cdef str _postprocess(self, object sent, str lang) except *: # <<<<<<<<<<<<<<
+ * """
+ * Postprocess a single sentence:
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_7);
+ __Pyx_XDECREF(__pyx_t_12);
+ __Pyx_XDECREF(__pyx_t_13);
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor._postprocess", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_placeholder_entity_map);
+ __Pyx_XDECREF(__pyx_v_lang_code);
+ __Pyx_XDECREF(__pyx_v_script_code);
+ __Pyx_XDECREF(__pyx_v_iso_lang);
+ __Pyx_XDECREF(__pyx_v_k);
+ __Pyx_XDECREF(__pyx_v_v);
+ __Pyx_XDECREF(__pyx_v_xlated);
+ __Pyx_XDECREF(__pyx_v_sent);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
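+/* Illustrative note (not Cython output): _postprocess ends with a fork. For
+ * eng_Latn the Moses-style English detokenizer is used; for Indic targets the
+ * model output, which IndicTrans2 keeps in Devanagari under its script
+ * unification scheme, is first transliterated from "hi" back to the target
+ * script and then detokenized:
+ *
+ *     xlated = self._xliterator.transliterate(sent, "hi", iso_lang)
+ *     return indic_detokenize.trivial_detokenize(xlated, iso_lang)
+ */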
+/* "IndicTransToolkit/processor.pyx":449
+ *
+ * # Exposed Method: Preprocess a Batch of Sentences
+ * cpdef list preprocess_batch( # <<<<<<<<<<<<<<
+ * self,
+ * List[str] batch,
+ */
+
+static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_3preprocess_batch(PyObject *__pyx_v_self,
+#if CYTHON_METH_FASTCALL
+PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
+#else
+PyObject *__pyx_args, PyObject *__pyx_kwds
+#endif
+); /*proto*/
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_batch, PyObject *__pyx_v_src_lang, int __pyx_skip_dispatch, struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch *__pyx_optional_args) {
+
+ /* "IndicTransToolkit/processor.pyx":453
+ * List[str] batch,
+ * str src_lang,
+ * str tgt_lang=None, # <<<<<<<<<<<<<<
+ * bint is_target=False,
+ * bint visualize=False
+ */
+ PyObject *__pyx_v_tgt_lang = ((PyObject*)Py_None);
+
+ /* "IndicTransToolkit/processor.pyx":454
+ * str src_lang,
+ * str tgt_lang=None,
+ * bint is_target=False, # <<<<<<<<<<<<<<
+ * bint visualize=False
+ * ):
+ */
+ int __pyx_v_is_target = ((int)0);
+
+ /* "IndicTransToolkit/processor.pyx":455
+ * str tgt_lang=None,
+ * bint is_target=False,
+ * bint visualize=False # <<<<<<<<<<<<<<
+ * ):
+ * """
+ */
+ int __pyx_v_visualize = ((int)0);
+ PyObject *__pyx_v_normalizer = 0;
+ PyObject *__pyx_v_iso_code = 0;
+ PyObject *__pyx_v_iterator = 0;
+ int __pyx_v_n;
+ PyObject *__pyx_7genexpr__pyx_v_s = NULL;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ PyObject *__pyx_t_5 = NULL;
+ PyObject *__pyx_t_6 = NULL;
+ unsigned int __pyx_t_7;
+ Py_ssize_t __pyx_t_8;
+ int __pyx_t_9;
+ PyObject *(*__pyx_t_10)(PyObject *);
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("preprocess_batch", 1);
+ if (__pyx_optional_args) {
+ if (__pyx_optional_args->__pyx_n > 0) {
+ __pyx_v_tgt_lang = __pyx_optional_args->tgt_lang;
+ if (__pyx_optional_args->__pyx_n > 1) {
+ __pyx_v_is_target = __pyx_optional_args->is_target;
+ if (__pyx_optional_args->__pyx_n > 2) {
+ __pyx_v_visualize = __pyx_optional_args->visualize;
+ }
+ }
+ }
+ }
+
+ /* "IndicTransToolkit/processor.pyx":449
+ *
+ * # Exposed Method: Preprocess a Batch of Sentences
+ * cpdef list preprocess_batch( # <<<<<<<<<<<<<<
+ * self,
+ * List[str] batch,
+ */
+ /* Check if called by wrapper */
+ if (unlikely(__pyx_skip_dispatch)) ;
+ /* Check if overridden in Python */
+ else if (unlikely((Py_TYPE(((PyObject *)__pyx_v_self))->tp_dictoffset != 0) || __Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)))) {
+ #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
+ static PY_UINT64_T __pyx_tp_dict_version = __PYX_DICT_VERSION_INIT, __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
+ if (unlikely(!__Pyx_object_dict_version_matches(((PyObject *)__pyx_v_self), __pyx_tp_dict_version, __pyx_obj_dict_version))) {
+ PY_UINT64_T __pyx_typedict_guard = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
+ #endif
+ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_preprocess_batch); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 449, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ if (!__Pyx_IsSameCFunction(__pyx_t_1, (void*) __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_3preprocess_batch)) {
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_3 = __Pyx_PyBool_FromLong(__pyx_v_is_target); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 449, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_4 = __Pyx_PyBool_FromLong(__pyx_v_visualize); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 449, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_INCREF(__pyx_t_1);
+ __pyx_t_5 = __pyx_t_1; __pyx_t_6 = NULL;
+ __pyx_t_7 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_5))) {
+ __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5);
+ if (likely(__pyx_t_6)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
+ __Pyx_INCREF(__pyx_t_6);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_5, function);
+ __pyx_t_7 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[6] = {__pyx_t_6, __pyx_v_batch, __pyx_v_src_lang, __pyx_v_tgt_lang, __pyx_t_3, __pyx_t_4};
+ __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_7, 5+__pyx_t_7);
+ __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 449, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ }
+ if (!(likely(PyList_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("list", __pyx_t_2))) __PYX_ERR(0, 449, __pyx_L1_error)
+ __pyx_r = ((PyObject*)__pyx_t_2);
+ __pyx_t_2 = 0;
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ goto __pyx_L0;
+ }
+ #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
+ __pyx_tp_dict_version = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
+ __pyx_obj_dict_version = __Pyx_get_object_dict_version(((PyObject *)__pyx_v_self));
+ if (unlikely(__pyx_typedict_guard != __pyx_tp_dict_version)) {
+ __pyx_tp_dict_version = __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
+ }
+ #endif
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
+ }
+ #endif
+ }
+
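+ /* Illustrative note (not Cython output): the guard above is Cython's standard
+ * dispatch for cpdef methods. If a Python subclass overrides preprocess_batch,
+ * the override found on the instance is called; otherwise control falls
+ * through to the compiled body, and the dict-version counters cache the
+ * lookup. A sketch of an override that would take the Python path, with a
+ * hypothetical subclass name:
+ *
+ *     class LoggingProcessor(IndicProcessor):
+ *         def preprocess_batch(self, batch, src_lang, tgt_lang=None,
+ *                              is_target=False, visualize=False):
+ *             print(f"batch of {len(batch)}")
+ *             return super().preprocess_batch(batch, src_lang, tgt_lang,
+ *                                             is_target, visualize)
+ */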
+ /* "IndicTransToolkit/processor.pyx":461
+ * This is exposed for external use.
+ * """
+ * cdef object normalizer = None # <<<<<<<<<<<<<<
+ * cdef str iso_code = self._flores_codes.get(src_lang, "hi")
+ * cdef object iterator
+ */
+ __Pyx_INCREF(Py_None);
+ __pyx_v_normalizer = Py_None;
+
+ /* "IndicTransToolkit/processor.pyx":462
+ * """
+ * cdef object normalizer = None
+ * cdef str iso_code = self._flores_codes.get(src_lang, "hi") # <<<<<<<<<<<<<<
+ * cdef object iterator
+ * cdef list results
+ */
+ if (unlikely(__pyx_v_self->_flores_codes == Py_None)) {
+ PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "get");
+ __PYX_ERR(0, 462, __pyx_L1_error)
+ }
+ __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_self->_flores_codes, __pyx_v_src_lang, __pyx_n_u_hi); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 462, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ if (!(likely(PyUnicode_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_1))) __PYX_ERR(0, 462, __pyx_L1_error)
+ __pyx_v_iso_code = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":466
+ * cdef list results
+ * cdef int i
+ * cdef int n = len(batch) # <<<<<<<<<<<<<<
+ *
+ * if src_lang != "eng_Latn":
+ */
+ if (unlikely(__pyx_v_batch == Py_None)) {
+ PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
+ __PYX_ERR(0, 466, __pyx_L1_error)
+ }
+ __pyx_t_8 = __Pyx_PyList_GET_SIZE(__pyx_v_batch); if (unlikely(__pyx_t_8 == ((Py_ssize_t)-1))) __PYX_ERR(0, 466, __pyx_L1_error)
+ __pyx_v_n = __pyx_t_8;
+
+ /* "IndicTransToolkit/processor.pyx":468
+ * cdef int n = len(batch)
+ *
+ * if src_lang != "eng_Latn": # <<<<<<<<<<<<<<
+ * normalizer = IndicNormalizerFactory().get_normalizer(iso_code)
+ *
+ */
+ __pyx_t_9 = (__Pyx_PyUnicode_Equals(__pyx_v_src_lang, __pyx_n_u_eng_Latn, Py_NE)); if (unlikely((__pyx_t_9 < 0))) __PYX_ERR(0, 468, __pyx_L1_error)
+ if (__pyx_t_9) {
+
+ /* "IndicTransToolkit/processor.pyx":469
+ *
+ * if src_lang != "eng_Latn":
+ * normalizer = IndicNormalizerFactory().get_normalizer(iso_code) # <<<<<<<<<<<<<<
+ *
+ * if visualize:
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_IndicNormalizerFactory); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 469, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_4 = NULL;
+ __pyx_t_7 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_5))) {
+ __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_5);
+ if (likely(__pyx_t_4)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
+ __Pyx_INCREF(__pyx_t_4);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_5, function);
+ __pyx_t_7 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_4, NULL};
+ __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_7, 0+__pyx_t_7);
+ __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
+ if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 469, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ }
+ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_get_normalizer); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 469, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_t_2 = NULL;
+ __pyx_t_7 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_5))) {
+ __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_5);
+ if (likely(__pyx_t_2)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
+ __Pyx_INCREF(__pyx_t_2);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_5, function);
+ __pyx_t_7 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_2, __pyx_v_iso_code};
+ __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_7, 1+__pyx_t_7);
+ __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
+ if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 469, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ }
+ __Pyx_DECREF_SET(__pyx_v_normalizer, __pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":468
+ * cdef int n = len(batch)
+ *
+ * if src_lang != "eng_Latn": # <<<<<<<<<<<<<<
+ * normalizer = IndicNormalizerFactory().get_normalizer(iso_code)
+ *
+ */
+ }
+
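+ /* Illustrative note (not Cython output): a normalizer is built only for Indic
+ * sources; eng_Latn input skips normalization entirely. The factory comes from
+ * indic-nlp-library, roughly:
+ *
+ *     from indicnlp.normalize.indic_normalize import IndicNormalizerFactory
+ *     normalizer = IndicNormalizerFactory().get_normalizer("hi")  # hin_Deva
+ */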
+ /* "IndicTransToolkit/processor.pyx":471
+ * normalizer = IndicNormalizerFactory().get_normalizer(iso_code)
+ *
+ * if visualize: # <<<<<<<<<<<<<<
+ * iterator = tqdm(batch, total=n, desc=f" | > Pre-processing {src_lang}", unit="line")
+ * else:
+ */
+ if (__pyx_v_visualize) {
+
+ /* "IndicTransToolkit/processor.pyx":472
+ *
+ * if visualize:
+ * iterator = tqdm(batch, total=n, desc=f" | > Pre-processing {src_lang}", unit="line") # <<<<<<<<<<<<<<
+ * else:
+ * iterator = batch
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_tqdm); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 472, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 472, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_INCREF(__pyx_v_batch);
+ __Pyx_GIVEREF(__pyx_v_batch);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_v_batch)) __PYX_ERR(0, 472, __pyx_L1_error);
+ __pyx_t_2 = __Pyx_PyDict_NewPresized(3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 472, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_n); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 472, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ if (PyDict_SetItem(__pyx_t_2, __pyx_n_s_total, __pyx_t_4) < 0) __PYX_ERR(0, 472, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __pyx_t_4 = __Pyx_PyUnicode_Unicode(__pyx_v_src_lang); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 472, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_3 = __Pyx_PyUnicode_Concat(__pyx_kp_u_Pre_processing, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 472, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ if (PyDict_SetItem(__pyx_t_2, __pyx_n_s_desc, __pyx_t_3) < 0) __PYX_ERR(0, 472, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (PyDict_SetItem(__pyx_t_2, __pyx_n_s_unit, __pyx_n_u_line) < 0) __PYX_ERR(0, 472, __pyx_L1_error)
+ __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_5, __pyx_t_2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 472, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_v_iterator = __pyx_t_3;
+ __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":471
+ * normalizer = IndicNormalizerFactory().get_normalizer(iso_code)
+ *
+ * if visualize: # <<<<<<<<<<<<<<
+ * iterator = tqdm(batch, total=n, desc=f" | > Pre-processing {src_lang}", unit="line")
+ * else:
+ */
+ goto __pyx_L4;
+ }
+
+ /* "IndicTransToolkit/processor.pyx":474
+ * iterator = tqdm(batch, total=n, desc=f" | > Pre-processing {src_lang}", unit="line")
+ * else:
+ * iterator = batch # <<<<<<<<<<<<<<
+ *
+ * return [self._preprocess(s, src_lang, tgt_lang, normalizer, is_target) for s in iterator]
+ */
+ /*else*/ {
+ __Pyx_INCREF(__pyx_v_batch);
+ __pyx_v_iterator = __pyx_v_batch;
+ }
+ __pyx_L4:;
+
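+ /* Illustrative note (not Cython output): the tqdm branch above also shows how
+ * Cython lowers the f-string desc: __Pyx_PyUnicode_Unicode coerces src_lang to
+ * str and __Pyx_PyUnicode_Concat joins it to the literal prefix, i.e. roughly
+ *
+ *     desc = " | > Pre-processing " + str(src_lang)
+ */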
+ /* "IndicTransToolkit/processor.pyx":476
+ * iterator = batch
+ *
+ * return [self._preprocess(s, src_lang, tgt_lang, normalizer, is_target) for s in iterator] # <<<<<<<<<<<<<<
+ *
+ * # Exposed Method: Postprocess a Batch of Sentences
+ */
+ __Pyx_XDECREF(__pyx_r);
+ { /* enter inner scope */
+ __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 476, __pyx_L7_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ if (likely(PyList_CheckExact(__pyx_v_iterator)) || PyTuple_CheckExact(__pyx_v_iterator)) {
+ __pyx_t_2 = __pyx_v_iterator; __Pyx_INCREF(__pyx_t_2);
+ __pyx_t_8 = 0;
+ __pyx_t_10 = NULL;
+ } else {
+ __pyx_t_8 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_iterator); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 476, __pyx_L7_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_10 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_2); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 476, __pyx_L7_error)
+ }
+ for (;;) {
+ if (likely(!__pyx_t_10)) {
+ if (likely(PyList_CheckExact(__pyx_t_2))) {
+ {
+ Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_2);
+ #if !CYTHON_ASSUME_SAFE_MACROS
+ if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 476, __pyx_L7_error)
+ #endif
+ if (__pyx_t_8 >= __pyx_temp) break;
+ }
+ #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+ __pyx_t_5 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely((0 < 0))) __PYX_ERR(0, 476, __pyx_L7_error)
+ #else
+ __pyx_t_5 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 476, __pyx_L7_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ #endif
+ } else {
+ {
+ Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_2);
+ #if !CYTHON_ASSUME_SAFE_MACROS
+ if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 476, __pyx_L7_error)
+ #endif
+ if (__pyx_t_8 >= __pyx_temp) break;
+ }
+ #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+ __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely((0 < 0))) __PYX_ERR(0, 476, __pyx_L7_error)
+ #else
+ __pyx_t_5 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 476, __pyx_L7_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ #endif
+ }
+ } else {
+ __pyx_t_5 = __pyx_t_10(__pyx_t_2);
+ if (unlikely(!__pyx_t_5)) {
+ PyObject* exc_type = PyErr_Occurred();
+ if (exc_type) {
+ if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();
+ else __PYX_ERR(0, 476, __pyx_L7_error)
+ }
+ break;
+ }
+ __Pyx_GOTREF(__pyx_t_5);
+ }
+ __Pyx_XDECREF_SET(__pyx_7genexpr__pyx_v_s, __pyx_t_5);
+ __pyx_t_5 = 0;
+ if (!(likely(PyUnicode_CheckExact(__pyx_7genexpr__pyx_v_s))||((__pyx_7genexpr__pyx_v_s) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_7genexpr__pyx_v_s))) __PYX_ERR(0, 476, __pyx_L7_error)
+ __pyx_t_5 = ((struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self->__pyx_vtab)->_preprocess(__pyx_v_self, ((PyObject*)__pyx_7genexpr__pyx_v_s), __pyx_v_src_lang, __pyx_v_tgt_lang, __pyx_v_normalizer, __pyx_v_is_target); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 476, __pyx_L7_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ if (unlikely(__Pyx_ListComp_Append(__pyx_t_3, (PyObject*)__pyx_t_5))) __PYX_ERR(0, 476, __pyx_L7_error)
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ }
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __Pyx_XDECREF(__pyx_7genexpr__pyx_v_s); __pyx_7genexpr__pyx_v_s = 0;
+ goto __pyx_L11_exit_scope;
+ __pyx_L7_error:;
+ __Pyx_XDECREF(__pyx_7genexpr__pyx_v_s); __pyx_7genexpr__pyx_v_s = 0;
+ goto __pyx_L1_error;
+ __pyx_L11_exit_scope:;
+ } /* exit inner scope */
+ __pyx_r = ((PyObject*)__pyx_t_3);
+ __pyx_t_3 = 0;
+ goto __pyx_L0;
+
+ /* "IndicTransToolkit/processor.pyx":449
+ *
+ * # Exposed Method: Preprocess a Batch of Sentences
+ * cpdef list preprocess_batch( # <<<<<<<<<<<<<<
+ * self,
+ * List[str] batch,
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_6);
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.preprocess_batch", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_normalizer);
+ __Pyx_XDECREF(__pyx_v_iso_code);
+ __Pyx_XDECREF(__pyx_v_iterator);
+ __Pyx_XDECREF(__pyx_7genexpr__pyx_v_s);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
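+/* Illustrative note (not Cython output): a minimal Python-level usage sketch
+ * for the method compiled above, assuming the IndicProcessor(inference=True)
+ * constructor documented by IndicTransToolkit:
+ *
+ *     ip = IndicProcessor(inference=True)
+ *     batch = ip.preprocess_batch(["How are you?"], src_lang="eng_Latn",
+ *                                 tgt_lang="hin_Deva")
+ */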
+/* Python wrapper */
+static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_3preprocess_batch(PyObject *__pyx_v_self,
+#if CYTHON_METH_FASTCALL
+PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
+#else
+PyObject *__pyx_args, PyObject *__pyx_kwds
+#endif
+); /*proto*/
+PyDoc_STRVAR(__pyx_doc_17IndicTransToolkit_9processor_14IndicProcessor_2preprocess_batch, "\n Preprocess an array of sentences (normalize, tokenize, transliterate).\n This is exposed for external use.\n ");
+static PyMethodDef __pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_3preprocess_batch = {"preprocess_batch", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_3preprocess_batch, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_17IndicTransToolkit_9processor_14IndicProcessor_2preprocess_batch};
+static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_3preprocess_batch(PyObject *__pyx_v_self,
+#if CYTHON_METH_FASTCALL
+PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
+#else
+PyObject *__pyx_args, PyObject *__pyx_kwds
+#endif
+) {
+ PyObject *__pyx_v_batch = 0;
+ PyObject *__pyx_v_src_lang = 0;
+ PyObject *__pyx_v_tgt_lang = 0;
+ int __pyx_v_is_target;
+ int __pyx_v_visualize;
+ #if !CYTHON_METH_FASTCALL
+ CYTHON_UNUSED Py_ssize_t __pyx_nargs;
+ #endif
+ CYTHON_UNUSED PyObject *const *__pyx_kwvalues;
+ PyObject* values[5] = {0,0,0,0,0};
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("preprocess_batch (wrapper)", 0);
+ #if !CYTHON_METH_FASTCALL
+ #if CYTHON_ASSUME_SAFE_MACROS
+ __pyx_nargs = PyTuple_GET_SIZE(__pyx_args);
+ #else
+ __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL;
+ #endif
+ #endif
+ __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs);
+ {
+ PyObject **__pyx_pyargnames[] = {&__pyx_n_s_batch,&__pyx_n_s_src_lang,&__pyx_n_s_tgt_lang,&__pyx_n_s_is_target,&__pyx_n_s_visualize,0};
+
+ /* "IndicTransToolkit/processor.pyx":453
+ * List[str] batch,
+ * str src_lang,
+ * str tgt_lang=None, # <<<<<<<<<<<<<<
+ * bint is_target=False,
+ * bint visualize=False
+ */
+ values[2] = __Pyx_Arg_NewRef_FASTCALL(((PyObject*)Py_None));
+ if (__pyx_kwds) {
+ Py_ssize_t kw_args;
+ switch (__pyx_nargs) {
+ case 5: values[4] = __Pyx_Arg_FASTCALL(__pyx_args, 4);
+ CYTHON_FALLTHROUGH;
+ case 4: values[3] = __Pyx_Arg_FASTCALL(__pyx_args, 3);
+ CYTHON_FALLTHROUGH;
+ case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2);
+ CYTHON_FALLTHROUGH;
+ case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1);
+ CYTHON_FALLTHROUGH;
+ case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0);
+ CYTHON_FALLTHROUGH;
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds);
+ switch (__pyx_nargs) {
+ case 0:
+ if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_batch)) != 0)) {
+ (void)__Pyx_Arg_NewRef_FASTCALL(values[0]);
+ kw_args--;
+ }
+ else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 449, __pyx_L3_error)
+ else goto __pyx_L5_argtuple_error;
+ CYTHON_FALLTHROUGH;
+ case 1:
+ if (likely((values[1] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_src_lang)) != 0)) {
+ (void)__Pyx_Arg_NewRef_FASTCALL(values[1]);
+ kw_args--;
+ }
+ else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 449, __pyx_L3_error)
+ else {
+ __Pyx_RaiseArgtupleInvalid("preprocess_batch", 0, 2, 5, 1); __PYX_ERR(0, 449, __pyx_L3_error)
+ }
+ CYTHON_FALLTHROUGH;
+ case 2:
+ if (kw_args > 0) {
+ PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tgt_lang);
+ if (value) { values[2] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; }
+ else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 449, __pyx_L3_error)
+ }
+ CYTHON_FALLTHROUGH;
+ case 3:
+ if (kw_args > 0) {
+ PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_is_target);
+ if (value) { values[3] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; }
+ else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 449, __pyx_L3_error)
+ }
+ CYTHON_FALLTHROUGH;
+ case 4:
+ if (kw_args > 0) {
+ PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_visualize);
+ if (value) { values[4] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; }
+ else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 449, __pyx_L3_error)
+ }
+ }
+ if (unlikely(kw_args > 0)) {
+ const Py_ssize_t kwd_pos_args = __pyx_nargs;
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "preprocess_batch") < 0)) __PYX_ERR(0, 449, __pyx_L3_error)
+ }
+ } else {
+ switch (__pyx_nargs) {
+ case 5: values[4] = __Pyx_Arg_FASTCALL(__pyx_args, 4);
+ CYTHON_FALLTHROUGH;
+ case 4: values[3] = __Pyx_Arg_FASTCALL(__pyx_args, 3);
+ CYTHON_FALLTHROUGH;
+ case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2);
+ CYTHON_FALLTHROUGH;
+ case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1);
+ values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0);
+ break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ }
+ __pyx_v_batch = ((PyObject*)values[0]);
+ __pyx_v_src_lang = ((PyObject*)values[1]);
+ __pyx_v_tgt_lang = ((PyObject*)values[2]);
+ if (values[3]) {
+ __pyx_v_is_target = __Pyx_PyObject_IsTrue(values[3]); if (unlikely((__pyx_v_is_target == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 454, __pyx_L3_error)
+ } else {
+
+ /* "IndicTransToolkit/processor.pyx":454
+ * str src_lang,
+ * str tgt_lang=None,
+ * bint is_target=False, # <<<<<<<<<<<<<<
+ * bint visualize=False
+ * ):
+ */
+ __pyx_v_is_target = ((int)0);
+ }
+ if (values[4]) {
+ __pyx_v_visualize = __Pyx_PyObject_IsTrue(values[4]); if (unlikely((__pyx_v_visualize == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 455, __pyx_L3_error)
+ } else {
+
+ /* "IndicTransToolkit/processor.pyx":455
+ * str tgt_lang=None,
+ * bint is_target=False,
+ * bint visualize=False # <<<<<<<<<<<<<<
+ * ):
+ * """
+ */
+ __pyx_v_visualize = ((int)0);
+ }
+ }
+ goto __pyx_L6_skip;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("preprocess_batch", 0, 2, 5, __pyx_nargs); __PYX_ERR(0, 449, __pyx_L3_error)
+ __pyx_L6_skip:;
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L3_error:;
+ {
+ Py_ssize_t __pyx_temp;
+ for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
+ __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]);
+ }
+ }
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.preprocess_batch", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ __pyx_L4_argument_unpacking_done:;
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_batch), (&PyList_Type), 1, "batch", 1))) __PYX_ERR(0, 451, __pyx_L1_error)
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_src_lang), (&PyUnicode_Type), 1, "src_lang", 1))) __PYX_ERR(0, 452, __pyx_L1_error)
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tgt_lang), (&PyUnicode_Type), 1, "tgt_lang", 1))) __PYX_ERR(0, 453, __pyx_L1_error)
+ __pyx_r = __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_2preprocess_batch(((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self), __pyx_v_batch, __pyx_v_src_lang, __pyx_v_tgt_lang, __pyx_v_is_target, __pyx_v_visualize);
+
+ /* "IndicTransToolkit/processor.pyx":449
+ *
+ * # Exposed Method: Preprocess a Batch of Sentences
+ * cpdef list preprocess_batch( # <<<<<<<<<<<<<<
+ * self,
+ * List[str] batch,
+ */
+
+ /* function exit code */
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __pyx_r = NULL;
+ __pyx_L0:;
+ {
+ Py_ssize_t __pyx_temp;
+ for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
+ __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]);
+ }
+ }
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_2preprocess_batch(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_batch, PyObject *__pyx_v_src_lang, PyObject *__pyx_v_tgt_lang, int __pyx_v_is_target, int __pyx_v_visualize) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch __pyx_t_2;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("preprocess_batch", 1);
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_2.__pyx_n = 3;
+ __pyx_t_2.tgt_lang = __pyx_v_tgt_lang;
+ __pyx_t_2.is_target = __pyx_v_is_target;
+ __pyx_t_2.visualize = __pyx_v_visualize;
+ __pyx_t_1 = __pyx_vtabptr_17IndicTransToolkit_9processor_IndicProcessor->preprocess_batch(__pyx_v_self, __pyx_v_batch, __pyx_v_src_lang, 1, &__pyx_t_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 449, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.preprocess_batch", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "IndicTransToolkit/processor.pyx":479
+ *
+ * # Exposed Method: Postprocess a Batch of Sentences
+ * cpdef list postprocess_batch( # <<<<<<<<<<<<<<
+ * self,
+ * List[str] sents,
+ */
+
+static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_5postprocess_batch(PyObject *__pyx_v_self,
+#if CYTHON_METH_FASTCALL
+PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
+#else
+PyObject *__pyx_args, PyObject *__pyx_kwds
+#endif
+); /*proto*/
+static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sents, int __pyx_skip_dispatch, struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch *__pyx_optional_args) {
+ PyObject *__pyx_v_lang = ((PyObject*)__pyx_n_u_hin_Deva);
+
+ /* "IndicTransToolkit/processor.pyx":483
+ * List[str] sents,
+ * str lang="hin_Deva",
+ * bint visualize=False # <<<<<<<<<<<<<<
+ * ):
+ * """
+ */
+ int __pyx_v_visualize = ((int)0);
+ PyObject *__pyx_v_iterator = 0;
+ PyObject *__pyx_v_results = 0;
+ int __pyx_v_n;
+ PyObject *__pyx_8genexpr1__pyx_v_s = NULL;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ PyObject *__pyx_t_5 = NULL;
+ unsigned int __pyx_t_6;
+ Py_ssize_t __pyx_t_7;
+ PyObject *(*__pyx_t_8)(PyObject *);
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("postprocess_batch", 1);
+ if (__pyx_optional_args) {
+ if (__pyx_optional_args->__pyx_n > 0) {
+ __pyx_v_lang = __pyx_optional_args->lang;
+ if (__pyx_optional_args->__pyx_n > 1) {
+ __pyx_v_visualize = __pyx_optional_args->visualize;
+ }
+ }
+ }
+
+ /* "IndicTransToolkit/processor.pyx":479
+ *
+ * # Exposed Method: Postprocess a Batch of Sentences
+ * cpdef list postprocess_batch( # <<<<<<<<<<<<<<
+ * self,
+ * List[str] sents,
+ */
+ /* Check if called by wrapper */
+ if (unlikely(__pyx_skip_dispatch)) ;
+ /* Check if overridden in Python */
+ else if (unlikely((Py_TYPE(((PyObject *)__pyx_v_self))->tp_dictoffset != 0) || __Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)))) {
+ #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
+ static PY_UINT64_T __pyx_tp_dict_version = __PYX_DICT_VERSION_INIT, __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
+ if (unlikely(!__Pyx_object_dict_version_matches(((PyObject *)__pyx_v_self), __pyx_tp_dict_version, __pyx_obj_dict_version))) {
+ PY_UINT64_T __pyx_typedict_guard = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
+ #endif
+ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_postprocess_batch); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 479, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ if (!__Pyx_IsSameCFunction(__pyx_t_1, (void*) __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_5postprocess_batch)) {
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_3 = __Pyx_PyBool_FromLong(__pyx_v_visualize); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 479, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_INCREF(__pyx_t_1);
+ __pyx_t_4 = __pyx_t_1; __pyx_t_5 = NULL;
+ __pyx_t_6 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (unlikely(PyMethod_Check(__pyx_t_4))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_4, function);
+ __pyx_t_6 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[4] = {__pyx_t_5, __pyx_v_sents, __pyx_v_lang, __pyx_t_3};
+ __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 3+__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 479, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ }
+ if (!(likely(PyList_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("list", __pyx_t_2))) __PYX_ERR(0, 479, __pyx_L1_error)
+ __pyx_r = ((PyObject*)__pyx_t_2);
+ __pyx_t_2 = 0;
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ goto __pyx_L0;
+ }
+ #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
+ __pyx_tp_dict_version = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
+ __pyx_obj_dict_version = __Pyx_get_object_dict_version(((PyObject *)__pyx_v_self));
+ if (unlikely(__pyx_typedict_guard != __pyx_tp_dict_version)) {
+ __pyx_tp_dict_version = __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
+ }
+ #endif
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
+ }
+ #endif
+ }
+
+ /* "IndicTransToolkit/processor.pyx":493
+ * cdef list results
+ * cdef int i
+ * cdef int n = len(sents) # <<<<<<<<<<<<<<
+ *
+ * if visualize:
+ */
+ if (unlikely(__pyx_v_sents == Py_None)) {
+ PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
+ __PYX_ERR(0, 493, __pyx_L1_error)
+ }
+ __pyx_t_7 = __Pyx_PyList_GET_SIZE(__pyx_v_sents); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 493, __pyx_L1_error)
+ __pyx_v_n = __pyx_t_7;
+
+ /* "IndicTransToolkit/processor.pyx":495
+ * cdef int n = len(sents)
+ *
+ * if visualize: # <<<<<<<<<<<<<<
+ * iterator = tqdm(sents, total=n, desc=f" | > Post-processing {lang}", unit="line")
+ * else:
+ */
+ if (__pyx_v_visualize) {
+
+ /* "IndicTransToolkit/processor.pyx":496
+ *
+ * if visualize:
+ * iterator = tqdm(sents, total=n, desc=f" | > Post-processing {lang}", unit="line") # <<<<<<<<<<<<<<
+ * else:
+ * iterator = sents
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_tqdm); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 496, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 496, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_INCREF(__pyx_v_sents);
+ __Pyx_GIVEREF(__pyx_v_sents);
+ if (__Pyx_PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_sents)) __PYX_ERR(0, 496, __pyx_L1_error);
+ __pyx_t_4 = __Pyx_PyDict_NewPresized(3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 496, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_n); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 496, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ if (PyDict_SetItem(__pyx_t_4, __pyx_n_s_total, __pyx_t_3) < 0) __PYX_ERR(0, 496, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = __Pyx_PyUnicode_Unicode(__pyx_v_lang); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 496, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_5 = __Pyx_PyUnicode_Concat(__pyx_kp_u_Post_processing, __pyx_t_3); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 496, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (PyDict_SetItem(__pyx_t_4, __pyx_n_s_desc, __pyx_t_5) < 0) __PYX_ERR(0, 496, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ if (PyDict_SetItem(__pyx_t_4, __pyx_n_s_unit, __pyx_n_u_line) < 0) __PYX_ERR(0, 496, __pyx_L1_error)
+ __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_2, __pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 496, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __pyx_v_iterator = __pyx_t_5;
+ __pyx_t_5 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":495
+ * cdef int n = len(sents)
+ *
+ * if visualize: # <<<<<<<<<<<<<<
+ * iterator = tqdm(sents, total=n, desc=f" | > Post-processing {lang}", unit="line")
+ * else:
+ */
+ goto __pyx_L3;
+ }
+
+ /* "IndicTransToolkit/processor.pyx":498
+ * iterator = tqdm(sents, total=n, desc=f" | > Post-processing {lang}", unit="line")
+ * else:
+ * iterator = sents # <<<<<<<<<<<<<<
+ *
+ * results = [self._postprocess(s, lang) for s in iterator]
+ */
+ /*else*/ {
+ __Pyx_INCREF(__pyx_v_sents);
+ __pyx_v_iterator = __pyx_v_sents;
+ }
+ __pyx_L3:;
+
+ /* "IndicTransToolkit/processor.pyx":500
+ * iterator = sents
+ *
+ * results = [self._postprocess(s, lang) for s in iterator] # <<<<<<<<<<<<<<
+ * self._placeholder_entity_maps.queue.clear()
+ *
+ */
+ { /* enter inner scope */
+ __pyx_t_5 = PyList_New(0); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 500, __pyx_L6_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ if (likely(PyList_CheckExact(__pyx_v_iterator)) || PyTuple_CheckExact(__pyx_v_iterator)) {
+ __pyx_t_4 = __pyx_v_iterator; __Pyx_INCREF(__pyx_t_4);
+ __pyx_t_7 = 0;
+ __pyx_t_8 = NULL;
+ } else {
+ __pyx_t_7 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_v_iterator); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 500, __pyx_L6_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_8 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_4); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 500, __pyx_L6_error)
+ }
+ for (;;) {
+ if (likely(!__pyx_t_8)) {
+ if (likely(PyList_CheckExact(__pyx_t_4))) {
+ {
+ Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_4);
+ #if !CYTHON_ASSUME_SAFE_MACROS
+ if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 500, __pyx_L6_error)
+ #endif
+ if (__pyx_t_7 >= __pyx_temp) break;
+ }
+ #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+ __pyx_t_2 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_7); __Pyx_INCREF(__pyx_t_2); __pyx_t_7++; if (unlikely((0 < 0))) __PYX_ERR(0, 500, __pyx_L6_error)
+ #else
+ __pyx_t_2 = __Pyx_PySequence_ITEM(__pyx_t_4, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 500, __pyx_L6_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ #endif
+ } else {
+ {
+ Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_4);
+ #if !CYTHON_ASSUME_SAFE_MACROS
+ if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 500, __pyx_L6_error)
+ #endif
+ if (__pyx_t_7 >= __pyx_temp) break;
+ }
+ #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+ __pyx_t_2 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_7); __Pyx_INCREF(__pyx_t_2); __pyx_t_7++; if (unlikely((0 < 0))) __PYX_ERR(0, 500, __pyx_L6_error)
+ #else
+ __pyx_t_2 = __Pyx_PySequence_ITEM(__pyx_t_4, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 500, __pyx_L6_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ #endif
+ }
+ } else {
+ __pyx_t_2 = __pyx_t_8(__pyx_t_4);
+ if (unlikely(!__pyx_t_2)) {
+ PyObject* exc_type = PyErr_Occurred();
+ if (exc_type) {
+ if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();
+ else __PYX_ERR(0, 500, __pyx_L6_error)
+ }
+ break;
+ }
+ __Pyx_GOTREF(__pyx_t_2);
+ }
+ __Pyx_XDECREF_SET(__pyx_8genexpr1__pyx_v_s, __pyx_t_2);
+ __pyx_t_2 = 0;
+ __pyx_t_2 = ((struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self->__pyx_vtab)->_postprocess(__pyx_v_self, __pyx_8genexpr1__pyx_v_s, __pyx_v_lang); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 500, __pyx_L6_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ if (unlikely(__Pyx_ListComp_Append(__pyx_t_5, (PyObject*)__pyx_t_2))) __PYX_ERR(0, 500, __pyx_L6_error)
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ }
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __Pyx_XDECREF(__pyx_8genexpr1__pyx_v_s); __pyx_8genexpr1__pyx_v_s = 0;
+ goto __pyx_L10_exit_scope;
+ __pyx_L6_error:;
+ __Pyx_XDECREF(__pyx_8genexpr1__pyx_v_s); __pyx_8genexpr1__pyx_v_s = 0;
+ goto __pyx_L1_error;
+ __pyx_L10_exit_scope:;
+ } /* exit inner scope */
+ __pyx_v_results = ((PyObject*)__pyx_t_5);
+ __pyx_t_5 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":501
+ *
+ * results = [self._postprocess(s, lang) for s in iterator]
+ * self._placeholder_entity_maps.queue.clear() # <<<<<<<<<<<<<<
+ *
+ * return results
+ */
+ __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_placeholder_entity_maps, __pyx_n_s_queue); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 501, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_clear); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 501, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __pyx_t_4 = NULL;
+ __pyx_t_6 = 0;
+ #if CYTHON_UNPACK_METHODS
+ if (likely(PyMethod_Check(__pyx_t_2))) {
+ __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_2);
+ if (likely(__pyx_t_4)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
+ __Pyx_INCREF(__pyx_t_4);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_2, function);
+ __pyx_t_6 = 1;
+ }
+ }
+ #endif
+ {
+ PyObject *__pyx_callargs[2] = {__pyx_t_4, NULL};
+ __pyx_t_5 = __Pyx_PyObject_FastCall(__pyx_t_2, __pyx_callargs+1-__pyx_t_6, 0+__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
+ if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 501, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ }
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":503
+ * self._placeholder_entity_maps.queue.clear()
+ *
+ * return results # <<<<<<<<<<<<<<
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(__pyx_v_results);
+ __pyx_r = __pyx_v_results;
+ goto __pyx_L0;
+
+ /* "IndicTransToolkit/processor.pyx":479
+ *
+ * # Exposed Method: Postprocess a Batch of Sentences
+ * cpdef list postprocess_batch( # <<<<<<<<<<<<<<
+ * self,
+ * List[str] sents,
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_5);
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.postprocess_batch", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_iterator);
+ __Pyx_XDECREF(__pyx_v_results);
+ __Pyx_XDECREF(__pyx_8genexpr1__pyx_v_s);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
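+/* Illustrative note (not Cython output): postprocess_batch consumes one
+ * placeholder map per sentence and then clears the queue, so each call must be
+ * paired with the preprocess_batch call that filled it. A hedged round-trip
+ * sketch (model and tokenizer steps elided):
+ *
+ *     src = ip.preprocess_batch(sents, src_lang="eng_Latn", tgt_lang="hin_Deva")
+ *     # ... tokenize, generate with the translation model, decode ...
+ *     out = ip.postprocess_batch(decoded, lang="hin_Deva")
+ */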
+/* Python wrapper */
+static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_5postprocess_batch(PyObject *__pyx_v_self,
+#if CYTHON_METH_FASTCALL
+PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
+#else
+PyObject *__pyx_args, PyObject *__pyx_kwds
+#endif
+); /*proto*/
+PyDoc_STRVAR(__pyx_doc_17IndicTransToolkit_9processor_14IndicProcessor_4postprocess_batch, "\n Postprocess a batch of sentences:\n Restore placeholders, fix script issues, and detokenize.\n This is exposed for external use.\n ");
+static PyMethodDef __pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_5postprocess_batch = {"postprocess_batch", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_5postprocess_batch, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_17IndicTransToolkit_9processor_14IndicProcessor_4postprocess_batch};
+static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_5postprocess_batch(PyObject *__pyx_v_self,
+#if CYTHON_METH_FASTCALL
+PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
+#else
+PyObject *__pyx_args, PyObject *__pyx_kwds
+#endif
+) {
+ PyObject *__pyx_v_sents = 0;
+ PyObject *__pyx_v_lang = 0;
+ int __pyx_v_visualize;
+ #if !CYTHON_METH_FASTCALL
+ CYTHON_UNUSED Py_ssize_t __pyx_nargs;
+ #endif
+ CYTHON_UNUSED PyObject *const *__pyx_kwvalues;
+ PyObject* values[3] = {0,0,0};
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("postprocess_batch (wrapper)", 0);
+ #if !CYTHON_METH_FASTCALL
+ #if CYTHON_ASSUME_SAFE_MACROS
+ __pyx_nargs = PyTuple_GET_SIZE(__pyx_args);
+ #else
+ __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL;
+ #endif
+ #endif
+ __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs);
+ {
+ PyObject **__pyx_pyargnames[] = {&__pyx_n_s_sents,&__pyx_n_s_lang,&__pyx_n_s_visualize,0};
+ values[1] = __Pyx_Arg_NewRef_FASTCALL(((PyObject*)__pyx_n_u_hin_Deva));
+ if (__pyx_kwds) {
+ Py_ssize_t kw_args;
+ switch (__pyx_nargs) {
+ case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2);
+ CYTHON_FALLTHROUGH;
+ case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1);
+ CYTHON_FALLTHROUGH;
+ case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0);
+ CYTHON_FALLTHROUGH;
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds);
+ switch (__pyx_nargs) {
+ case 0:
+ if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_sents)) != 0)) {
+ (void)__Pyx_Arg_NewRef_FASTCALL(values[0]);
+ kw_args--;
+ }
+ else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 479, __pyx_L3_error)
+ else goto __pyx_L5_argtuple_error;
+ CYTHON_FALLTHROUGH;
+ case 1:
+ if (kw_args > 0) {
+ PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_lang);
+ if (value) { values[1] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; }
+ else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 479, __pyx_L3_error)
+ }
+ CYTHON_FALLTHROUGH;
+ case 2:
+ if (kw_args > 0) {
+ PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_visualize);
+ if (value) { values[2] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; }
+ else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 479, __pyx_L3_error)
+ }
+ }
+ if (unlikely(kw_args > 0)) {
+ const Py_ssize_t kwd_pos_args = __pyx_nargs;
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "postprocess_batch") < 0)) __PYX_ERR(0, 479, __pyx_L3_error)
+ }
+ } else {
+ switch (__pyx_nargs) {
+ case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2);
+ CYTHON_FALLTHROUGH;
+ case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1);
+ CYTHON_FALLTHROUGH;
+ case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0);
+ break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ }
+ __pyx_v_sents = ((PyObject*)values[0]);
+ __pyx_v_lang = ((PyObject*)values[1]);
+ if (values[2]) {
+ __pyx_v_visualize = __Pyx_PyObject_IsTrue(values[2]); if (unlikely((__pyx_v_visualize == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 483, __pyx_L3_error)
+ } else {
+
+ /* "IndicTransToolkit/processor.pyx":483
+ * List[str] sents,
+ * str lang="hin_Deva",
+ * bint visualize=False # <<<<<<<<<<<<<<
+ * ):
+ * """
+ */
+ __pyx_v_visualize = ((int)0);
+ }
+ }
+ goto __pyx_L6_skip;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("postprocess_batch", 0, 1, 3, __pyx_nargs); __PYX_ERR(0, 479, __pyx_L3_error)
+ __pyx_L6_skip:;
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L3_error:;
+ {
+ Py_ssize_t __pyx_temp;
+ for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
+ __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]);
+ }
+ }
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.postprocess_batch", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ __pyx_L4_argument_unpacking_done:;
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_sents), (&PyList_Type), 1, "sents", 1))) __PYX_ERR(0, 481, __pyx_L1_error)
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_lang), (&PyUnicode_Type), 1, "lang", 1))) __PYX_ERR(0, 482, __pyx_L1_error)
+ __pyx_r = __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_4postprocess_batch(((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self), __pyx_v_sents, __pyx_v_lang, __pyx_v_visualize);
+
+ /* "IndicTransToolkit/processor.pyx":479
+ *
+ * # Exposed Method: Postprocess a Batch of Sentences
+ * cpdef list postprocess_batch( # <<<<<<<<<<<<<<
+ * self,
+ * List[str] sents,
+ */
+
+ /* function exit code */
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __pyx_r = NULL;
+ __pyx_L0:;
+ {
+ Py_ssize_t __pyx_temp;
+ for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
+ __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]);
+ }
+ }
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
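+/*
+ * The wrapper above only unpacks arguments: it accepts positional or keyword
+ * forms via METH_FASTCALL, applies the defaults quoted from the pyx source
+ * (lang="hin_Deva", visualize=False), and type-checks sents as list and lang
+ * as str. The function below then forwards through the class vtable, packing
+ * the optional arguments into an __pyx_opt_args struct (__pyx_n = 2 records
+ * how many optionals were supplied), so Python-level calls and C-level cpdef
+ * calls share a single implementation.
+ */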
+
+static PyObject *__pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_4postprocess_batch(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sents, PyObject *__pyx_v_lang, int __pyx_v_visualize) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch __pyx_t_2;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("postprocess_batch", 1);
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_2.__pyx_n = 2;
+ __pyx_t_2.lang = __pyx_v_lang;
+ __pyx_t_2.visualize = __pyx_v_visualize;
+ __pyx_t_1 = __pyx_vtabptr_17IndicTransToolkit_9processor_IndicProcessor->postprocess_batch(__pyx_v_self, __pyx_v_sents, 1, &__pyx_t_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 479, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.postprocess_batch", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "IndicTransToolkit/processor.pyx":21
+ *
+ * cdef class IndicProcessor:
+ * cdef public bint inference # <<<<<<<<<<<<<<
+ *
+ * # Precompiled regex patterns and placeholders
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9inference_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9inference_1__get__(PyObject *__pyx_v_self) {
+ CYTHON_UNUSED PyObject *const *__pyx_kwvalues;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+ __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs);
+ __pyx_r = __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_9inference___get__(((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_9inference___get__(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__get__", 1);
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = __Pyx_PyBool_FromLong(__pyx_v_self->inference); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 21, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.inference.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static int __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9inference_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/
+static int __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9inference_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) {
+ CYTHON_UNUSED PyObject *const *__pyx_kwvalues;
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__set__ (wrapper)", 0);
+ __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs);
+ __pyx_r = __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_9inference_2__set__(((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self), ((PyObject *)__pyx_v_value));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static int __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_9inference_2__set__(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_value) {
+ int __pyx_r;
+ int __pyx_t_1;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 21, __pyx_L1_error)
+ __pyx_v_self->inference = __pyx_t_1;
+
+ /* function exit code */
+ __pyx_r = 0;
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.inference.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = -1;
+ __pyx_L0:;
+ return __pyx_r;
+}
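+/*
+ * The pair of accessors above backs the `cdef public bint inference`
+ * attribute declared at processor.pyx:21. From Python (a sketch, assuming
+ * `ip` is an IndicProcessor instance):
+ *
+ *     ip.inference          # __get__: returns a bool via __Pyx_PyBool_FromLong
+ *     ip.inference = 1      # __set__: any object, truth-tested by PyObject_IsTrue
+ *     del ip.inference      # rejected: the setprop stub further down raises
+ *                           # NotImplementedError("__del__")
+ */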
+
+/* "(tree fragment)":1
+ * def __reduce_cython__(self): # <<<<<<<<<<<<<<
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__"
+ * def __setstate_cython__(self, __pyx_state):
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_7__reduce_cython__(PyObject *__pyx_v_self,
+#if CYTHON_METH_FASTCALL
+PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
+#else
+PyObject *__pyx_args, PyObject *__pyx_kwds
+#endif
+); /*proto*/
+static PyMethodDef __pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_7__reduce_cython__ = {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_7__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0};
+static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_7__reduce_cython__(PyObject *__pyx_v_self,
+#if CYTHON_METH_FASTCALL
+PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
+#else
+PyObject *__pyx_args, PyObject *__pyx_kwds
+#endif
+) {
+ #if !CYTHON_METH_FASTCALL
+ CYTHON_UNUSED Py_ssize_t __pyx_nargs;
+ #endif
+ CYTHON_UNUSED PyObject *const *__pyx_kwvalues;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0);
+ #if !CYTHON_METH_FASTCALL
+ #if CYTHON_ASSUME_SAFE_MACROS
+ __pyx_nargs = PyTuple_GET_SIZE(__pyx_args);
+ #else
+ __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL;
+ #endif
+ #endif
+ __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs);
+ if (unlikely(__pyx_nargs > 0)) {
+ __Pyx_RaiseArgtupleInvalid("__reduce_cython__", 1, 0, 0, __pyx_nargs); return NULL;}
+ if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce_cython__", 0))) return NULL;
+ __pyx_r = __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_6__reduce_cython__(((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_6__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__reduce_cython__", 1);
+
+ /* "(tree fragment)":2
+ * def __reduce_cython__(self):
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<<
+ * def __setstate_cython__(self, __pyx_state):
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__"
+ */
+ __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0);
+ __PYX_ERR(1, 2, __pyx_L1_error)
+
+ /* "(tree fragment)":1
+ * def __reduce_cython__(self): # <<<<<<<<<<<<<<
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__"
+ * def __setstate_cython__(self, __pyx_state):
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "(tree fragment)":3
+ * def __reduce_cython__(self):
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__"
+ * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<<
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__"
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9__setstate_cython__(PyObject *__pyx_v_self,
+#if CYTHON_METH_FASTCALL
+PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
+#else
+PyObject *__pyx_args, PyObject *__pyx_kwds
+#endif
+); /*proto*/
+static PyMethodDef __pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_9__setstate_cython__ = {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0};
+static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9__setstate_cython__(PyObject *__pyx_v_self,
+#if CYTHON_METH_FASTCALL
+PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
+#else
+PyObject *__pyx_args, PyObject *__pyx_kwds
+#endif
+) {
+ CYTHON_UNUSED PyObject *__pyx_v___pyx_state = 0;
+ #if !CYTHON_METH_FASTCALL
+ CYTHON_UNUSED Py_ssize_t __pyx_nargs;
+ #endif
+ CYTHON_UNUSED PyObject *const *__pyx_kwvalues;
+ PyObject* values[1] = {0};
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0);
+ #if !CYTHON_METH_FASTCALL
+ #if CYTHON_ASSUME_SAFE_MACROS
+ __pyx_nargs = PyTuple_GET_SIZE(__pyx_args);
+ #else
+ __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL;
+ #endif
+ #endif
+ __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs);
+ {
+ PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_state,0};
+ if (__pyx_kwds) {
+ Py_ssize_t kw_args;
+ switch (__pyx_nargs) {
+ case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0);
+ CYTHON_FALLTHROUGH;
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds);
+ switch (__pyx_nargs) {
+ case 0:
+ if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_state)) != 0)) {
+ (void)__Pyx_Arg_NewRef_FASTCALL(values[0]);
+ kw_args--;
+ }
+ else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 3, __pyx_L3_error)
+ else goto __pyx_L5_argtuple_error;
+ }
+ if (unlikely(kw_args > 0)) {
+ const Py_ssize_t kwd_pos_args = __pyx_nargs;
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__setstate_cython__") < 0)) __PYX_ERR(1, 3, __pyx_L3_error)
+ }
+ } else if (unlikely(__pyx_nargs != 1)) {
+ goto __pyx_L5_argtuple_error;
+ } else {
+ values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0);
+ }
+ __pyx_v___pyx_state = values[0];
+ }
+ goto __pyx_L6_skip;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, __pyx_nargs); __PYX_ERR(1, 3, __pyx_L3_error)
+ __pyx_L6_skip:;
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L3_error:;
+ {
+ Py_ssize_t __pyx_temp;
+ for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
+ __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]);
+ }
+ }
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ __pyx_L4_argument_unpacking_done:;
+ __pyx_r = __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_8__setstate_cython__(((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self), __pyx_v___pyx_state);
+
+ /* function exit code */
+ {
+ Py_ssize_t __pyx_temp;
+ for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
+ __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]);
+ }
+ }
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_8__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__setstate_cython__", 1);
+
+ /* "(tree fragment)":4
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__"
+ * def __setstate_cython__(self, __pyx_state):
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<<
+ */
+ __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0);
+ __PYX_ERR(1, 4, __pyx_L1_error)
+
+ /* "(tree fragment)":3
+ * def __reduce_cython__(self):
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__"
+ * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<<
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__"
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
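+/*
+ * Net effect of __reduce_cython__/__setstate_cython__ above: pickling is
+ * disabled because __cinit__ is non-trivial. A sketch of what a caller sees:
+ *
+ *     import pickle
+ *     pickle.dumps(ip)  # TypeError: no default __reduce__ due to non-trivial __cinit__
+ */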
+static struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor;
+
+static PyObject *__pyx_tp_new_17IndicTransToolkit_9processor_IndicProcessor(PyTypeObject *t, PyObject *a, PyObject *k) {
+ struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *p;
+ PyObject *o;
+ #if CYTHON_COMPILING_IN_LIMITED_API
+ allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc);
+ o = alloc_func(t, 0);
+ #else
+ if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) {
+ o = (*t->tp_alloc)(t, 0);
+ } else {
+ o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0);
+ }
+ if (unlikely(!o)) return 0;
+ #endif
+ p = ((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)o);
+ p->__pyx_vtab = __pyx_vtabptr_17IndicTransToolkit_9processor_IndicProcessor;
+ p->_MULTISPACE_REGEX = Py_None; Py_INCREF(Py_None);
+ p->_DIGIT_SPACE_PERCENT = Py_None; Py_INCREF(Py_None);
+ p->_DOUBLE_QUOT_PUNC = Py_None; Py_INCREF(Py_None);
+ p->_DIGIT_NBSP_DIGIT = Py_None; Py_INCREF(Py_None);
+ p->_END_BRACKET_SPACE_PUNC_REGEX = Py_None; Py_INCREF(Py_None);
+ p->_URL_PATTERN = Py_None; Py_INCREF(Py_None);
+ p->_NUMERAL_PATTERN = Py_None; Py_INCREF(Py_None);
+ p->_EMAIL_PATTERN = Py_None; Py_INCREF(Py_None);
+ p->_OTHER_PATTERN = Py_None; Py_INCREF(Py_None);
+ p->_PUNC_REPLACEMENTS = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ p->_INDIC_FAILURE_CASES = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ p->_flores_codes = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ p->_digits_translation_table = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ p->_placeholder_entity_maps = Py_None; Py_INCREF(Py_None);
+ p->_en_tok = Py_None; Py_INCREF(Py_None);
+ p->_en_normalizer = Py_None; Py_INCREF(Py_None);
+ p->_en_detok = Py_None; Py_INCREF(Py_None);
+ p->_xliterator = Py_None; Py_INCREF(Py_None);
+ if (unlikely(__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_1__cinit__(o, a, k) < 0)) goto bad;
+ return o;
+ bad:
+ Py_DECREF(o); o = 0;
+ return NULL;
+}
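+/*
+ * tp_new above allocates the instance, installs the vtable pointer, and seeds
+ * every object-typed field with Py_None before running __cinit__. Seeding
+ * with Py_None (instead of NULL) keeps traverse/clear/dealloc safe even if
+ * __cinit__ raises partway through; on failure the half-built object is
+ * simply DECREF'd.
+ */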
+
+static void __pyx_tp_dealloc_17IndicTransToolkit_9processor_IndicProcessor(PyObject *o) {
+ struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *p = (struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)o;
+ #if CYTHON_USE_TP_FINALIZE
+ if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) {
+ if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_17IndicTransToolkit_9processor_IndicProcessor) {
+ if (PyObject_CallFinalizerFromDealloc(o)) return;
+ }
+ }
+ #endif
+ PyObject_GC_UnTrack(o);
+ Py_CLEAR(p->_MULTISPACE_REGEX);
+ Py_CLEAR(p->_DIGIT_SPACE_PERCENT);
+ Py_CLEAR(p->_DOUBLE_QUOT_PUNC);
+ Py_CLEAR(p->_DIGIT_NBSP_DIGIT);
+ Py_CLEAR(p->_END_BRACKET_SPACE_PUNC_REGEX);
+ Py_CLEAR(p->_URL_PATTERN);
+ Py_CLEAR(p->_NUMERAL_PATTERN);
+ Py_CLEAR(p->_EMAIL_PATTERN);
+ Py_CLEAR(p->_OTHER_PATTERN);
+ Py_CLEAR(p->_PUNC_REPLACEMENTS);
+ Py_CLEAR(p->_INDIC_FAILURE_CASES);
+ Py_CLEAR(p->_flores_codes);
+ Py_CLEAR(p->_digits_translation_table);
+ Py_CLEAR(p->_placeholder_entity_maps);
+ Py_CLEAR(p->_en_tok);
+ Py_CLEAR(p->_en_normalizer);
+ Py_CLEAR(p->_en_detok);
+ Py_CLEAR(p->_xliterator);
+ #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY
+ (*Py_TYPE(o)->tp_free)(o);
+ #else
+ {
+ freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free);
+ if (tp_free) tp_free(o);
+ }
+ #endif
+}
+
+static int __pyx_tp_traverse_17IndicTransToolkit_9processor_IndicProcessor(PyObject *o, visitproc v, void *a) {
+ int e;
+ struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *p = (struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)o;
+ if (p->_MULTISPACE_REGEX) {
+ e = (*v)(p->_MULTISPACE_REGEX, a); if (e) return e;
+ }
+ if (p->_DIGIT_SPACE_PERCENT) {
+ e = (*v)(p->_DIGIT_SPACE_PERCENT, a); if (e) return e;
+ }
+ if (p->_DOUBLE_QUOT_PUNC) {
+ e = (*v)(p->_DOUBLE_QUOT_PUNC, a); if (e) return e;
+ }
+ if (p->_DIGIT_NBSP_DIGIT) {
+ e = (*v)(p->_DIGIT_NBSP_DIGIT, a); if (e) return e;
+ }
+ if (p->_END_BRACKET_SPACE_PUNC_REGEX) {
+ e = (*v)(p->_END_BRACKET_SPACE_PUNC_REGEX, a); if (e) return e;
+ }
+ if (p->_URL_PATTERN) {
+ e = (*v)(p->_URL_PATTERN, a); if (e) return e;
+ }
+ if (p->_NUMERAL_PATTERN) {
+ e = (*v)(p->_NUMERAL_PATTERN, a); if (e) return e;
+ }
+ if (p->_EMAIL_PATTERN) {
+ e = (*v)(p->_EMAIL_PATTERN, a); if (e) return e;
+ }
+ if (p->_OTHER_PATTERN) {
+ e = (*v)(p->_OTHER_PATTERN, a); if (e) return e;
+ }
+ if (p->_PUNC_REPLACEMENTS) {
+ e = (*v)(p->_PUNC_REPLACEMENTS, a); if (e) return e;
+ }
+ if (p->_INDIC_FAILURE_CASES) {
+ e = (*v)(p->_INDIC_FAILURE_CASES, a); if (e) return e;
+ }
+ if (p->_flores_codes) {
+ e = (*v)(p->_flores_codes, a); if (e) return e;
+ }
+ if (p->_digits_translation_table) {
+ e = (*v)(p->_digits_translation_table, a); if (e) return e;
+ }
+ if (p->_placeholder_entity_maps) {
+ e = (*v)(p->_placeholder_entity_maps, a); if (e) return e;
+ }
+ if (p->_en_tok) {
+ e = (*v)(p->_en_tok, a); if (e) return e;
+ }
+ if (p->_en_normalizer) {
+ e = (*v)(p->_en_normalizer, a); if (e) return e;
+ }
+ if (p->_en_detok) {
+ e = (*v)(p->_en_detok, a); if (e) return e;
+ }
+ if (p->_xliterator) {
+ e = (*v)(p->_xliterator, a); if (e) return e;
+ }
+ return 0;
+}
+
+static int __pyx_tp_clear_17IndicTransToolkit_9processor_IndicProcessor(PyObject *o) {
+ PyObject* tmp;
+ struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *p = (struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)o;
+ tmp = ((PyObject*)p->_MULTISPACE_REGEX);
+ p->_MULTISPACE_REGEX = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_DIGIT_SPACE_PERCENT);
+ p->_DIGIT_SPACE_PERCENT = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_DOUBLE_QUOT_PUNC);
+ p->_DOUBLE_QUOT_PUNC = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_DIGIT_NBSP_DIGIT);
+ p->_DIGIT_NBSP_DIGIT = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_END_BRACKET_SPACE_PUNC_REGEX);
+ p->_END_BRACKET_SPACE_PUNC_REGEX = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_URL_PATTERN);
+ p->_URL_PATTERN = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_NUMERAL_PATTERN);
+ p->_NUMERAL_PATTERN = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_EMAIL_PATTERN);
+ p->_EMAIL_PATTERN = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_OTHER_PATTERN);
+ p->_OTHER_PATTERN = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_PUNC_REPLACEMENTS);
+ p->_PUNC_REPLACEMENTS = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_INDIC_FAILURE_CASES);
+ p->_INDIC_FAILURE_CASES = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_flores_codes);
+ p->_flores_codes = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_digits_translation_table);
+ p->_digits_translation_table = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_placeholder_entity_maps);
+ p->_placeholder_entity_maps = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_en_tok);
+ p->_en_tok = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_en_normalizer);
+ p->_en_normalizer = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_en_detok);
+ p->_en_detok = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_xliterator);
+ p->_xliterator = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ return 0;
+}
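+/*
+ * The three functions above implement CPython's cyclic-GC contract for
+ * IndicProcessor: tp_dealloc optionally runs tp_finalize, untracks the
+ * object, and drops every owned reference; tp_traverse visits each PyObject*
+ * field so the collector can find reference cycles; tp_clear breaks cycles by
+ * swapping each field back to Py_None rather than NULL, preserving the
+ * "never NULL" invariant the rest of the generated code relies on.
+ */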
+
+static PyObject *__pyx_getprop_17IndicTransToolkit_9processor_14IndicProcessor_inference(PyObject *o, CYTHON_UNUSED void *x) {
+ return __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9inference_1__get__(o);
+}
+
+static int __pyx_setprop_17IndicTransToolkit_9processor_14IndicProcessor_inference(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) {
+ if (v) {
+ return __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9inference_3__set__(o, v);
+ }
+ else {
+ PyErr_SetString(PyExc_NotImplementedError, "__del__");
+ return -1;
+ }
+}
+
+static PyMethodDef __pyx_methods_17IndicTransToolkit_9processor_IndicProcessor[] = {
+ {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_7__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0},
+ {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0},
+ {0, 0, 0, 0}
+};
+
+static struct PyGetSetDef __pyx_getsets_17IndicTransToolkit_9processor_IndicProcessor[] = {
+ {(char *)"inference", __pyx_getprop_17IndicTransToolkit_9processor_14IndicProcessor_inference, __pyx_setprop_17IndicTransToolkit_9processor_14IndicProcessor_inference, (char *)0, 0},
+ {0, 0, 0, 0, 0}
+};
+#if CYTHON_USE_TYPE_SPECS
+static PyType_Slot __pyx_type_17IndicTransToolkit_9processor_IndicProcessor_slots[] = {
+ {Py_tp_dealloc, (void *)__pyx_tp_dealloc_17IndicTransToolkit_9processor_IndicProcessor},
+ {Py_tp_traverse, (void *)__pyx_tp_traverse_17IndicTransToolkit_9processor_IndicProcessor},
+ {Py_tp_clear, (void *)__pyx_tp_clear_17IndicTransToolkit_9processor_IndicProcessor},
+ {Py_tp_methods, (void *)__pyx_methods_17IndicTransToolkit_9processor_IndicProcessor},
+ {Py_tp_getset, (void *)__pyx_getsets_17IndicTransToolkit_9processor_IndicProcessor},
+ {Py_tp_new, (void *)__pyx_tp_new_17IndicTransToolkit_9processor_IndicProcessor},
+ {0, 0},
+};
+static PyType_Spec __pyx_type_17IndicTransToolkit_9processor_IndicProcessor_spec = {
+ "IndicTransToolkit.processor.IndicProcessor",
+ sizeof(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor),
+ 0,
+ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC,
+ __pyx_type_17IndicTransToolkit_9processor_IndicProcessor_slots,
+};
+#else
+
+static PyTypeObject __pyx_type_17IndicTransToolkit_9processor_IndicProcessor = {
+ PyVarObject_HEAD_INIT(0, 0)
+ "IndicTransToolkit.processor.""IndicProcessor", /*tp_name*/
+ sizeof(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ __pyx_tp_dealloc_17IndicTransToolkit_9processor_IndicProcessor, /*tp_dealloc*/
+ #if PY_VERSION_HEX < 0x030800b4
+ 0, /*tp_print*/
+ #endif
+ #if PY_VERSION_HEX >= 0x030800b4
+ 0, /*tp_vectorcall_offset*/
+ #endif
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*tp_compare*/
+ #endif
+ #if PY_MAJOR_VERSION >= 3
+ 0, /*tp_as_async*/
+ #endif
+ 0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash*/
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+ 0, /*tp_doc*/
+ __pyx_tp_traverse_17IndicTransToolkit_9processor_IndicProcessor, /*tp_traverse*/
+ __pyx_tp_clear_17IndicTransToolkit_9processor_IndicProcessor, /*tp_clear*/
+ 0, /*tp_richcompare*/
+ 0, /*tp_weaklistoffset*/
+ 0, /*tp_iter*/
+ 0, /*tp_iternext*/
+ __pyx_methods_17IndicTransToolkit_9processor_IndicProcessor, /*tp_methods*/
+ 0, /*tp_members*/
+ __pyx_getsets_17IndicTransToolkit_9processor_IndicProcessor, /*tp_getset*/
+ 0, /*tp_base*/
+ 0, /*tp_dict*/
+ 0, /*tp_descr_get*/
+ 0, /*tp_descr_set*/
+ #if !CYTHON_USE_TYPE_SPECS
+ 0, /*tp_dictoffset*/
+ #endif
+ 0, /*tp_init*/
+ 0, /*tp_alloc*/
+ __pyx_tp_new_17IndicTransToolkit_9processor_IndicProcessor, /*tp_new*/
+ 0, /*tp_free*/
+ 0, /*tp_is_gc*/
+ 0, /*tp_bases*/
+ 0, /*tp_mro*/
+ 0, /*tp_cache*/
+ 0, /*tp_subclasses*/
+ 0, /*tp_weaklist*/
+ 0, /*tp_del*/
+ 0, /*tp_version_tag*/
+ #if PY_VERSION_HEX >= 0x030400a1
+ #if CYTHON_USE_TP_FINALIZE
+ 0, /*tp_finalize*/
+ #else
+ NULL, /*tp_finalize*/
+ #endif
+ #endif
+ #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800)
+ 0, /*tp_vectorcall*/
+ #endif
+ #if __PYX_NEED_TP_PRINT_SLOT == 1
+ 0, /*tp_print*/
+ #endif
+ #if PY_VERSION_HEX >= 0x030C0000
+ 0, /*tp_watched*/
+ #endif
+ #if PY_VERSION_HEX >= 0x030d00A4
+ 0, /*tp_versions_used*/
+ #endif
+ #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000
+ 0, /*tp_pypy_flags*/
+ #endif
+};
+#endif
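+/*
+ * Two equivalent ways to create the extension type: with CYTHON_USE_TYPE_SPECS
+ * the PyType_Spec/slot table above is handed to the PyType_FromSpec family
+ * (Limited-API friendly); otherwise the static PyTypeObject is used directly.
+ * Both paths wire up the same dealloc/traverse/clear, method table, getset
+ * table, and tp_new.
+ */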
+
+static PyMethodDef __pyx_methods[] = {
+ {0, 0, 0, 0}
+};
+#ifndef CYTHON_SMALL_CODE
+#if defined(__clang__)
+ #define CYTHON_SMALL_CODE
+#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+ #define CYTHON_SMALL_CODE __attribute__((cold))
+#else
+ #define CYTHON_SMALL_CODE
+#endif
+#endif
+/* #### Code section: pystring_table ### */
+
+static int __Pyx_CreateStringTabAndInitStrings(void) {
+ __Pyx_StringTabEntry __pyx_string_tab[] = {
+ {&__pyx_n_u_, __pyx_k_, sizeof(__pyx_k_), 0, 1, 0, 1},
+ {&__pyx_kp_u_0, __pyx_k_0, sizeof(__pyx_k_0), 0, 1, 0, 0},
+ {&__pyx_kp_u_1, __pyx_k_1, sizeof(__pyx_k_1), 0, 1, 0, 0},
+ {&__pyx_kp_u_1_2, __pyx_k_1_2, sizeof(__pyx_k_1_2), 0, 1, 0, 0},
+ {&__pyx_kp_u_1_2_2, __pyx_k_1_2_2, sizeof(__pyx_k_1_2_2), 0, 1, 0, 0},
+ {&__pyx_kp_u_1_3, __pyx_k_1_3, sizeof(__pyx_k_1_3), 0, 1, 0, 0},
+ {&__pyx_kp_u_1_4, __pyx_k_1_4, sizeof(__pyx_k_1_4), 0, 1, 0, 0},
+ {&__pyx_kp_u_2, __pyx_k_2, sizeof(__pyx_k_2), 0, 1, 0, 0},
+ {&__pyx_kp_u_2_2, __pyx_k_2_2, sizeof(__pyx_k_2_2), 0, 1, 0, 0},
+ {&__pyx_kp_u_3, __pyx_k_3, sizeof(__pyx_k_3), 0, 1, 0, 0},
+ {&__pyx_kp_u_4, __pyx_k_4, sizeof(__pyx_k_4), 0, 1, 0, 0},
+ {&__pyx_kp_u_5, __pyx_k_5, sizeof(__pyx_k_5), 0, 1, 0, 0},
+ {&__pyx_kp_u_6, __pyx_k_6, sizeof(__pyx_k_6), 0, 1, 0, 0},
+ {&__pyx_kp_u_7, __pyx_k_7, sizeof(__pyx_k_7), 0, 1, 0, 0},
+ {&__pyx_kp_u_8, __pyx_k_8, sizeof(__pyx_k_8), 0, 1, 0, 0},
+ {&__pyx_kp_u_9, __pyx_k_9, sizeof(__pyx_k_9), 0, 1, 0, 0},
+ {&__pyx_kp_u_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2, __pyx_k_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2, sizeof(__pyx_k_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2), 0, 1, 0, 0},
+ {&__pyx_kp_u_A_Za_z0_9_w, __pyx_k_A_Za_z0_9_w, sizeof(__pyx_k_A_Za_z0_9_w), 0, 1, 0, 0},
+ {&__pyx_n_u_Arab, __pyx_k_Arab, sizeof(__pyx_k_Arab), 0, 1, 0, 1},
+ {&__pyx_n_u_Aran, __pyx_k_Aran, sizeof(__pyx_k_Aran), 0, 1, 0, 1},
+ {&__pyx_kp_u_C, __pyx_k_C, sizeof(__pyx_k_C), 0, 1, 0, 0},
+ {&__pyx_kp_u_C_2, __pyx_k_C_2, sizeof(__pyx_k_C_2), 0, 1, 0, 0},
+ {&__pyx_n_s_Dict, __pyx_k_Dict, sizeof(__pyx_k_Dict), 0, 0, 1, 1},
+ {&__pyx_kp_u_ID, __pyx_k_ID, sizeof(__pyx_k_ID), 0, 1, 0, 0},
+ {&__pyx_kp_u_ID_2, __pyx_k_ID_2, sizeof(__pyx_k_ID_2), 0, 1, 0, 0},
+ {&__pyx_kp_u_ID_3, __pyx_k_ID_3, sizeof(__pyx_k_ID_3), 0, 1, 0, 0},
+ {&__pyx_kp_u_ID_4, __pyx_k_ID_4, sizeof(__pyx_k_ID_4), 0, 1, 0, 0},
+ {&__pyx_kp_u_ID_5, __pyx_k_ID_5, sizeof(__pyx_k_ID_5), 0, 1, 0, 0},
+ {&__pyx_n_s_IndicNormalizerFactory, __pyx_k_IndicNormalizerFactory, sizeof(__pyx_k_IndicNormalizerFactory), 0, 0, 1, 1},
+ {&__pyx_n_s_IndicProcessor, __pyx_k_IndicProcessor, sizeof(__pyx_k_IndicProcessor), 0, 0, 1, 1},
+ {&__pyx_n_s_IndicProcessor___reduce_cython, __pyx_k_IndicProcessor___reduce_cython, sizeof(__pyx_k_IndicProcessor___reduce_cython), 0, 0, 1, 1},
+ {&__pyx_n_s_IndicProcessor___setstate_cython, __pyx_k_IndicProcessor___setstate_cython, sizeof(__pyx_k_IndicProcessor___setstate_cython), 0, 0, 1, 1},
+ {&__pyx_n_s_IndicProcessor_postprocess_batch, __pyx_k_IndicProcessor_postprocess_batch, sizeof(__pyx_k_IndicProcessor_postprocess_batch), 0, 0, 1, 1},
+ {&__pyx_n_s_IndicProcessor_preprocess_batch, __pyx_k_IndicProcessor_preprocess_batch, sizeof(__pyx_k_IndicProcessor_preprocess_batch), 0, 0, 1, 1},
+ {&__pyx_n_s_IndicTransToolkit_processor, __pyx_k_IndicTransToolkit_processor, sizeof(__pyx_k_IndicTransToolkit_processor), 0, 0, 1, 1},
+ {&__pyx_kp_s_IndicTransToolkit_processor_pyx, __pyx_k_IndicTransToolkit_processor_pyx, sizeof(__pyx_k_IndicTransToolkit_processor_pyx), 0, 0, 1, 0},
+ {&__pyx_n_u_Latn, __pyx_k_Latn, sizeof(__pyx_k_Latn), 0, 1, 0, 1},
+ {&__pyx_n_s_List, __pyx_k_List, sizeof(__pyx_k_List), 0, 0, 1, 1},
+ {&__pyx_n_s_MosesDetokenizer, __pyx_k_MosesDetokenizer, sizeof(__pyx_k_MosesDetokenizer), 0, 0, 1, 1},
+ {&__pyx_n_s_MosesPunctNormalizer, __pyx_k_MosesPunctNormalizer, sizeof(__pyx_k_MosesPunctNormalizer), 0, 0, 1, 1},
+ {&__pyx_n_s_MosesTokenizer, __pyx_k_MosesTokenizer, sizeof(__pyx_k_MosesTokenizer), 0, 0, 1, 1},
+ {&__pyx_n_u_Mtei, __pyx_k_Mtei, sizeof(__pyx_k_Mtei), 0, 1, 0, 1},
+ {&__pyx_kp_u_None, __pyx_k_None, sizeof(__pyx_k_None), 0, 1, 0, 0},
+ {&__pyx_n_u_Olck, __pyx_k_Olck, sizeof(__pyx_k_Olck), 0, 1, 0, 1},
+ {&__pyx_kp_u_Post_processing, __pyx_k_Post_processing, sizeof(__pyx_k_Post_processing), 0, 1, 0, 0},
+ {&__pyx_kp_u_Pre_processing, __pyx_k_Pre_processing, sizeof(__pyx_k_Pre_processing), 0, 1, 0, 0},
+ {&__pyx_n_s_Queue, __pyx_k_Queue, sizeof(__pyx_k_Queue), 0, 0, 1, 1},
+ {&__pyx_n_s_TypeError, __pyx_k_TypeError, sizeof(__pyx_k_TypeError), 0, 0, 1, 1},
+ {&__pyx_n_s_UnicodeIndicTransliterator, __pyx_k_UnicodeIndicTransliterator, sizeof(__pyx_k_UnicodeIndicTransliterator), 0, 0, 1, 1},
+ {&__pyx_n_s_Union, __pyx_k_Union, sizeof(__pyx_k_Union), 0, 0, 1, 1},
+ {&__pyx_n_u__10, __pyx_k__10, sizeof(__pyx_k__10), 0, 1, 0, 1},
+ {&__pyx_n_u__100, __pyx_k__100, sizeof(__pyx_k__100), 0, 1, 0, 1},
+ {&__pyx_kp_u__101, __pyx_k__101, sizeof(__pyx_k__101), 0, 1, 0, 0},
+ {&__pyx_kp_u__102, __pyx_k__102, sizeof(__pyx_k__102), 0, 1, 0, 0},
+ {&__pyx_kp_u__103, __pyx_k__103, sizeof(__pyx_k__103), 0, 1, 0, 0},
+ {&__pyx_kp_u__104, __pyx_k__104, sizeof(__pyx_k__104), 0, 1, 0, 0},
+ {&__pyx_kp_u__105, __pyx_k__105, sizeof(__pyx_k__105), 0, 1, 0, 0},
+ {&__pyx_kp_u__106, __pyx_k__106, sizeof(__pyx_k__106), 0, 1, 0, 0},
+ {&__pyx_kp_u__107, __pyx_k__107, sizeof(__pyx_k__107), 0, 1, 0, 0},
+ {&__pyx_kp_u__108, __pyx_k__108, sizeof(__pyx_k__108), 0, 1, 0, 0},
+ {&__pyx_kp_u__109, __pyx_k__109, sizeof(__pyx_k__109), 0, 1, 0, 0},
+ {&__pyx_n_u__11, __pyx_k__11, sizeof(__pyx_k__11), 0, 1, 0, 1},
+ {&__pyx_kp_u__110, __pyx_k__110, sizeof(__pyx_k__110), 0, 1, 0, 0},
+ {&__pyx_kp_u__111, __pyx_k__111, sizeof(__pyx_k__111), 0, 1, 0, 0},
+ {&__pyx_kp_u__112, __pyx_k__112, sizeof(__pyx_k__112), 0, 1, 0, 0},
+ {&__pyx_kp_u__113, __pyx_k__113, sizeof(__pyx_k__113), 0, 1, 0, 0},
+ {&__pyx_kp_u__114, __pyx_k__114, sizeof(__pyx_k__114), 0, 1, 0, 0},
+ {&__pyx_kp_u__115, __pyx_k__115, sizeof(__pyx_k__115), 0, 1, 0, 0},
+ {&__pyx_kp_u__116, __pyx_k__116, sizeof(__pyx_k__116), 0, 1, 0, 0},
+ {&__pyx_kp_u__117, __pyx_k__117, sizeof(__pyx_k__117), 0, 1, 0, 0},
+ {&__pyx_kp_u__118, __pyx_k__118, sizeof(__pyx_k__118), 0, 1, 0, 0},
+ {&__pyx_kp_u__119, __pyx_k__119, sizeof(__pyx_k__119), 0, 1, 0, 0},
+ {&__pyx_n_u__12, __pyx_k__12, sizeof(__pyx_k__12), 0, 1, 0, 1},
+ {&__pyx_kp_u__120, __pyx_k__120, sizeof(__pyx_k__120), 0, 1, 0, 0},
+ {&__pyx_kp_u__121, __pyx_k__121, sizeof(__pyx_k__121), 0, 1, 0, 0},
+ {&__pyx_kp_u__122, __pyx_k__122, sizeof(__pyx_k__122), 0, 1, 0, 0},
+ {&__pyx_kp_u__123, __pyx_k__123, sizeof(__pyx_k__123), 0, 1, 0, 0},
+ {&__pyx_kp_u__124, __pyx_k__124, sizeof(__pyx_k__124), 0, 1, 0, 0},
+ {&__pyx_kp_u__125, __pyx_k__125, sizeof(__pyx_k__125), 0, 1, 0, 0},
+ {&__pyx_kp_u__126, __pyx_k__126, sizeof(__pyx_k__126), 0, 1, 0, 0},
+ {&__pyx_kp_u__127, __pyx_k__127, sizeof(__pyx_k__127), 0, 1, 0, 0},
+ {&__pyx_kp_u__128, __pyx_k__128, sizeof(__pyx_k__128), 0, 1, 0, 0},
+ {&__pyx_kp_u__129, __pyx_k__129, sizeof(__pyx_k__129), 0, 1, 0, 0},
+ {&__pyx_n_u__13, __pyx_k__13, sizeof(__pyx_k__13), 0, 1, 0, 1},
+ {&__pyx_kp_u__130, __pyx_k__130, sizeof(__pyx_k__130), 0, 1, 0, 0},
+ {&__pyx_kp_u__131, __pyx_k__131, sizeof(__pyx_k__131), 0, 1, 0, 0},
+ {&__pyx_kp_u__132, __pyx_k__132, sizeof(__pyx_k__132), 0, 1, 0, 0},
+ {&__pyx_kp_u__133, __pyx_k__133, sizeof(__pyx_k__133), 0, 1, 0, 0},
+ {&__pyx_kp_u__134, __pyx_k__134, sizeof(__pyx_k__134), 0, 1, 0, 0},
+ {&__pyx_kp_u__135, __pyx_k__135, sizeof(__pyx_k__135), 0, 1, 0, 0},
+ {&__pyx_kp_u__136, __pyx_k__136, sizeof(__pyx_k__136), 0, 1, 0, 0},
+ {&__pyx_kp_u__137, __pyx_k__137, sizeof(__pyx_k__137), 0, 1, 0, 0},
+ {&__pyx_n_u__138, __pyx_k__138, sizeof(__pyx_k__138), 0, 1, 0, 1},
+ {&__pyx_kp_u__139, __pyx_k__139, sizeof(__pyx_k__139), 0, 1, 0, 0},
+ {&__pyx_n_u__14, __pyx_k__14, sizeof(__pyx_k__14), 0, 1, 0, 1},
+ {&__pyx_kp_u__140, __pyx_k__140, sizeof(__pyx_k__140), 0, 1, 0, 0},
+ {&__pyx_kp_u__141, __pyx_k__141, sizeof(__pyx_k__141), 0, 1, 0, 0},
+ {&__pyx_kp_u__142, __pyx_k__142, sizeof(__pyx_k__142), 0, 1, 0, 0},
+ {&__pyx_kp_u__143, __pyx_k__143, sizeof(__pyx_k__143), 0, 1, 0, 0},
+ {&__pyx_kp_u__144, __pyx_k__144, sizeof(__pyx_k__144), 0, 1, 0, 0},
+ {&__pyx_kp_u__145, __pyx_k__145, sizeof(__pyx_k__145), 0, 1, 0, 0},
+ {&__pyx_kp_u__146, __pyx_k__146, sizeof(__pyx_k__146), 0, 1, 0, 0},
+ {&__pyx_kp_u__147, __pyx_k__147, sizeof(__pyx_k__147), 0, 1, 0, 0},
+ {&__pyx_kp_u__148, __pyx_k__148, sizeof(__pyx_k__148), 0, 1, 0, 0},
+ {&__pyx_kp_u__149, __pyx_k__149, sizeof(__pyx_k__149), 0, 1, 0, 0},
+ {&__pyx_n_u__15, __pyx_k__15, sizeof(__pyx_k__15), 0, 1, 0, 1},
+ {&__pyx_kp_u__151, __pyx_k__151, sizeof(__pyx_k__151), 0, 1, 0, 0},
+ {&__pyx_kp_u__153, __pyx_k__153, sizeof(__pyx_k__153), 0, 1, 0, 0},
+ {&__pyx_kp_u__154, __pyx_k__154, sizeof(__pyx_k__154), 0, 1, 0, 0},
+ {&__pyx_n_u__155, __pyx_k__155, sizeof(__pyx_k__155), 0, 1, 0, 1},
+ {&__pyx_kp_u__156, __pyx_k__156, sizeof(__pyx_k__156), 0, 1, 0, 0},
+ {&__pyx_kp_u__157, __pyx_k__157, sizeof(__pyx_k__157), 0, 1, 0, 0},
+ {&__pyx_kp_u__159, __pyx_k__159, sizeof(__pyx_k__159), 0, 1, 0, 0},
+ {&__pyx_n_u__16, __pyx_k__16, sizeof(__pyx_k__16), 0, 1, 0, 1},
+ {&__pyx_kp_u__160, __pyx_k__160, sizeof(__pyx_k__160), 0, 1, 0, 0},
+ {&__pyx_kp_u__162, __pyx_k__162, sizeof(__pyx_k__162), 0, 1, 0, 0},
+ {&__pyx_kp_u__163, __pyx_k__163, sizeof(__pyx_k__163), 0, 1, 0, 0},
+ {&__pyx_kp_u__165, __pyx_k__165, sizeof(__pyx_k__165), 0, 1, 0, 0},
+ {&__pyx_n_u__166, __pyx_k__166, sizeof(__pyx_k__166), 0, 1, 0, 1},
+ {&__pyx_kp_u__168, __pyx_k__168, sizeof(__pyx_k__168), 0, 1, 0, 0},
+ {&__pyx_n_u__169, __pyx_k__169, sizeof(__pyx_k__169), 0, 1, 0, 1},
+ {&__pyx_n_u__17, __pyx_k__17, sizeof(__pyx_k__17), 0, 1, 0, 1},
+ {&__pyx_n_s__171, __pyx_k__171, sizeof(__pyx_k__171), 0, 0, 1, 1},
+ {&__pyx_n_u__18, __pyx_k__18, sizeof(__pyx_k__18), 0, 1, 0, 1},
+ {&__pyx_n_s__182, __pyx_k__182, sizeof(__pyx_k__182), 0, 0, 1, 1},
+ {&__pyx_n_u__19, __pyx_k__19, sizeof(__pyx_k__19), 0, 1, 0, 1},
+ {&__pyx_n_u__2, __pyx_k__2, sizeof(__pyx_k__2), 0, 1, 0, 1},
+ {&__pyx_n_u__20, __pyx_k__20, sizeof(__pyx_k__20), 0, 1, 0, 1},
+ {&__pyx_n_u__21, __pyx_k__21, sizeof(__pyx_k__21), 0, 1, 0, 1},
+ {&__pyx_n_u__22, __pyx_k__22, sizeof(__pyx_k__22), 0, 1, 0, 1},
+ {&__pyx_n_u__23, __pyx_k__23, sizeof(__pyx_k__23), 0, 1, 0, 1},
+ {&__pyx_n_u__24, __pyx_k__24, sizeof(__pyx_k__24), 0, 1, 0, 1},
+ {&__pyx_n_u__25, __pyx_k__25, sizeof(__pyx_k__25), 0, 1, 0, 1},
+ {&__pyx_n_u__26, __pyx_k__26, sizeof(__pyx_k__26), 0, 1, 0, 1},
+ {&__pyx_n_u__27, __pyx_k__27, sizeof(__pyx_k__27), 0, 1, 0, 1},
+ {&__pyx_n_u__28, __pyx_k__28, sizeof(__pyx_k__28), 0, 1, 0, 1},
+ {&__pyx_n_u__29, __pyx_k__29, sizeof(__pyx_k__29), 0, 1, 0, 1},
+ {&__pyx_n_u__3, __pyx_k__3, sizeof(__pyx_k__3), 0, 1, 0, 1},
+ {&__pyx_n_u__30, __pyx_k__30, sizeof(__pyx_k__30), 0, 1, 0, 1},
+ {&__pyx_n_u__31, __pyx_k__31, sizeof(__pyx_k__31), 0, 1, 0, 1},
+ {&__pyx_n_u__32, __pyx_k__32, sizeof(__pyx_k__32), 0, 1, 0, 1},
+ {&__pyx_n_u__33, __pyx_k__33, sizeof(__pyx_k__33), 0, 1, 0, 1},
+ {&__pyx_n_u__34, __pyx_k__34, sizeof(__pyx_k__34), 0, 1, 0, 1},
+ {&__pyx_n_u__35, __pyx_k__35, sizeof(__pyx_k__35), 0, 1, 0, 1},
+ {&__pyx_n_u__36, __pyx_k__36, sizeof(__pyx_k__36), 0, 1, 0, 1},
+ {&__pyx_n_u__37, __pyx_k__37, sizeof(__pyx_k__37), 0, 1, 0, 1},
+ {&__pyx_n_u__38, __pyx_k__38, sizeof(__pyx_k__38), 0, 1, 0, 1},
+ {&__pyx_n_u__39, __pyx_k__39, sizeof(__pyx_k__39), 0, 1, 0, 1},
+ {&__pyx_n_u__4, __pyx_k__4, sizeof(__pyx_k__4), 0, 1, 0, 1},
+ {&__pyx_n_u__40, __pyx_k__40, sizeof(__pyx_k__40), 0, 1, 0, 1},
+ {&__pyx_n_u__41, __pyx_k__41, sizeof(__pyx_k__41), 0, 1, 0, 1},
+ {&__pyx_n_u__42, __pyx_k__42, sizeof(__pyx_k__42), 0, 1, 0, 1},
+ {&__pyx_n_u__43, __pyx_k__43, sizeof(__pyx_k__43), 0, 1, 0, 1},
+ {&__pyx_n_u__44, __pyx_k__44, sizeof(__pyx_k__44), 0, 1, 0, 1},
+ {&__pyx_n_u__45, __pyx_k__45, sizeof(__pyx_k__45), 0, 1, 0, 1},
+ {&__pyx_n_u__46, __pyx_k__46, sizeof(__pyx_k__46), 0, 1, 0, 1},
+ {&__pyx_n_u__47, __pyx_k__47, sizeof(__pyx_k__47), 0, 1, 0, 1},
+ {&__pyx_n_u__48, __pyx_k__48, sizeof(__pyx_k__48), 0, 1, 0, 1},
+ {&__pyx_n_u__49, __pyx_k__49, sizeof(__pyx_k__49), 0, 1, 0, 1},
+ {&__pyx_n_u__5, __pyx_k__5, sizeof(__pyx_k__5), 0, 1, 0, 1},
+ {&__pyx_n_u__50, __pyx_k__50, sizeof(__pyx_k__50), 0, 1, 0, 1},
+ {&__pyx_n_u__51, __pyx_k__51, sizeof(__pyx_k__51), 0, 1, 0, 1},
+ {&__pyx_n_u__52, __pyx_k__52, sizeof(__pyx_k__52), 0, 1, 0, 1},
+ {&__pyx_n_u__53, __pyx_k__53, sizeof(__pyx_k__53), 0, 1, 0, 1},
+ {&__pyx_n_u__54, __pyx_k__54, sizeof(__pyx_k__54), 0, 1, 0, 1},
+ {&__pyx_n_u__55, __pyx_k__55, sizeof(__pyx_k__55), 0, 1, 0, 1},
+ {&__pyx_n_u__56, __pyx_k__56, sizeof(__pyx_k__56), 0, 1, 0, 1},
+ {&__pyx_n_u__57, __pyx_k__57, sizeof(__pyx_k__57), 0, 1, 0, 1},
+ {&__pyx_n_u__58, __pyx_k__58, sizeof(__pyx_k__58), 0, 1, 0, 1},
+ {&__pyx_n_u__59, __pyx_k__59, sizeof(__pyx_k__59), 0, 1, 0, 1},
+ {&__pyx_n_u__6, __pyx_k__6, sizeof(__pyx_k__6), 0, 1, 0, 1},
+ {&__pyx_n_u__60, __pyx_k__60, sizeof(__pyx_k__60), 0, 1, 0, 1},
+ {&__pyx_n_u__61, __pyx_k__61, sizeof(__pyx_k__61), 0, 1, 0, 1},
+ {&__pyx_n_u__62, __pyx_k__62, sizeof(__pyx_k__62), 0, 1, 0, 1},
+ {&__pyx_n_u__63, __pyx_k__63, sizeof(__pyx_k__63), 0, 1, 0, 1},
+ {&__pyx_n_u__64, __pyx_k__64, sizeof(__pyx_k__64), 0, 1, 0, 1},
+ {&__pyx_n_u__65, __pyx_k__65, sizeof(__pyx_k__65), 0, 1, 0, 1},
+ {&__pyx_n_u__66, __pyx_k__66, sizeof(__pyx_k__66), 0, 1, 0, 1},
+ {&__pyx_n_u__67, __pyx_k__67, sizeof(__pyx_k__67), 0, 1, 0, 1},
+ {&__pyx_n_u__68, __pyx_k__68, sizeof(__pyx_k__68), 0, 1, 0, 1},
+ {&__pyx_n_u__69, __pyx_k__69, sizeof(__pyx_k__69), 0, 1, 0, 1},
+ {&__pyx_n_u__7, __pyx_k__7, sizeof(__pyx_k__7), 0, 1, 0, 1},
+ {&__pyx_n_u__70, __pyx_k__70, sizeof(__pyx_k__70), 0, 1, 0, 1},
+ {&__pyx_n_u__71, __pyx_k__71, sizeof(__pyx_k__71), 0, 1, 0, 1},
+ {&__pyx_n_u__72, __pyx_k__72, sizeof(__pyx_k__72), 0, 1, 0, 1},
+ {&__pyx_n_u__73, __pyx_k__73, sizeof(__pyx_k__73), 0, 1, 0, 1},
+ {&__pyx_n_u__74, __pyx_k__74, sizeof(__pyx_k__74), 0, 1, 0, 1},
+ {&__pyx_n_u__75, __pyx_k__75, sizeof(__pyx_k__75), 0, 1, 0, 1},
+ {&__pyx_n_u__76, __pyx_k__76, sizeof(__pyx_k__76), 0, 1, 0, 1},
+ {&__pyx_n_u__77, __pyx_k__77, sizeof(__pyx_k__77), 0, 1, 0, 1},
+ {&__pyx_n_u__78, __pyx_k__78, sizeof(__pyx_k__78), 0, 1, 0, 1},
+ {&__pyx_n_u__79, __pyx_k__79, sizeof(__pyx_k__79), 0, 1, 0, 1},
+ {&__pyx_n_u__8, __pyx_k__8, sizeof(__pyx_k__8), 0, 1, 0, 1},
+ {&__pyx_n_u__80, __pyx_k__80, sizeof(__pyx_k__80), 0, 1, 0, 1},
+ {&__pyx_n_u__81, __pyx_k__81, sizeof(__pyx_k__81), 0, 1, 0, 1},
+ {&__pyx_n_u__82, __pyx_k__82, sizeof(__pyx_k__82), 0, 1, 0, 1},
+ {&__pyx_n_u__83, __pyx_k__83, sizeof(__pyx_k__83), 0, 1, 0, 1},
+ {&__pyx_n_u__84, __pyx_k__84, sizeof(__pyx_k__84), 0, 1, 0, 1},
+ {&__pyx_n_u__85, __pyx_k__85, sizeof(__pyx_k__85), 0, 1, 0, 1},
+ {&__pyx_n_u__86, __pyx_k__86, sizeof(__pyx_k__86), 0, 1, 0, 1},
+ {&__pyx_n_u__87, __pyx_k__87, sizeof(__pyx_k__87), 0, 1, 0, 1},
+ {&__pyx_n_u__88, __pyx_k__88, sizeof(__pyx_k__88), 0, 1, 0, 1},
+ {&__pyx_n_u__89, __pyx_k__89, sizeof(__pyx_k__89), 0, 1, 0, 1},
+ {&__pyx_n_u__9, __pyx_k__9, sizeof(__pyx_k__9), 0, 1, 0, 1},
+ {&__pyx_n_u__90, __pyx_k__90, sizeof(__pyx_k__90), 0, 1, 0, 1},
+ {&__pyx_n_u__91, __pyx_k__91, sizeof(__pyx_k__91), 0, 1, 0, 1},
+ {&__pyx_n_u__92, __pyx_k__92, sizeof(__pyx_k__92), 0, 1, 0, 1},
+ {&__pyx_n_u__93, __pyx_k__93, sizeof(__pyx_k__93), 0, 1, 0, 1},
+ {&__pyx_n_u__94, __pyx_k__94, sizeof(__pyx_k__94), 0, 1, 0, 1},
+ {&__pyx_n_u__95, __pyx_k__95, sizeof(__pyx_k__95), 0, 1, 0, 1},
+ {&__pyx_n_u__96, __pyx_k__96, sizeof(__pyx_k__96), 0, 1, 0, 1},
+ {&__pyx_n_u__97, __pyx_k__97, sizeof(__pyx_k__97), 0, 1, 0, 1},
+ {&__pyx_n_u__98, __pyx_k__98, sizeof(__pyx_k__98), 0, 1, 0, 1},
+ {&__pyx_n_u__99, __pyx_k__99, sizeof(__pyx_k__99), 0, 1, 0, 1},
+ {&__pyx_n_u_as, __pyx_k_as, sizeof(__pyx_k_as), 0, 1, 0, 1},
+ {&__pyx_n_u_asm_Beng, __pyx_k_asm_Beng, sizeof(__pyx_k_asm_Beng), 0, 1, 0, 1},
+ {&__pyx_n_s_asyncio_coroutines, __pyx_k_asyncio_coroutines, sizeof(__pyx_k_asyncio_coroutines), 0, 0, 1, 1},
+ {&__pyx_n_u_awa_Deva, __pyx_k_awa_Deva, sizeof(__pyx_k_awa_Deva), 0, 1, 0, 1},
+ {&__pyx_kp_u_b_w_https_ftp_w_w_w_b, __pyx_k_b_w_https_ftp_w_w_w_b, sizeof(__pyx_k_b_w_https_ftp_w_w_w_b), 0, 1, 0, 0},
+ {&__pyx_n_s_batch, __pyx_k_batch, sizeof(__pyx_k_batch), 0, 0, 1, 1},
+ {&__pyx_n_u_ben_Beng, __pyx_k_ben_Beng, sizeof(__pyx_k_ben_Beng), 0, 1, 0, 1},
+ {&__pyx_n_u_bho_Deva, __pyx_k_bho_Deva, sizeof(__pyx_k_bho_Deva), 0, 1, 0, 1},
+ {&__pyx_n_u_bn, __pyx_k_bn, sizeof(__pyx_k_bn), 0, 1, 0, 1},
+ {&__pyx_n_u_brx_Deva, __pyx_k_brx_Deva, sizeof(__pyx_k_brx_Deva), 0, 1, 0, 1},
+ {&__pyx_n_s_chr, __pyx_k_chr, sizeof(__pyx_k_chr), 0, 0, 1, 1},
+ {&__pyx_n_s_cinit___locals_lambda, __pyx_k_cinit___locals_lambda, sizeof(__pyx_k_cinit___locals_lambda), 0, 0, 1, 1},
+ {&__pyx_n_s_clear, __pyx_k_clear, sizeof(__pyx_k_clear), 0, 0, 1, 1},
+ {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1},
+ {&__pyx_n_s_compile, __pyx_k_compile, sizeof(__pyx_k_compile), 0, 0, 1, 1},
+ {&__pyx_kp_u_d, __pyx_k_d, sizeof(__pyx_k_d), 0, 1, 0, 0},
+ {&__pyx_kp_u_d_d, __pyx_k_d_d, sizeof(__pyx_k_d_d), 0, 1, 0, 0},
+ {&__pyx_kp_u_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d, __pyx_k_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d, sizeof(__pyx_k_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d), 0, 1, 0, 0},
+ {&__pyx_n_s_desc, __pyx_k_desc, sizeof(__pyx_k_desc), 0, 0, 1, 1},
+ {&__pyx_n_s_detokenize, __pyx_k_detokenize, sizeof(__pyx_k_detokenize), 0, 0, 1, 1},
+ {&__pyx_kp_u_disable, __pyx_k_disable, sizeof(__pyx_k_disable), 0, 1, 0, 0},
+ {&__pyx_n_u_doi_Deva, __pyx_k_doi_Deva, sizeof(__pyx_k_doi_Deva), 0, 1, 0, 1},
+ {&__pyx_n_u_en, __pyx_k_en, sizeof(__pyx_k_en), 0, 1, 0, 1},
+ {&__pyx_kp_u_enable, __pyx_k_enable, sizeof(__pyx_k_enable), 0, 1, 0, 0},
+ {&__pyx_n_u_eng_Latn, __pyx_k_eng_Latn, sizeof(__pyx_k_eng_Latn), 0, 1, 0, 1},
+ {&__pyx_n_s_escape, __pyx_k_escape, sizeof(__pyx_k_escape), 0, 0, 1, 1},
+ {&__pyx_n_s_findall, __pyx_k_findall, sizeof(__pyx_k_findall), 0, 0, 1, 1},
+ {&__pyx_kp_u_gc, __pyx_k_gc, sizeof(__pyx_k_gc), 0, 1, 0, 0},
+ {&__pyx_n_s_get, __pyx_k_get, sizeof(__pyx_k_get), 0, 0, 1, 1},
+ {&__pyx_n_s_get_normalizer, __pyx_k_get_normalizer, sizeof(__pyx_k_get_normalizer), 0, 0, 1, 1},
+ {&__pyx_n_s_getstate, __pyx_k_getstate, sizeof(__pyx_k_getstate), 0, 0, 1, 1},
+ {&__pyx_n_u_gom_Deva, __pyx_k_gom_Deva, sizeof(__pyx_k_gom_Deva), 0, 1, 0, 1},
+ {&__pyx_n_u_gon_Deva, __pyx_k_gon_Deva, sizeof(__pyx_k_gon_Deva), 0, 1, 0, 1},
+ {&__pyx_n_s_group, __pyx_k_group, sizeof(__pyx_k_group), 0, 0, 1, 1},
+ {&__pyx_n_u_gu, __pyx_k_gu, sizeof(__pyx_k_gu), 0, 1, 0, 1},
+ {&__pyx_n_u_guj_Gujr, __pyx_k_guj_Gujr, sizeof(__pyx_k_guj_Gujr), 0, 1, 0, 1},
+ {&__pyx_n_u_hi, __pyx_k_hi, sizeof(__pyx_k_hi), 0, 1, 0, 1},
+ {&__pyx_n_u_hin_Deva, __pyx_k_hin_Deva, sizeof(__pyx_k_hin_Deva), 0, 1, 0, 1},
+ {&__pyx_n_u_hne_Deva, __pyx_k_hne_Deva, sizeof(__pyx_k_hne_Deva), 0, 1, 0, 1},
+ {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1},
+ {&__pyx_n_s_indic_detokenize, __pyx_k_indic_detokenize, sizeof(__pyx_k_indic_detokenize), 0, 0, 1, 1},
+ {&__pyx_n_s_indic_tokenize, __pyx_k_indic_tokenize, sizeof(__pyx_k_indic_tokenize), 0, 0, 1, 1},
+ {&__pyx_n_s_indicnlp_normalize_indic_normali, __pyx_k_indicnlp_normalize_indic_normali, sizeof(__pyx_k_indicnlp_normalize_indic_normali), 0, 0, 1, 1},
+ {&__pyx_n_s_indicnlp_tokenize, __pyx_k_indicnlp_tokenize, sizeof(__pyx_k_indicnlp_tokenize), 0, 0, 1, 1},
+ {&__pyx_n_s_indicnlp_transliterate_unicode_t, __pyx_k_indicnlp_transliterate_unicode_t, sizeof(__pyx_k_indicnlp_transliterate_unicode_t), 0, 0, 1, 1},
+ {&__pyx_n_s_inference, __pyx_k_inference, sizeof(__pyx_k_inference), 0, 0, 1, 1},
+ {&__pyx_n_s_initializing, __pyx_k_initializing, sizeof(__pyx_k_initializing), 0, 0, 1, 1},
+ {&__pyx_n_s_is_coroutine, __pyx_k_is_coroutine, sizeof(__pyx_k_is_coroutine), 0, 0, 1, 1},
+ {&__pyx_n_s_is_target, __pyx_k_is_target, sizeof(__pyx_k_is_target), 0, 0, 1, 1},
+ {&__pyx_kp_u_isenabled, __pyx_k_isenabled, sizeof(__pyx_k_isenabled), 0, 1, 0, 0},
+ {&__pyx_n_s_items, __pyx_k_items, sizeof(__pyx_k_items), 0, 0, 1, 1},
+ {&__pyx_n_u_kK, __pyx_k_kK, sizeof(__pyx_k_kK), 0, 1, 0, 1},
+ {&__pyx_n_u_kan_Knda, __pyx_k_kan_Knda, sizeof(__pyx_k_kan_Knda), 0, 1, 0, 1},
+ {&__pyx_n_u_kas_Arab, __pyx_k_kas_Arab, sizeof(__pyx_k_kas_Arab), 0, 1, 0, 1},
+ {&__pyx_n_u_kas_Deva, __pyx_k_kas_Deva, sizeof(__pyx_k_kas_Deva), 0, 1, 0, 1},
+ {&__pyx_n_u_kha_Latn, __pyx_k_kha_Latn, sizeof(__pyx_k_kha_Latn), 0, 1, 0, 1},
+ {&__pyx_n_u_kn, __pyx_k_kn, sizeof(__pyx_k_kn), 0, 1, 0, 1},
+ {&__pyx_n_s_lang, __pyx_k_lang, sizeof(__pyx_k_lang), 0, 0, 1, 1},
+ {&__pyx_n_u_line, __pyx_k_line, sizeof(__pyx_k_line), 0, 1, 0, 1},
+ {&__pyx_n_u_lus_Latn, __pyx_k_lus_Latn, sizeof(__pyx_k_lus_Latn), 0, 1, 0, 1},
+ {&__pyx_n_s_m, __pyx_k_m, sizeof(__pyx_k_m), 0, 0, 1, 1},
+ {&__pyx_n_u_mag_Deva, __pyx_k_mag_Deva, sizeof(__pyx_k_mag_Deva), 0, 1, 0, 1},
+ {&__pyx_n_u_mai_Deva, __pyx_k_mai_Deva, sizeof(__pyx_k_mai_Deva), 0, 1, 0, 1},
+ {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1},
+ {&__pyx_n_u_mal_Mlym, __pyx_k_mal_Mlym, sizeof(__pyx_k_mal_Mlym), 0, 1, 0, 1},
+ {&__pyx_n_u_mar_Deva, __pyx_k_mar_Deva, sizeof(__pyx_k_mar_Deva), 0, 1, 0, 1},
+ {&__pyx_n_u_ml, __pyx_k_ml, sizeof(__pyx_k_ml), 0, 1, 0, 1},
+ {&__pyx_n_u_mni_Beng, __pyx_k_mni_Beng, sizeof(__pyx_k_mni_Beng), 0, 1, 0, 1},
+ {&__pyx_n_u_mni_Mtei, __pyx_k_mni_Mtei, sizeof(__pyx_k_mni_Mtei), 0, 1, 0, 1},
+ {&__pyx_n_u_mr, __pyx_k_mr, sizeof(__pyx_k_mr), 0, 1, 0, 1},
+ {&__pyx_kp_u_n, __pyx_k_n, sizeof(__pyx_k_n), 0, 1, 0, 0},
+ {&__pyx_kp_u_n_2, __pyx_k_n_2, sizeof(__pyx_k_n_2), 0, 1, 0, 0},
+ {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1},
+ {&__pyx_n_u_ne, __pyx_k_ne, sizeof(__pyx_k_ne), 0, 1, 0, 1},
+ {&__pyx_kp_s_no_default___reduce___due_to_non, __pyx_k_no_default___reduce___due_to_non, sizeof(__pyx_k_no_default___reduce___due_to_non), 0, 0, 1, 0},
+ {&__pyx_n_s_normalize, __pyx_k_normalize, sizeof(__pyx_k_normalize), 0, 0, 1, 1},
+ {&__pyx_n_u_npi_Deva, __pyx_k_npi_Deva, sizeof(__pyx_k_npi_Deva), 0, 1, 0, 1},
+ {&__pyx_n_u_or, __pyx_k_or, sizeof(__pyx_k_or), 0, 1, 0, 1},
+ {&__pyx_n_u_ory, __pyx_k_ory, sizeof(__pyx_k_ory), 0, 1, 0, 1},
+ {&__pyx_n_u_ory_Orya, __pyx_k_ory_Orya, sizeof(__pyx_k_ory_Orya), 0, 1, 0, 1},
+ {&__pyx_n_u_pa, __pyx_k_pa, sizeof(__pyx_k_pa), 0, 1, 0, 1},
+ {&__pyx_n_u_pan_Guru, __pyx_k_pan_Guru, sizeof(__pyx_k_pan_Guru), 0, 1, 0, 1},
+ {&__pyx_n_s_postprocess_batch, __pyx_k_postprocess_batch, sizeof(__pyx_k_postprocess_batch), 0, 0, 1, 1},
+ {&__pyx_n_s_preprocess_batch, __pyx_k_preprocess_batch, sizeof(__pyx_k_preprocess_batch), 0, 0, 1, 1},
+ {&__pyx_n_s_put, __pyx_k_put, sizeof(__pyx_k_put), 0, 0, 1, 1},
+ {&__pyx_n_s_pyx_state, __pyx_k_pyx_state, sizeof(__pyx_k_pyx_state), 0, 0, 1, 1},
+ {&__pyx_n_s_pyx_vtable, __pyx_k_pyx_vtable, sizeof(__pyx_k_pyx_vtable), 0, 0, 1, 1},
+ {&__pyx_n_s_queue, __pyx_k_queue, sizeof(__pyx_k_queue), 0, 0, 1, 1},
+ {&__pyx_kp_u_r, __pyx_k_r, sizeof(__pyx_k_r), 0, 1, 0, 0},
+ {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1},
+ {&__pyx_n_s_re, __pyx_k_re, sizeof(__pyx_k_re), 0, 0, 1, 1},
+ {&__pyx_n_s_reduce, __pyx_k_reduce, sizeof(__pyx_k_reduce), 0, 0, 1, 1},
+ {&__pyx_n_s_reduce_cython, __pyx_k_reduce_cython, sizeof(__pyx_k_reduce_cython), 0, 0, 1, 1},
+ {&__pyx_n_s_reduce_ex, __pyx_k_reduce_ex, sizeof(__pyx_k_reduce_ex), 0, 0, 1, 1},
+ {&__pyx_n_s_regex, __pyx_k_regex, sizeof(__pyx_k_regex), 0, 0, 1, 1},
+ {&__pyx_n_s_replace, __pyx_k_replace, sizeof(__pyx_k_replace), 0, 0, 1, 1},
+ {&__pyx_kp_u_s, __pyx_k_s, sizeof(__pyx_k_s), 0, 1, 0, 0},
+ {&__pyx_kp_u_s_2, __pyx_k_s_2, sizeof(__pyx_k_s_2), 0, 1, 0, 0},
+ {&__pyx_kp_u_s_3, __pyx_k_s_3, sizeof(__pyx_k_s_3), 0, 1, 0, 0},
+ {&__pyx_kp_u_s_s, __pyx_k_s_s, sizeof(__pyx_k_s_s), 0, 1, 0, 0},
+ {&__pyx_kp_u_s_s_2, __pyx_k_s_s_2, sizeof(__pyx_k_s_s_2), 0, 1, 0, 0},
+ {&__pyx_n_s_sacremoses, __pyx_k_sacremoses, sizeof(__pyx_k_sacremoses), 0, 0, 1, 1},
+ {&__pyx_n_u_san_Deva, __pyx_k_san_Deva, sizeof(__pyx_k_san_Deva), 0, 1, 0, 1},
+ {&__pyx_n_u_sat_Olck, __pyx_k_sat_Olck, sizeof(__pyx_k_sat_Olck), 0, 1, 0, 1},
+ {&__pyx_n_s_self, __pyx_k_self, sizeof(__pyx_k_self), 0, 0, 1, 1},
+ {&__pyx_n_s_sents, __pyx_k_sents, sizeof(__pyx_k_sents), 0, 0, 1, 1},
+ {&__pyx_n_s_setstate, __pyx_k_setstate, sizeof(__pyx_k_setstate), 0, 0, 1, 1},
+ {&__pyx_n_s_setstate_cython, __pyx_k_setstate_cython, sizeof(__pyx_k_setstate_cython), 0, 0, 1, 1},
+ {&__pyx_n_u_snd_Arab, __pyx_k_snd_Arab, sizeof(__pyx_k_snd_Arab), 0, 1, 0, 1},
+ {&__pyx_n_u_snd_Deva, __pyx_k_snd_Deva, sizeof(__pyx_k_snd_Deva), 0, 1, 0, 1},
+ {&__pyx_n_s_spec, __pyx_k_spec, sizeof(__pyx_k_spec), 0, 0, 1, 1},
+ {&__pyx_n_s_split, __pyx_k_split, sizeof(__pyx_k_split), 0, 0, 1, 1},
+ {&__pyx_n_s_src_lang, __pyx_k_src_lang, sizeof(__pyx_k_src_lang), 0, 0, 1, 1},
+ {&__pyx_kp_s_stringsource, __pyx_k_stringsource, sizeof(__pyx_k_stringsource), 0, 0, 1, 0},
+ {&__pyx_n_s_strip, __pyx_k_strip, sizeof(__pyx_k_strip), 0, 0, 1, 1},
+ {&__pyx_n_s_sub, __pyx_k_sub, sizeof(__pyx_k_sub), 0, 0, 1, 1},
+ {&__pyx_n_u_ta, __pyx_k_ta, sizeof(__pyx_k_ta), 0, 1, 0, 1},
+ {&__pyx_n_u_tam_Taml, __pyx_k_tam_Taml, sizeof(__pyx_k_tam_Taml), 0, 1, 0, 1},
+ {&__pyx_n_u_te, __pyx_k_te, sizeof(__pyx_k_te), 0, 1, 0, 1},
+ {&__pyx_n_u_tel_Telu, __pyx_k_tel_Telu, sizeof(__pyx_k_tel_Telu), 0, 1, 0, 1},
+ {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1},
+ {&__pyx_n_s_tgt_lang, __pyx_k_tgt_lang, sizeof(__pyx_k_tgt_lang), 0, 0, 1, 1},
+ {&__pyx_n_s_tokenize, __pyx_k_tokenize, sizeof(__pyx_k_tokenize), 0, 0, 1, 1},
+ {&__pyx_n_s_total, __pyx_k_total, sizeof(__pyx_k_total), 0, 0, 1, 1},
+ {&__pyx_n_s_tqdm, __pyx_k_tqdm, sizeof(__pyx_k_tqdm), 0, 0, 1, 1},
+ {&__pyx_n_s_translate, __pyx_k_translate, sizeof(__pyx_k_translate), 0, 0, 1, 1},
+ {&__pyx_n_s_transliterate, __pyx_k_transliterate, sizeof(__pyx_k_transliterate), 0, 0, 1, 1},
+ {&__pyx_n_s_trivial_detokenize, __pyx_k_trivial_detokenize, sizeof(__pyx_k_trivial_detokenize), 0, 0, 1, 1},
+ {&__pyx_n_s_trivial_tokenize, __pyx_k_trivial_tokenize, sizeof(__pyx_k_trivial_tokenize), 0, 0, 1, 1},
+ {&__pyx_n_s_typing, __pyx_k_typing, sizeof(__pyx_k_typing), 0, 0, 1, 1},
+ {&__pyx_n_s_unit, __pyx_k_unit, sizeof(__pyx_k_unit), 0, 0, 1, 1},
+ {&__pyx_n_u_unr_Deva, __pyx_k_unr_Deva, sizeof(__pyx_k_unr_Deva), 0, 1, 0, 1},
+ {&__pyx_n_u_ur, __pyx_k_ur, sizeof(__pyx_k_ur), 0, 1, 0, 1},
+ {&__pyx_n_u_urd_Arab, __pyx_k_urd_Arab, sizeof(__pyx_k_urd_Arab), 0, 1, 0, 1},
+ {&__pyx_n_s_visualize, __pyx_k_visualize, sizeof(__pyx_k_visualize), 0, 0, 1, 1},
+ {0, 0, 0, 0, 0, 0, 0}
+ };
+ return __Pyx_InitStrings(__pyx_string_tab);
+}
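+/*
+ * The table above interns every string constant the module needs exactly once
+ * at import time; the trailing flag columns record encoding, unicode-vs-bytes,
+ * str-vs-identifier, and interning for each entry. For example,
+ * __pyx_n_u_hin_Deva is the interned unicode object reused as the default
+ * `lang` argument in the postprocess_batch wrapper earlier in this file.
+ */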
+/* #### Code section: cached_builtins ### */
+static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) {
+ __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 145, __pyx_L1_error)
+ __pyx_builtin_chr = __Pyx_GetBuiltinName(__pyx_n_s_chr); if (!__pyx_builtin_chr) __PYX_ERR(0, 146, __pyx_L1_error)
+ __pyx_builtin_TypeError = __Pyx_GetBuiltinName(__pyx_n_s_TypeError); if (!__pyx_builtin_TypeError) __PYX_ERR(1, 2, __pyx_L1_error)
+ return 0;
+ __pyx_L1_error:;
+ return -1;
+}
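+/*
+ * Builtins used by the module (range, chr, TypeError) are resolved once here
+ * at module init and cached; the __PYX_ERR line numbers point at their first
+ * use in processor.pyx.
+ */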
+/* #### Code section: cached_constants ### */
+
+static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0);
+
+ /* "IndicTransToolkit/processor.pyx":312
+ *
+ * # Clean up any remaining placeholder artifacts
+ * text = re.sub(r"\s+", " ", text).replace(">/", ">").replace("]/", "]") # <<<<<<<<<<<<<<
+ * self._placeholder_entity_maps.put(placeholder_entity_map)
+ * return text
+ */
+ __pyx_tuple__150 = PyTuple_Pack(2, __pyx_kp_u__149, __pyx_kp_u__141); if (unlikely(!__pyx_tuple__150)) __PYX_ERR(0, 312, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_tuple__150);
+ __Pyx_GIVEREF(__pyx_tuple__150);
+ __pyx_tuple__152 = PyTuple_Pack(2, __pyx_kp_u__151, __pyx_kp_u__143); if (unlikely(!__pyx_tuple__152)) __PYX_ERR(0, 312, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_tuple__152);
+ __Pyx_GIVEREF(__pyx_tuple__152);
+
+ /* "IndicTransToolkit/processor.pyx":427
+ * if script_code in ["Arab", "Aran"]:
+ * sent = (
+ * sent.replace(" ؟", "؟") # <<<<<<<<<<<<<<
+ * .replace(" ۔", "۔")
+ * .replace(" ،", "،")
+ */
+ __pyx_tuple__158 = PyTuple_Pack(2, __pyx_kp_u__156, __pyx_kp_u__157); if (unlikely(!__pyx_tuple__158)) __PYX_ERR(0, 427, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_tuple__158);
+ __Pyx_GIVEREF(__pyx_tuple__158);
+
+ /* "IndicTransToolkit/processor.pyx":428
+ * sent = (
+ * sent.replace(" ؟", "؟")
+ * .replace(" ۔", "۔") # <<<<<<<<<<<<<<
+ * .replace(" ،", "،")
+ * .replace("ٮ۪", "ؠ")
+ */
+ __pyx_tuple__161 = PyTuple_Pack(2, __pyx_kp_u__159, __pyx_kp_u__160); if (unlikely(!__pyx_tuple__161)) __PYX_ERR(0, 428, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_tuple__161);
+ __Pyx_GIVEREF(__pyx_tuple__161);
+
+ /* "IndicTransToolkit/processor.pyx":429
+ * sent.replace(" ؟", "؟")
+ * .replace(" ۔", "۔")
+ * .replace(" ،", "،") # <<<<<<<<<<<<<<
+ * .replace("ٮ۪", "ؠ")
+ * )
+ */
+ __pyx_tuple__164 = PyTuple_Pack(2, __pyx_kp_u__162, __pyx_kp_u__163); if (unlikely(!__pyx_tuple__164)) __PYX_ERR(0, 429, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_tuple__164);
+ __Pyx_GIVEREF(__pyx_tuple__164);
+
+ /* "IndicTransToolkit/processor.pyx":430
+ * .replace(" ۔", "۔")
+ * .replace(" ،", "،")
+ * .replace("ٮ۪", "ؠ") # <<<<<<<<<<<<<<
+ * )
+ *
+ */
+ __pyx_tuple__167 = PyTuple_Pack(2, __pyx_kp_u__165, __pyx_n_u__166); if (unlikely(!__pyx_tuple__167)) __PYX_ERR(0, 430, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_tuple__167);
+ __Pyx_GIVEREF(__pyx_tuple__167);
+
+ /* "IndicTransToolkit/processor.pyx":435
+ * # Oriya fix
+ * if lang_code == "ory":
+ * sent = sent.replace("ଯ଼", "ୟ") # <<<<<<<<<<<<<<
+ *
+ * # Restore placeholders
+ */
+ __pyx_tuple__170 = PyTuple_Pack(2, __pyx_kp_u__168, __pyx_n_u__169); if (unlikely(!__pyx_tuple__170)) __PYX_ERR(0, 435, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_tuple__170);
+ __Pyx_GIVEREF(__pyx_tuple__170);
+
+ /* "IndicTransToolkit/processor.pyx":449
+ *
+ * # Exposed Method: Preprocess a Batch of Sentences
+ * cpdef list preprocess_batch( # <<<<<<<<<<<<<<
+ * self,
+ * List[str] batch,
+ */
+ __pyx_tuple__172 = PyTuple_Pack(6, __pyx_n_s_self, __pyx_n_s_batch, __pyx_n_s_src_lang, __pyx_n_s_tgt_lang, __pyx_n_s_is_target, __pyx_n_s_visualize); if (unlikely(!__pyx_tuple__172)) __PYX_ERR(0, 449, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_tuple__172);
+ __Pyx_GIVEREF(__pyx_tuple__172);
+ __pyx_codeobj__173 = (PyObject*)__Pyx_PyCode_New(6, 0, 0, 6, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__172, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_IndicTransToolkit_processor_pyx, __pyx_n_s_preprocess_batch, 449, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__173)) __PYX_ERR(0, 449, __pyx_L1_error)
+ __pyx_tuple__174 = PyTuple_Pack(3, Py_None, Py_False, Py_False); if (unlikely(!__pyx_tuple__174)) __PYX_ERR(0, 449, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_tuple__174);
+ __Pyx_GIVEREF(__pyx_tuple__174);
+
+ /* "IndicTransToolkit/processor.pyx":479
+ *
+ * # Exposed Method: Postprocess a Batch of Sentences
+ * cpdef list postprocess_batch( # <<<<<<<<<<<<<<
+ * self,
+ * List[str] sents,
+ */
+ __pyx_tuple__175 = PyTuple_Pack(4, __pyx_n_s_self, __pyx_n_s_sents, __pyx_n_s_lang, __pyx_n_s_visualize); if (unlikely(!__pyx_tuple__175)) __PYX_ERR(0, 479, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_tuple__175);
+ __Pyx_GIVEREF(__pyx_tuple__175);
+ __pyx_codeobj__176 = (PyObject*)__Pyx_PyCode_New(4, 0, 0, 4, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__175, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_IndicTransToolkit_processor_pyx, __pyx_n_s_postprocess_batch, 479, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__176)) __PYX_ERR(0, 479, __pyx_L1_error)
+ __pyx_tuple__177 = PyTuple_Pack(2, __pyx_n_u_hin_Deva, Py_False); if (unlikely(!__pyx_tuple__177)) __PYX_ERR(0, 479, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_tuple__177);
+ __Pyx_GIVEREF(__pyx_tuple__177);
+
+ /* "(tree fragment)":1
+ * def __reduce_cython__(self): # <<<<<<<<<<<<<<
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__"
+ * def __setstate_cython__(self, __pyx_state):
+ */
+ __pyx_tuple__178 = PyTuple_Pack(1, __pyx_n_s_self); if (unlikely(!__pyx_tuple__178)) __PYX_ERR(1, 1, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_tuple__178);
+ __Pyx_GIVEREF(__pyx_tuple__178);
+ __pyx_codeobj__179 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__178, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_reduce_cython, 1, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__179)) __PYX_ERR(1, 1, __pyx_L1_error)
+
+ /* "(tree fragment)":3
+ * def __reduce_cython__(self):
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__"
+ * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<<
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__"
+ */
+ __pyx_tuple__180 = PyTuple_Pack(2, __pyx_n_s_self, __pyx_n_s_pyx_state); if (unlikely(!__pyx_tuple__180)) __PYX_ERR(1, 3, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_tuple__180);
+ __Pyx_GIVEREF(__pyx_tuple__180);
+ __pyx_codeobj__181 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__180, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_setstate_cython, 3, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__181)) __PYX_ERR(1, 3, __pyx_L1_error)
+ __Pyx_RefNannyFinishContext();
+ return 0;
+ __pyx_L1_error:;
+ __Pyx_RefNannyFinishContext();
+ return -1;
+}
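+/* Note: the cached constants above are interned once at import time: the 2-tuples
+ * hold the argument pairs for the str.replace() fix-ups in the postprocess path,
+ * while the code objects and default tuples (e.g. (None, False, False) for
+ * preprocess_batch, ("hin_Deva", False) for postprocess_batch) back the
+ * Python-visible wrappers of the cpdef methods and the pickling stubs. */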
+/* #### Code section: init_constants ### */
+
+static CYTHON_SMALL_CODE int __Pyx_InitConstants(void) {
+ __pyx_umethod_PyDict_Type_get.type = (PyObject*)&PyDict_Type;
+ __pyx_umethod_PyDict_Type_get.method_name = &__pyx_n_s_get;
+ __pyx_umethod_PyUnicode_Type_strip.type = (PyObject*)&PyUnicode_Type;
+ __pyx_umethod_PyUnicode_Type_strip.method_name = &__pyx_n_s_strip;
+ __pyx_umethod_PyUnicode_Type_translate.type = (PyObject*)&PyUnicode_Type;
+ __pyx_umethod_PyUnicode_Type_translate.method_name = &__pyx_n_s_translate;
+ if (__Pyx_CreateStringTabAndInitStrings() < 0) __PYX_ERR(0, 1, __pyx_L1_error);
+ __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) __PYX_ERR(0, 1, __pyx_L1_error)
+ return 0;
+ __pyx_L1_error:;
+ return -1;
+}
+/* #### Code section: init_globals ### */
+
+static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) {
+ return 0;
+}
+/* #### Code section: init_module ### */
+
+static CYTHON_SMALL_CODE int __Pyx_modinit_global_init_code(void); /*proto*/
+static CYTHON_SMALL_CODE int __Pyx_modinit_variable_export_code(void); /*proto*/
+static CYTHON_SMALL_CODE int __Pyx_modinit_function_export_code(void); /*proto*/
+static CYTHON_SMALL_CODE int __Pyx_modinit_type_init_code(void); /*proto*/
+static CYTHON_SMALL_CODE int __Pyx_modinit_type_import_code(void); /*proto*/
+static CYTHON_SMALL_CODE int __Pyx_modinit_variable_import_code(void); /*proto*/
+static CYTHON_SMALL_CODE int __Pyx_modinit_function_import_code(void); /*proto*/
+
+static int __Pyx_modinit_global_init_code(void) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__Pyx_modinit_global_init_code", 0);
+ /*--- Global init code ---*/
+ __Pyx_RefNannyFinishContext();
+ return 0;
+}
+
+static int __Pyx_modinit_variable_export_code(void) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__Pyx_modinit_variable_export_code", 0);
+ /*--- Variable export code ---*/
+ __Pyx_RefNannyFinishContext();
+ return 0;
+}
+
+static int __Pyx_modinit_function_export_code(void) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__Pyx_modinit_function_export_code", 0);
+ /*--- Function export code ---*/
+ __Pyx_RefNannyFinishContext();
+ return 0;
+}
+
+static int __Pyx_modinit_type_init_code(void) {
+ __Pyx_RefNannyDeclarations
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0);
+ /*--- Type init code ---*/
+ __pyx_vtabptr_17IndicTransToolkit_9processor_IndicProcessor = &__pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor;
+ __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor._apply_punc_replacements = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__apply_punc_replacements;
+ __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor._punc_norm = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__punc_norm;
+ __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor._wrap_with_placeholders = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__wrap_with_placeholders;
+ __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor._normalize = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__normalize;
+ __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor._do_indic_tokenize_and_transliterate = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *, PyObject *, int))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__do_indic_tokenize_and_transliterate;
+ __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor._preprocess = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *, PyObject *, PyObject *, int))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__preprocess;
+ __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor._postprocess = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__postprocess;
+ __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor.preprocess_batch = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *, int __pyx_skip_dispatch, struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch *__pyx_optional_args))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch;
+ __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor.postprocess_batch = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, int __pyx_skip_dispatch, struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch *__pyx_optional_args))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch;
+ #if CYTHON_USE_TYPE_SPECS
+ __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_17IndicTransToolkit_9processor_IndicProcessor_spec, NULL); if (unlikely(!__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor)) __PYX_ERR(0, 20, __pyx_L1_error)
+ if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_17IndicTransToolkit_9processor_IndicProcessor_spec, __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor) < 0) __PYX_ERR(0, 20, __pyx_L1_error)
+ #else
+ __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor = &__pyx_type_17IndicTransToolkit_9processor_IndicProcessor;
+ #endif
+ #if !CYTHON_COMPILING_IN_LIMITED_API
+ #endif
+ #if !CYTHON_USE_TYPE_SPECS
+ if (__Pyx_PyType_Ready(__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor) < 0) __PYX_ERR(0, 20, __pyx_L1_error)
+ #endif
+ #if PY_MAJOR_VERSION < 3
+ __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor->tp_print = 0;
+ #endif
+ #if !CYTHON_COMPILING_IN_LIMITED_API
+ if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor->tp_dictoffset && __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor->tp_getattro == PyObject_GenericGetAttr)) {
+ __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor->tp_getattro = __Pyx_PyObject_GenericGetAttr;
+ }
+ #endif
+ if (__Pyx_SetVtable(__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor, __pyx_vtabptr_17IndicTransToolkit_9processor_IndicProcessor) < 0) __PYX_ERR(0, 20, __pyx_L1_error)
+ #if !CYTHON_COMPILING_IN_LIMITED_API
+ if (__Pyx_MergeVtables(__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor) < 0) __PYX_ERR(0, 20, __pyx_L1_error)
+ #endif
+ if (PyObject_SetAttr(__pyx_m, __pyx_n_s_IndicProcessor, (PyObject *) __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor) < 0) __PYX_ERR(0, 20, __pyx_L1_error)
+ #if !CYTHON_COMPILING_IN_LIMITED_API
+ if (__Pyx_setup_reduce((PyObject *) __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor) < 0) __PYX_ERR(0, 20, __pyx_L1_error)
+ #endif
+ __Pyx_RefNannyFinishContext();
+ return 0;
+ __pyx_L1_error:;
+ __Pyx_RefNannyFinishContext();
+ return -1;
+}
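+/* Note: this is how Cython implements cpdef methods -- each one gets a C function
+ * pointer in the IndicProcessor vtable, so calls from C code inside this module
+ * bypass Python attribute dispatch, while PyObject_SetAttr above still publishes
+ * the type (and later its Python-callable wrappers) for ordinary Python callers. */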
+
+static int __Pyx_modinit_type_import_code(void) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__Pyx_modinit_type_import_code", 0);
+ /*--- Type import code ---*/
+ __Pyx_RefNannyFinishContext();
+ return 0;
+}
+
+static int __Pyx_modinit_variable_import_code(void) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__Pyx_modinit_variable_import_code", 0);
+ /*--- Variable import code ---*/
+ __Pyx_RefNannyFinishContext();
+ return 0;
+}
+
+static int __Pyx_modinit_function_import_code(void) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__Pyx_modinit_function_import_code", 0);
+ /*--- Function import code ---*/
+ __Pyx_RefNannyFinishContext();
+ return 0;
+}
+
+
+#if PY_MAJOR_VERSION >= 3
+#if CYTHON_PEP489_MULTI_PHASE_INIT
+static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/
+static int __pyx_pymod_exec_processor(PyObject* module); /*proto*/
+static PyModuleDef_Slot __pyx_moduledef_slots[] = {
+ {Py_mod_create, (void*)__pyx_pymod_create},
+ {Py_mod_exec, (void*)__pyx_pymod_exec_processor},
+ {0, NULL}
+};
+#endif
+
+#ifdef __cplusplus
+namespace {
+ struct PyModuleDef __pyx_moduledef =
+ #else
+ static struct PyModuleDef __pyx_moduledef =
+ #endif
+ {
+ PyModuleDef_HEAD_INIT,
+ "processor",
+ __pyx_k_Cython_version_of_the_IndicProc, /* m_doc */
+ #if CYTHON_PEP489_MULTI_PHASE_INIT
+ 0, /* m_size */
+ #elif CYTHON_USE_MODULE_STATE
+ sizeof(__pyx_mstate), /* m_size */
+ #else
+ -1, /* m_size */
+ #endif
+ __pyx_methods /* m_methods */,
+ #if CYTHON_PEP489_MULTI_PHASE_INIT
+ __pyx_moduledef_slots, /* m_slots */
+ #else
+ NULL, /* m_reload */
+ #endif
+ #if CYTHON_USE_MODULE_STATE
+ __pyx_m_traverse, /* m_traverse */
+ __pyx_m_clear, /* m_clear */
+ NULL /* m_free */
+ #else
+ NULL, /* m_traverse */
+ NULL, /* m_clear */
+ NULL /* m_free */
+ #endif
+ };
+ #ifdef __cplusplus
+} /* anonymous namespace */
+#endif
+#endif
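+/* Note: under CYTHON_PEP489_MULTI_PHASE_INIT the module is built in two phases per
+ * PEP 489 (Py_mod_create, then Py_mod_exec); m_size is 0 in that mode because the
+ * module keeps its state in C globals rather than per-module storage. */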
+
+#ifndef CYTHON_NO_PYINIT_EXPORT
+#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC
+#elif PY_MAJOR_VERSION < 3
+#ifdef __cplusplus
+#define __Pyx_PyMODINIT_FUNC extern "C" void
+#else
+#define __Pyx_PyMODINIT_FUNC void
+#endif
+#else
+#ifdef __cplusplus
+#define __Pyx_PyMODINIT_FUNC extern "C" PyObject *
+#else
+#define __Pyx_PyMODINIT_FUNC PyObject *
+#endif
+#endif
+
+
+#if PY_MAJOR_VERSION < 3
+__Pyx_PyMODINIT_FUNC initprocessor(void) CYTHON_SMALL_CODE; /*proto*/
+__Pyx_PyMODINIT_FUNC initprocessor(void)
+#else
+__Pyx_PyMODINIT_FUNC PyInit_processor(void) CYTHON_SMALL_CODE; /*proto*/
+__Pyx_PyMODINIT_FUNC PyInit_processor(void)
+#if CYTHON_PEP489_MULTI_PHASE_INIT
+{
+ return PyModuleDef_Init(&__pyx_moduledef);
+}
+static CYTHON_SMALL_CODE int __Pyx_check_single_interpreter(void) {
+ #if PY_VERSION_HEX >= 0x030700A1
+ static PY_INT64_T main_interpreter_id = -1;
+ PY_INT64_T current_id = PyInterpreterState_GetID(PyThreadState_Get()->interp);
+ if (main_interpreter_id == -1) {
+ main_interpreter_id = current_id;
+ return (unlikely(current_id == -1)) ? -1 : 0;
+ } else if (unlikely(main_interpreter_id != current_id))
+ #else
+ static PyInterpreterState *main_interpreter = NULL;
+ PyInterpreterState *current_interpreter = PyThreadState_Get()->interp;
+ if (!main_interpreter) {
+ main_interpreter = current_interpreter;
+ } else if (unlikely(main_interpreter != current_interpreter))
+ #endif
+ {
+ PyErr_SetString(
+ PyExc_ImportError,
+ "Interpreter change detected - this module can only be loaded into one interpreter per process.");
+ return -1;
+ }
+ return 0;
+}
+#if CYTHON_COMPILING_IN_LIMITED_API
+static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *module, const char* from_name, const char* to_name, int allow_none)
+#else
+static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name, int allow_none)
+#endif
+{
+ PyObject *value = PyObject_GetAttrString(spec, from_name);
+ int result = 0;
+ if (likely(value)) {
+ if (allow_none || value != Py_None) {
+#if CYTHON_COMPILING_IN_LIMITED_API
+ result = PyModule_AddObject(module, to_name, value);
+#else
+ result = PyDict_SetItemString(moddict, to_name, value);
+#endif
+ }
+ Py_DECREF(value);
+ } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
+ PyErr_Clear();
+ } else {
+ result = -1;
+ }
+ return result;
+}
+static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def) {
+ PyObject *module = NULL, *moddict, *modname;
+ CYTHON_UNUSED_VAR(def);
+ if (__Pyx_check_single_interpreter())
+ return NULL;
+ if (__pyx_m)
+ return __Pyx_NewRef(__pyx_m);
+ modname = PyObject_GetAttrString(spec, "name");
+ if (unlikely(!modname)) goto bad;
+ module = PyModule_NewObject(modname);
+ Py_DECREF(modname);
+ if (unlikely(!module)) goto bad;
+#if CYTHON_COMPILING_IN_LIMITED_API
+ moddict = module;
+#else
+ moddict = PyModule_GetDict(module);
+ if (unlikely(!moddict)) goto bad;
+#endif
+ if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", "__loader__", 1) < 0)) goto bad;
+ if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__", 1) < 0)) goto bad;
+ if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__", 1) < 0)) goto bad;
+ if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__", 0) < 0)) goto bad;
+ return module;
+bad:
+ Py_XDECREF(module);
+ return NULL;
+}
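+/* Note: the create phase above enforces a one-interpreter-per-process rule (the C
+ * globals are shared process-wide) and copies loader/origin/parent/
+ * submodule_search_locations from the ModuleSpec onto the new module before the
+ * exec phase runs. */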
+
+
+static CYTHON_SMALL_CODE int __pyx_pymod_exec_processor(PyObject *__pyx_pyinit_module)
+#endif
+#endif
+{
+ int stringtab_initialized = 0;
+ #if CYTHON_USE_MODULE_STATE
+ int pystate_addmodule_run = 0;
+ #endif
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannyDeclarations
+ #if CYTHON_PEP489_MULTI_PHASE_INIT
+ if (__pyx_m) {
+ if (__pyx_m == __pyx_pyinit_module) return 0;
+ PyErr_SetString(PyExc_RuntimeError, "Module 'processor' has already been imported. Re-initialisation is not supported.");
+ return -1;
+ }
+ #elif PY_MAJOR_VERSION >= 3
+ if (__pyx_m) return __Pyx_NewRef(__pyx_m);
+ #endif
+ /*--- Module creation code ---*/
+ #if CYTHON_PEP489_MULTI_PHASE_INIT
+ __pyx_m = __pyx_pyinit_module;
+ Py_INCREF(__pyx_m);
+ #else
+ #if PY_MAJOR_VERSION < 3
+ __pyx_m = Py_InitModule4("processor", __pyx_methods, __pyx_k_Cython_version_of_the_IndicProc, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m);
+ if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error)
+ #elif CYTHON_USE_MODULE_STATE
+ __pyx_t_1 = PyModule_Create(&__pyx_moduledef); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error)
+ {
+ int add_module_result = PyState_AddModule(__pyx_t_1, &__pyx_moduledef);
+ __pyx_t_1 = 0; /* transfer ownership from __pyx_t_1 to "processor" pseudovariable */
+ if (unlikely((add_module_result < 0))) __PYX_ERR(0, 1, __pyx_L1_error)
+ pystate_addmodule_run = 1;
+ }
+ #else
+ __pyx_m = PyModule_Create(&__pyx_moduledef);
+ if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error)
+ #endif
+ #endif
+ CYTHON_UNUSED_VAR(__pyx_t_1);
+ __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error)
+ Py_INCREF(__pyx_d);
+ __pyx_b = __Pyx_PyImport_AddModuleRef(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error)
+ __pyx_cython_runtime = __Pyx_PyImport_AddModuleRef((const char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error)
+ if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ #if CYTHON_REFNANNY
+__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny");
+if (!__Pyx_RefNanny) {
+ PyErr_Clear();
+ __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny");
+ if (!__Pyx_RefNanny)
+ Py_FatalError("failed to import 'refnanny' module");
+}
+#endif
+ __Pyx_RefNannySetupContext("__Pyx_PyMODINIT_FUNC PyInit_processor(void)", 0);
+ if (__Pyx_check_binary_version(__PYX_LIMITED_VERSION_HEX, __Pyx_get_runtime_version(), CYTHON_COMPILING_IN_LIMITED_API) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ #ifdef __Pxy_PyFrame_Initialize_Offsets
+ __Pxy_PyFrame_Initialize_Offsets();
+ #endif
+ __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error)
+ __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error)
+ __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error)
+ #ifdef __Pyx_CyFunction_USED
+ if (__pyx_CyFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ #endif
+ #ifdef __Pyx_FusedFunction_USED
+ if (__pyx_FusedFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ #endif
+ #ifdef __Pyx_Coroutine_USED
+ if (__pyx_Coroutine_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ #endif
+ #ifdef __Pyx_Generator_USED
+ if (__pyx_Generator_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ #endif
+ #ifdef __Pyx_AsyncGen_USED
+ if (__pyx_AsyncGen_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ #endif
+ #ifdef __Pyx_StopAsyncIteration_USED
+ if (__pyx_StopAsyncIteration_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ #endif
+ /*--- Library function declarations ---*/
+ /*--- Threads initialization code ---*/
+ #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 && defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS
+ PyEval_InitThreads();
+ #endif
+ /*--- Initialize various global constants etc. ---*/
+ if (__Pyx_InitConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ stringtab_initialized = 1;
+ if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)
+ if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ #endif
+ if (__pyx_module_is_main_IndicTransToolkit__processor) {
+ if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ }
+ #if PY_MAJOR_VERSION >= 3
+ {
+ PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error)
+ if (!PyDict_GetItemString(modules, "IndicTransToolkit.processor")) {
+ if (unlikely((PyDict_SetItemString(modules, "IndicTransToolkit.processor", __pyx_m) < 0))) __PYX_ERR(0, 1, __pyx_L1_error)
+ }
+ }
+ #endif
+ /*--- Builtin init code ---*/
+ if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ /*--- Constants init code ---*/
+ if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ /*--- Global type/function init code ---*/
+ (void)__Pyx_modinit_global_init_code();
+ (void)__Pyx_modinit_variable_export_code();
+ (void)__Pyx_modinit_function_export_code();
+ if (unlikely((__Pyx_modinit_type_init_code() < 0))) __PYX_ERR(0, 1, __pyx_L1_error)
+ (void)__Pyx_modinit_type_import_code();
+ (void)__Pyx_modinit_variable_import_code();
+ (void)__Pyx_modinit_function_import_code();
+ /*--- Execution code ---*/
+ #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED)
+ if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ #endif
+
+ /* "IndicTransToolkit/processor.pyx":8
+ * """
+ *
+ * import regex as re # <<<<<<<<<<<<<<
+ * from tqdm import tqdm
+ * from queue import Queue
+ */
+ __pyx_t_2 = __Pyx_ImportDottedModule(__pyx_n_s_regex, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 8, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_re, __pyx_t_2) < 0) __PYX_ERR(0, 8, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":9
+ *
+ * import regex as re
+ * from tqdm import tqdm # <<<<<<<<<<<<<<
+ * from queue import Queue
+ * from typing import List, Dict, Union
+ */
+ __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 9, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_INCREF(__pyx_n_s_tqdm);
+ __Pyx_GIVEREF(__pyx_n_s_tqdm);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 0, __pyx_n_s_tqdm)) __PYX_ERR(0, 9, __pyx_L1_error);
+ __pyx_t_3 = __Pyx_Import(__pyx_n_s_tqdm, __pyx_t_2, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 9, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_tqdm); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 9, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_tqdm, __pyx_t_2) < 0) __PYX_ERR(0, 9, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":10
+ * import regex as re
+ * from tqdm import tqdm
+ * from queue import Queue # <<<<<<<<<<<<<<
+ * from typing import List, Dict, Union
+ *
+ */
+ __pyx_t_3 = PyList_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 10, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_INCREF(__pyx_n_s_Queue);
+ __Pyx_GIVEREF(__pyx_n_s_Queue);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 0, __pyx_n_s_Queue)) __PYX_ERR(0, 10, __pyx_L1_error);
+ __pyx_t_2 = __Pyx_Import(__pyx_n_s_queue, __pyx_t_3, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 10, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_Queue); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 10, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_Queue, __pyx_t_3) < 0) __PYX_ERR(0, 10, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":11
+ * from tqdm import tqdm
+ * from queue import Queue
+ * from typing import List, Dict, Union # <<<<<<<<<<<<<<
+ *
+ * # Importing Python objects since these libraries don't offer C-extensions
+ */
+ __pyx_t_2 = PyList_New(3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 11, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_INCREF(__pyx_n_s_List);
+ __Pyx_GIVEREF(__pyx_n_s_List);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 0, __pyx_n_s_List)) __PYX_ERR(0, 11, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_n_s_Dict);
+ __Pyx_GIVEREF(__pyx_n_s_Dict);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 1, __pyx_n_s_Dict)) __PYX_ERR(0, 11, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_n_s_Union);
+ __Pyx_GIVEREF(__pyx_n_s_Union);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 2, __pyx_n_s_Union)) __PYX_ERR(0, 11, __pyx_L1_error);
+ __pyx_t_3 = __Pyx_Import(__pyx_n_s_typing, __pyx_t_2, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 11, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_List); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 11, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_List, __pyx_t_2) < 0) __PYX_ERR(0, 11, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_Dict); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 11, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_Dict, __pyx_t_2) < 0) __PYX_ERR(0, 11, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_Union); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 11, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_Union, __pyx_t_2) < 0) __PYX_ERR(0, 11, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":14
+ *
+ * # Importing Python objects since these libraries don't offer C-extensions
+ * from indicnlp.tokenize import indic_tokenize, indic_detokenize # <<<<<<<<<<<<<<
+ * from indicnlp.normalize.indic_normalize import IndicNormalizerFactory
+ * from sacremoses import MosesPunctNormalizer, MosesTokenizer, MosesDetokenizer
+ */
+ __pyx_t_3 = PyList_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 14, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_INCREF(__pyx_n_s_indic_tokenize);
+ __Pyx_GIVEREF(__pyx_n_s_indic_tokenize);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 0, __pyx_n_s_indic_tokenize)) __PYX_ERR(0, 14, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_n_s_indic_detokenize);
+ __Pyx_GIVEREF(__pyx_n_s_indic_detokenize);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 1, __pyx_n_s_indic_detokenize)) __PYX_ERR(0, 14, __pyx_L1_error);
+ __pyx_t_2 = __Pyx_Import(__pyx_n_s_indicnlp_tokenize, __pyx_t_3, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 14, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_indic_tokenize); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 14, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_indic_tokenize, __pyx_t_3) < 0) __PYX_ERR(0, 14, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_indic_detokenize); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 14, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_indic_detokenize, __pyx_t_3) < 0) __PYX_ERR(0, 14, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":15
+ * # Importing Python objects since these libraries don't offer C-extensions
+ * from indicnlp.tokenize import indic_tokenize, indic_detokenize
+ * from indicnlp.normalize.indic_normalize import IndicNormalizerFactory # <<<<<<<<<<<<<<
+ * from sacremoses import MosesPunctNormalizer, MosesTokenizer, MosesDetokenizer
+ * from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator
+ */
+ __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 15, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_INCREF(__pyx_n_s_IndicNormalizerFactory);
+ __Pyx_GIVEREF(__pyx_n_s_IndicNormalizerFactory);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 0, __pyx_n_s_IndicNormalizerFactory)) __PYX_ERR(0, 15, __pyx_L1_error);
+ __pyx_t_3 = __Pyx_Import(__pyx_n_s_indicnlp_normalize_indic_normali, __pyx_t_2, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 15, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_IndicNormalizerFactory); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 15, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_IndicNormalizerFactory, __pyx_t_2) < 0) __PYX_ERR(0, 15, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":16
+ * from indicnlp.tokenize import indic_tokenize, indic_detokenize
+ * from indicnlp.normalize.indic_normalize import IndicNormalizerFactory
+ * from sacremoses import MosesPunctNormalizer, MosesTokenizer, MosesDetokenizer # <<<<<<<<<<<<<<
+ * from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator
+ *
+ */
+ __pyx_t_3 = PyList_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 16, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_INCREF(__pyx_n_s_MosesPunctNormalizer);
+ __Pyx_GIVEREF(__pyx_n_s_MosesPunctNormalizer);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 0, __pyx_n_s_MosesPunctNormalizer)) __PYX_ERR(0, 16, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_n_s_MosesTokenizer);
+ __Pyx_GIVEREF(__pyx_n_s_MosesTokenizer);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 1, __pyx_n_s_MosesTokenizer)) __PYX_ERR(0, 16, __pyx_L1_error);
+ __Pyx_INCREF(__pyx_n_s_MosesDetokenizer);
+ __Pyx_GIVEREF(__pyx_n_s_MosesDetokenizer);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 2, __pyx_n_s_MosesDetokenizer)) __PYX_ERR(0, 16, __pyx_L1_error);
+ __pyx_t_2 = __Pyx_Import(__pyx_n_s_sacremoses, __pyx_t_3, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 16, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_MosesPunctNormalizer); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 16, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_MosesPunctNormalizer, __pyx_t_3) < 0) __PYX_ERR(0, 16, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_MosesTokenizer); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 16, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_MosesTokenizer, __pyx_t_3) < 0) __PYX_ERR(0, 16, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_MosesDetokenizer); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 16, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_MosesDetokenizer, __pyx_t_3) < 0) __PYX_ERR(0, 16, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":17
+ * from indicnlp.normalize.indic_normalize import IndicNormalizerFactory
+ * from sacremoses import MosesPunctNormalizer, MosesTokenizer, MosesDetokenizer
+ * from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator # <<<<<<<<<<<<<<
+ *
+ *
+ */
+ __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 17, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_INCREF(__pyx_n_s_UnicodeIndicTransliterator);
+ __Pyx_GIVEREF(__pyx_n_s_UnicodeIndicTransliterator);
+ if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 0, __pyx_n_s_UnicodeIndicTransliterator)) __PYX_ERR(0, 17, __pyx_L1_error);
+ __pyx_t_3 = __Pyx_Import(__pyx_n_s_indicnlp_transliterate_unicode_t, __pyx_t_2, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 17, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_UnicodeIndicTransliterator); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 17, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_UnicodeIndicTransliterator, __pyx_t_2) < 0) __PYX_ERR(0, 17, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
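+
+ /* At this point the module body has pulled in every runtime dependency of
+ processor.pyx: regex (bound as re), tqdm, queue.Queue, the typing helpers, the
+ indicnlp tokenizer/normalizer/transliterator, and the sacremoses Moses tools.
+ They stay plain Python objects because, as the .pyx comment notes, none of
+ these libraries ship C extensions. */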
+
+ /* "IndicTransToolkit/processor.pyx":449
+ *
+ * # Exposed Method: Preprocess a Batch of Sentences
+ * cpdef list preprocess_batch( # <<<<<<<<<<<<<<
+ * self,
+ * List[str] batch,
+ */
+ __pyx_t_3 = __Pyx_CyFunction_New(&__pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_3preprocess_batch, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_IndicProcessor_preprocess_batch, NULL, __pyx_n_s_IndicTransToolkit_processor, __pyx_d, ((PyObject *)__pyx_codeobj__173)); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 449, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_3, __pyx_tuple__174);
+ if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor, __pyx_n_s_preprocess_batch, __pyx_t_3) < 0) __PYX_ERR(0, 449, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ PyType_Modified(__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor);
+
+ /* "IndicTransToolkit/processor.pyx":479
+ *
+ * # Exposed Method: Postprocess a Batch of Sentences
+ * cpdef list postprocess_batch( # <<<<<<<<<<<<<<
+ * self,
+ * List[str] sents,
+ */
+ __pyx_t_3 = __Pyx_CyFunction_New(&__pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_5postprocess_batch, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_IndicProcessor_postprocess_batch, NULL, __pyx_n_s_IndicTransToolkit_processor, __pyx_d, ((PyObject *)__pyx_codeobj__176)); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 479, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_3, __pyx_tuple__177);
+ if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor, __pyx_n_s_postprocess_batch, __pyx_t_3) < 0) __PYX_ERR(0, 479, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ PyType_Modified(__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor);
+
+ /* "(tree fragment)":1
+ * def __reduce_cython__(self): # <<<<<<<<<<<<<<
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__"
+ * def __setstate_cython__(self, __pyx_state):
+ */
+ __pyx_t_3 = __Pyx_CyFunction_New(&__pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_7__reduce_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_IndicProcessor___reduce_cython, NULL, __pyx_n_s_IndicTransToolkit_processor, __pyx_d, ((PyObject *)__pyx_codeobj__179)); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_reduce_cython, __pyx_t_3) < 0) __PYX_ERR(1, 1, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "(tree fragment)":3
+ * def __reduce_cython__(self):
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__"
+ * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<<
+ * raise TypeError, "no default __reduce__ due to non-trivial __cinit__"
+ */
+ __pyx_t_3 = __Pyx_CyFunction_New(&__pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_9__setstate_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_IndicProcessor___setstate_cython, NULL, __pyx_n_s_IndicTransToolkit_processor, __pyx_d, ((PyObject *)__pyx_codeobj__181)); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 3, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_setstate_cython, __pyx_t_3) < 0) __PYX_ERR(1, 3, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "IndicTransToolkit/processor.pyx":1
+ * # cython: language_level=3, boundscheck=False, cdivision=True, wraparound=False # <<<<<<<<<<<<<<
+ * """
+ * Cython version of the IndicProcessor class with optimizations for performance.
+ */
+ __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 1, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_3) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /*--- Wrapped vars code ---*/
+
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ if (__pyx_m) {
+ if (__pyx_d && stringtab_initialized) {
+ __Pyx_AddTraceback("init IndicTransToolkit.processor", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ }
+ #if !CYTHON_USE_MODULE_STATE
+ Py_CLEAR(__pyx_m);
+ #else
+ Py_DECREF(__pyx_m);
+ if (pystate_addmodule_run) {
+ PyObject *tp, *value, *tb;
+ PyErr_Fetch(&tp, &value, &tb);
+ PyState_RemoveModule(&__pyx_moduledef);
+ PyErr_Restore(tp, value, tb);
+ }
+ #endif
+ } else if (!PyErr_Occurred()) {
+ PyErr_SetString(PyExc_ImportError, "init IndicTransToolkit.processor");
+ }
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ #if CYTHON_PEP489_MULTI_PHASE_INIT
+ return (__pyx_m != NULL) ? 0 : -1;
+ #elif PY_MAJOR_VERSION >= 3
+ return __pyx_m;
+ #else
+ return;
+ #endif
+}
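+/* Rough Python-side usage sketch of the module initialised above. Only the module
+ * path, the method names, and their defaults are taken from the generated code;
+ * the constructor call and the language/text values are illustrative assumptions,
+ * so check the IndicTransToolkit docs for the real __cinit__ signature:
+ *
+ *     from IndicTransToolkit.processor import IndicProcessor
+ *     ip = IndicProcessor()  # assumed no-arg construction (hypothetical)
+ *     batch = ip.preprocess_batch(
+ *         ["Hello world"], src_lang="eng_Latn", tgt_lang="hin_Deva",
+ *     )  # tgt_lang=None, is_target=False, visualize=False by default
+ *     translations = ["..."]  # decoded model outputs go here
+ *     restored = ip.postprocess_batch(translations, lang="hin_Deva")  # default lang
+ *
+ * Pickling is intentionally unsupported: __reduce_cython__ raises TypeError because
+ * __cinit__ is non-trivial. */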
+/* #### Code section: cleanup_globals ### */
+/* #### Code section: cleanup_module ### */
+/* #### Code section: main_method ### */
+/* #### Code section: utility_code_pragmas ### */
+#ifdef _MSC_VER
+#pragma warning( push )
+/* Warning 4127: conditional expression is constant
+ * Cython uses constant conditional expressions to allow inline functions to be optimized at
+ * compile-time, so this warning is not useful
+ */
+#pragma warning( disable : 4127 )
+#endif
+
+
+
+/* #### Code section: utility_code_def ### */
+
+/* --- Runtime support code --- */
+/* Refnanny */
+#if CYTHON_REFNANNY
+static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) {
+ PyObject *m = NULL, *p = NULL;
+ void *r = NULL;
+ m = PyImport_ImportModule(modname);
+ if (!m) goto end;
+ p = PyObject_GetAttrString(m, "RefNannyAPI");
+ if (!p) goto end;
+ r = PyLong_AsVoidPtr(p);
+end:
+ Py_XDECREF(p);
+ Py_XDECREF(m);
+ return (__Pyx_RefNannyAPIStruct *)r;
+}
+#endif
+
+/* PyErrExceptionMatches */
+#if CYTHON_FAST_THREAD_STATE
+static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) {
+ Py_ssize_t i, n;
+ n = PyTuple_GET_SIZE(tuple);
+#if PY_MAJOR_VERSION >= 3
+ for (i=0; i<n; i++) {
+ if (exc_type == PyTuple_GET_ITEM(tuple, i)) return 1;
+ }
+#endif
+ for (i=0; i<n; i++) {
+ if (__Pyx_PyErr_GivenExceptionMatches(exc_type, PyTuple_GET_ITEM(tuple, i))) return 1;
+ }
+ return 0;
+}
+static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err) {
+ int result;
+ PyObject *exc_type;
+#if PY_VERSION_HEX >= 0x030C00A6
+ PyObject *current_exception = tstate->current_exception;
+ if (unlikely(!current_exception)) return 0;
+ exc_type = (PyObject*) Py_TYPE(current_exception);
+ if (exc_type == err) return 1;
+#else
+ exc_type = tstate->curexc_type;
+ if (exc_type == err) return 1;
+ if (unlikely(!exc_type)) return 0;
+#endif
+ #if CYTHON_AVOID_BORROWED_REFS
+ Py_INCREF(exc_type);
+ #endif
+ if (unlikely(PyTuple_Check(err))) {
+ result = __Pyx_PyErr_ExceptionMatchesTuple(exc_type, err);
+ } else {
+ result = __Pyx_PyErr_GivenExceptionMatches(exc_type, err);
+ }
+ #if CYTHON_AVOID_BORROWED_REFS
+ Py_DECREF(exc_type);
+ #endif
+ return result;
+}
+#endif
+
+/* PyErrFetchRestore */
+#if CYTHON_FAST_THREAD_STATE
+static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) {
+#if PY_VERSION_HEX >= 0x030C00A6
+ PyObject *tmp_value;
+ assert(type == NULL || (value != NULL && type == (PyObject*) Py_TYPE(value)));
+ if (value) {
+ #if CYTHON_COMPILING_IN_CPYTHON
+ if (unlikely(((PyBaseExceptionObject*) value)->traceback != tb))
+ #endif
+ PyException_SetTraceback(value, tb);
+ }
+ tmp_value = tstate->current_exception;
+ tstate->current_exception = value;
+ Py_XDECREF(tmp_value);
+ Py_XDECREF(type);
+ Py_XDECREF(tb);
+#else
+ PyObject *tmp_type, *tmp_value, *tmp_tb;
+ tmp_type = tstate->curexc_type;
+ tmp_value = tstate->curexc_value;
+ tmp_tb = tstate->curexc_traceback;
+ tstate->curexc_type = type;
+ tstate->curexc_value = value;
+ tstate->curexc_traceback = tb;
+ Py_XDECREF(tmp_type);
+ Py_XDECREF(tmp_value);
+ Py_XDECREF(tmp_tb);
+#endif
+}
+static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) {
+#if PY_VERSION_HEX >= 0x030C00A6
+ PyObject* exc_value;
+ exc_value = tstate->current_exception;
+ tstate->current_exception = 0;
+ *value = exc_value;
+ *type = NULL;
+ *tb = NULL;
+ if (exc_value) {
+ *type = (PyObject*) Py_TYPE(exc_value);
+ Py_INCREF(*type);
+ #if CYTHON_COMPILING_IN_CPYTHON
+ *tb = ((PyBaseExceptionObject*) exc_value)->traceback;
+ Py_XINCREF(*tb);
+ #else
+ *tb = PyException_GetTraceback(exc_value);
+ #endif
+ }
+#else
+ *type = tstate->curexc_type;
+ *value = tstate->curexc_value;
+ *tb = tstate->curexc_traceback;
+ tstate->curexc_type = 0;
+ tstate->curexc_value = 0;
+ tstate->curexc_traceback = 0;
+#endif
+}
+#endif
+
+/* PyObjectGetAttrStr */
+#if CYTHON_USE_TYPE_SLOTS
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) {
+ PyTypeObject* tp = Py_TYPE(obj);
+ if (likely(tp->tp_getattro))
+ return tp->tp_getattro(obj, attr_name);
+#if PY_MAJOR_VERSION < 3
+ if (likely(tp->tp_getattr))
+ return tp->tp_getattr(obj, PyString_AS_STRING(attr_name));
+#endif
+ return PyObject_GetAttr(obj, attr_name);
+}
+#endif
+
+/* PyObjectGetAttrStrNoError */
+#if __PYX_LIMITED_VERSION_HEX < 0x030d00A1
+static void __Pyx_PyObject_GetAttrStr_ClearAttributeError(void) {
+ __Pyx_PyThreadState_declare
+ __Pyx_PyThreadState_assign
+ if (likely(__Pyx_PyErr_ExceptionMatches(PyExc_AttributeError)))
+ __Pyx_PyErr_Clear();
+}
+#endif
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name) {
+ PyObject *result;
+#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1
+ (void) PyObject_GetOptionalAttr(obj, attr_name, &result);
+ return result;
+#else
+#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_TYPE_SLOTS && PY_VERSION_HEX >= 0x030700B1
+ PyTypeObject* tp = Py_TYPE(obj);
+ if (likely(tp->tp_getattro == PyObject_GenericGetAttr)) {
+ return _PyObject_GenericGetAttrWithDict(obj, attr_name, NULL, 1);
+ }
+#endif
+ result = __Pyx_PyObject_GetAttrStr(obj, attr_name);
+ if (unlikely(!result)) {
+ __Pyx_PyObject_GetAttrStr_ClearAttributeError();
+ }
+ return result;
+#endif
+}
+
+/* GetBuiltinName */
+static PyObject *__Pyx_GetBuiltinName(PyObject *name) {
+ PyObject* result = __Pyx_PyObject_GetAttrStrNoError(__pyx_b, name);
+ if (unlikely(!result) && !PyErr_Occurred()) {
+ PyErr_Format(PyExc_NameError,
+#if PY_MAJOR_VERSION >= 3
+ "name '%U' is not defined", name);
+#else
+ "name '%.200s' is not defined", PyString_AS_STRING(name));
+#endif
+ }
+ return result;
+}
+
+/* TupleAndListFromArray */
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE void __Pyx_copy_object_array(PyObject *const *CYTHON_RESTRICT src, PyObject** CYTHON_RESTRICT dest, Py_ssize_t length) {
+ PyObject *v;
+ Py_ssize_t i;
+ for (i = 0; i < length; i++) {
+ v = dest[i] = src[i];
+ Py_INCREF(v);
+ }
+}
+static CYTHON_INLINE PyObject *
+__Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n)
+{
+ PyObject *res;
+ if (n <= 0) {
+ Py_INCREF(__pyx_empty_tuple);
+ return __pyx_empty_tuple;
+ }
+ res = PyTuple_New(n);
+ if (unlikely(res == NULL)) return NULL;
+ __Pyx_copy_object_array(src, ((PyTupleObject*)res)->ob_item, n);
+ return res;
+}
+static CYTHON_INLINE PyObject *
+__Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n)
+{
+ PyObject *res;
+ if (n <= 0) {
+ return PyList_New(0);
+ }
+ res = PyList_New(n);
+ if (unlikely(res == NULL)) return NULL;
+ __Pyx_copy_object_array(src, ((PyListObject*)res)->ob_item, n);
+ return res;
+}
+#endif
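+/* Note: these CPython-only helpers materialise a tuple or list directly from a C
+ * array of object pointers -- one allocation plus an INCREF loop writing straight
+ * into ob_item -- instead of appending element by element. */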
+
+/* BytesEquals */
+static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) {
+#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API
+ return PyObject_RichCompareBool(s1, s2, equals);
+#else
+ if (s1 == s2) {
+ return (equals == Py_EQ);
+ } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) {
+ const char *ps1, *ps2;
+ Py_ssize_t length = PyBytes_GET_SIZE(s1);
+ if (length != PyBytes_GET_SIZE(s2))
+ return (equals == Py_NE);
+ ps1 = PyBytes_AS_STRING(s1);
+ ps2 = PyBytes_AS_STRING(s2);
+ if (ps1[0] != ps2[0]) {
+ return (equals == Py_NE);
+ } else if (length == 1) {
+ return (equals == Py_EQ);
+ } else {
+ int result;
+#if CYTHON_USE_UNICODE_INTERNALS && (PY_VERSION_HEX < 0x030B0000)
+ Py_hash_t hash1, hash2;
+ hash1 = ((PyBytesObject*)s1)->ob_shash;
+ hash2 = ((PyBytesObject*)s2)->ob_shash;
+ if (hash1 != hash2 && hash1 != -1 && hash2 != -1) {
+ return (equals == Py_NE);
+ }
+#endif
+ result = memcmp(ps1, ps2, (size_t)length);
+ return (equals == Py_EQ) ? (result == 0) : (result != 0);
+ }
+ } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) {
+ return (equals == Py_NE);
+ } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) {
+ return (equals == Py_NE);
+ } else {
+ int result;
+ PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
+ if (!py_result)
+ return -1;
+ result = __Pyx_PyObject_IsTrue(py_result);
+ Py_DECREF(py_result);
+ return result;
+ }
+#endif
+}
+
+/* UnicodeEquals */
+static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) {
+#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API
+ return PyObject_RichCompareBool(s1, s2, equals);
+#else
+#if PY_MAJOR_VERSION < 3
+ PyObject* owned_ref = NULL;
+#endif
+ int s1_is_unicode, s2_is_unicode;
+ if (s1 == s2) {
+ goto return_eq;
+ }
+ s1_is_unicode = PyUnicode_CheckExact(s1);
+ s2_is_unicode = PyUnicode_CheckExact(s2);
+#if PY_MAJOR_VERSION < 3
+ if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) {
+ owned_ref = PyUnicode_FromObject(s2);
+ if (unlikely(!owned_ref))
+ return -1;
+ s2 = owned_ref;
+ s2_is_unicode = 1;
+ } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) {
+ owned_ref = PyUnicode_FromObject(s1);
+ if (unlikely(!owned_ref))
+ return -1;
+ s1 = owned_ref;
+ s1_is_unicode = 1;
+ } else if (((!s2_is_unicode) & (!s1_is_unicode))) {
+ return __Pyx_PyBytes_Equals(s1, s2, equals);
+ }
+#endif
+ if (s1_is_unicode & s2_is_unicode) {
+ Py_ssize_t length;
+ int kind;
+ void *data1, *data2;
+ if (unlikely(__Pyx_PyUnicode_READY(s1) < 0) || unlikely(__Pyx_PyUnicode_READY(s2) < 0))
+ return -1;
+ length = __Pyx_PyUnicode_GET_LENGTH(s1);
+ if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) {
+ goto return_ne;
+ }
+#if CYTHON_USE_UNICODE_INTERNALS
+ {
+ Py_hash_t hash1, hash2;
+ #if CYTHON_PEP393_ENABLED
+ hash1 = ((PyASCIIObject*)s1)->hash;
+ hash2 = ((PyASCIIObject*)s2)->hash;
+ #else
+ hash1 = ((PyUnicodeObject*)s1)->hash;
+ hash2 = ((PyUnicodeObject*)s2)->hash;
+ #endif
+ if (hash1 != hash2 && hash1 != -1 && hash2 != -1) {
+ goto return_ne;
+ }
+ }
+#endif
+ kind = __Pyx_PyUnicode_KIND(s1);
+ if (kind != __Pyx_PyUnicode_KIND(s2)) {
+ goto return_ne;
+ }
+ data1 = __Pyx_PyUnicode_DATA(s1);
+ data2 = __Pyx_PyUnicode_DATA(s2);
+ if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) {
+ goto return_ne;
+ } else if (length == 1) {
+ goto return_eq;
+ } else {
+ int result = memcmp(data1, data2, (size_t)(length * kind));
+ #if PY_MAJOR_VERSION < 3
+ Py_XDECREF(owned_ref);
+ #endif
+ return (equals == Py_EQ) ? (result == 0) : (result != 0);
+ }
+ } else if ((s1 == Py_None) & s2_is_unicode) {
+ goto return_ne;
+ } else if ((s2 == Py_None) & s1_is_unicode) {
+ goto return_ne;
+ } else {
+ int result;
+ PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
+ #if PY_MAJOR_VERSION < 3
+ Py_XDECREF(owned_ref);
+ #endif
+ if (!py_result)
+ return -1;
+ result = __Pyx_PyObject_IsTrue(py_result);
+ Py_DECREF(py_result);
+ return result;
+ }
+return_eq:
+ #if PY_MAJOR_VERSION < 3
+ Py_XDECREF(owned_ref);
+ #endif
+ return (equals == Py_EQ);
+return_ne:
+ #if PY_MAJOR_VERSION < 3
+ Py_XDECREF(owned_ref);
+ #endif
+ return (equals == Py_NE);
+#endif
+}
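+/* Note: the unicode equality check above short-circuits in order of increasing
+ * cost: pointer identity, length, cached hash, storage kind, first character, and
+ * only then a single memcmp over the raw buffer; non-unicode operands fall back to
+ * PyObject_RichCompare. */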
+
+/* fastcall */
+#if CYTHON_METH_FASTCALL
+static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s)
+{
+ Py_ssize_t i, n = PyTuple_GET_SIZE(kwnames);
+ for (i = 0; i < n; i++)
+ {
+ if (s == PyTuple_GET_ITEM(kwnames, i)) return kwvalues[i];
+ }
+ for (i = 0; i < n; i++)
+ {
+ int eq = __Pyx_PyUnicode_Equals(s, PyTuple_GET_ITEM(kwnames, i), Py_EQ);
+ if (unlikely(eq != 0)) {
+ if (unlikely(eq < 0)) return NULL;
+ return kwvalues[i];
+ }
+ }
+ return NULL;
+}
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000
+CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues) {
+ Py_ssize_t i, nkwargs = PyTuple_GET_SIZE(kwnames);
+ PyObject *dict;
+ dict = PyDict_New();
+ if (unlikely(!dict))
+ return NULL;
+ for (i=0; i<nkwargs; i++) {
+ if (unlikely(PyDict_SetItem(dict, PyTuple_GET_ITEM(kwnames, i), kwvalues[i]) < 0)) goto bad;
+ }
+ return dict;
+bad:
+ Py_DECREF(dict);
+ return NULL;
+}
+#endif
+#endif
+
+/* RaiseDoubleKeywords */
+static void __Pyx_RaiseDoubleKeywordsError(
+ const char* func_name,
+ PyObject* kw_name)
+{
+ PyErr_Format(PyExc_TypeError,
+ #if PY_MAJOR_VERSION >= 3
+ "%s() got multiple values for keyword argument '%U'", func_name, kw_name);
+ #else
+ "%s() got multiple values for keyword argument '%s'", func_name,
+ PyString_AsString(kw_name));
+ #endif
+}
+
+/* ParseKeywords */
+static int __Pyx_ParseOptionalKeywords(
+ PyObject *kwds,
+ PyObject *const *kwvalues,
+ PyObject **argnames[],
+ PyObject *kwds2,
+ PyObject *values[],
+ Py_ssize_t num_pos_args,
+ const char* function_name)
+{
+ PyObject *key = 0, *value = 0;
+ Py_ssize_t pos = 0;
+ PyObject*** name;
+ PyObject*** first_kw_arg = argnames + num_pos_args;
+ int kwds_is_tuple = CYTHON_METH_FASTCALL && likely(PyTuple_Check(kwds));
+ while (1) {
+ Py_XDECREF(key); key = NULL;
+ Py_XDECREF(value); value = NULL;
+ if (kwds_is_tuple) {
+ Py_ssize_t size;
+#if CYTHON_ASSUME_SAFE_MACROS
+ size = PyTuple_GET_SIZE(kwds);
+#else
+ size = PyTuple_Size(kwds);
+ if (size < 0) goto bad;
+#endif
+ if (pos >= size) break;
+#if CYTHON_AVOID_BORROWED_REFS
+ key = __Pyx_PySequence_ITEM(kwds, pos);
+ if (!key) goto bad;
+#elif CYTHON_ASSUME_SAFE_MACROS
+ key = PyTuple_GET_ITEM(kwds, pos);
+#else
+ key = PyTuple_GetItem(kwds, pos);
+ if (!key) goto bad;
+#endif
+ value = kwvalues[pos];
+ pos++;
+ }
+ else
+ {
+ if (!PyDict_Next(kwds, &pos, &key, &value)) break;
+#if CYTHON_AVOID_BORROWED_REFS
+ Py_INCREF(key);
+#endif
+ }
+ name = first_kw_arg;
+ while (*name && (**name != key)) name++;
+ if (*name) {
+ values[name-argnames] = value;
+#if CYTHON_AVOID_BORROWED_REFS
+ Py_INCREF(value);
+ Py_DECREF(key);
+#endif
+ key = NULL;
+ value = NULL;
+ continue;
+ }
+#if !CYTHON_AVOID_BORROWED_REFS
+ Py_INCREF(key);
+#endif
+ Py_INCREF(value);
+ name = first_kw_arg;
+ #if PY_MAJOR_VERSION < 3
+ if (likely(PyString_Check(key))) {
+ while (*name) {
+ if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key))
+ && _PyString_Eq(**name, key)) {
+ values[name-argnames] = value;
+#if CYTHON_AVOID_BORROWED_REFS
+ value = NULL;
+#endif
+ break;
+ }
+ name++;
+ }
+ if (*name) continue;
+ else {
+ PyObject*** argname = argnames;
+ while (argname != first_kw_arg) {
+ if ((**argname == key) || (
+ (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key))
+ && _PyString_Eq(**argname, key))) {
+ goto arg_passed_twice;
+ }
+ argname++;
+ }
+ }
+ } else
+ #endif
+ if (likely(PyUnicode_Check(key))) {
+ while (*name) {
+ int cmp = (
+ #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
+ (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
+ #endif
+ PyUnicode_Compare(**name, key)
+ );
+ if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
+ if (cmp == 0) {
+ values[name-argnames] = value;
+#if CYTHON_AVOID_BORROWED_REFS
+ value = NULL;
+#endif
+ break;
+ }
+ name++;
+ }
+ if (*name) continue;
+ else {
+ PyObject*** argname = argnames;
+ while (argname != first_kw_arg) {
+ int cmp = (**argname == key) ? 0 :
+ #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
+ (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
+ #endif
+ PyUnicode_Compare(**argname, key);
+ if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
+ if (cmp == 0) goto arg_passed_twice;
+ argname++;
+ }
+ }
+ } else
+ goto invalid_keyword_type;
+ if (kwds2) {
+ if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad;
+ } else {
+ goto invalid_keyword;
+ }
+ }
+ Py_XDECREF(key);
+ Py_XDECREF(value);
+ return 0;
+arg_passed_twice:
+ __Pyx_RaiseDoubleKeywordsError(function_name, key);
+ goto bad;
+invalid_keyword_type:
+ PyErr_Format(PyExc_TypeError,
+ "%.200s() keywords must be strings", function_name);
+ goto bad;
+invalid_keyword:
+ #if PY_MAJOR_VERSION < 3
+ PyErr_Format(PyExc_TypeError,
+ "%.200s() got an unexpected keyword argument '%.200s'",
+ function_name, PyString_AsString(key));
+ #else
+ PyErr_Format(PyExc_TypeError,
+ "%s() got an unexpected keyword argument '%U'",
+ function_name, key);
+ #endif
+bad:
+ Py_XDECREF(key);
+ Py_XDECREF(value);
+ return -1;
+}
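+/* Note: keyword matching in __Pyx_ParseOptionalKeywords is two-pass: interned
+ * pointer comparison first (the common case, since argument-name strings are
+ * interned), then a real string comparison; keywords that match nothing either go
+ * into kwds2 (the **kwargs dict, when one exists) or raise TypeError. */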
+
+/* RaiseArgTupleInvalid */
+static void __Pyx_RaiseArgtupleInvalid(
+ const char* func_name,
+ int exact,
+ Py_ssize_t num_min,
+ Py_ssize_t num_max,
+ Py_ssize_t num_found)
+{
+ Py_ssize_t num_expected;
+ const char *more_or_less;
+ if (num_found < num_min) {
+ num_expected = num_min;
+ more_or_less = "at least";
+ } else {
+ num_expected = num_max;
+ more_or_less = "at most";
+ }
+ if (exact) {
+ more_or_less = "exactly";
+ }
+ PyErr_Format(PyExc_TypeError,
+ "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)",
+ func_name, more_or_less, num_expected,
+ (num_expected == 1) ? "" : "s", num_found);
+}
+
+/* PyFunctionFastCall */
+#if CYTHON_FAST_PYCALL && !CYTHON_VECTORCALL
+static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na,
+ PyObject *globals) {
+ PyFrameObject *f;
+ PyThreadState *tstate = __Pyx_PyThreadState_Current;
+ PyObject **fastlocals;
+ Py_ssize_t i;
+ PyObject *result;
+ assert(globals != NULL);
+ /* XXX Perhaps we should create a specialized
+ PyFrame_New() that doesn't take locals, but does
+ take builtins without sanity checking them.
+ */
+ assert(tstate != NULL);
+ f = PyFrame_New(tstate, co, globals, NULL);
+ if (f == NULL) {
+ return NULL;
+ }
+ fastlocals = __Pyx_PyFrame_GetLocalsplus(f);
+ for (i = 0; i < na; i++) {
+ Py_INCREF(*args);
+ fastlocals[i] = *args++;
+ }
+ result = PyEval_EvalFrameEx(f,0);
+ ++tstate->recursion_depth;
+ Py_DECREF(f);
+ --tstate->recursion_depth;
+ return result;
+}
+static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs) {
+ PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func);
+ PyObject *globals = PyFunction_GET_GLOBALS(func);
+ PyObject *argdefs = PyFunction_GET_DEFAULTS(func);
+ PyObject *closure;
+#if PY_MAJOR_VERSION >= 3
+ PyObject *kwdefs;
+#endif
+ PyObject *kwtuple, **k;
+ PyObject **d;
+ Py_ssize_t nd;
+ Py_ssize_t nk;
+ PyObject *result;
+ assert(kwargs == NULL || PyDict_Check(kwargs));
+ nk = kwargs ? PyDict_Size(kwargs) : 0;
+ #if PY_MAJOR_VERSION < 3
+ if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) {
+ return NULL;
+ }
+ #else
+ if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) {
+ return NULL;
+ }
+ #endif
+ if (
+#if PY_MAJOR_VERSION >= 3
+ co->co_kwonlyargcount == 0 &&
+#endif
+ likely(kwargs == NULL || nk == 0) &&
+ co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) {
+ if (argdefs == NULL && co->co_argcount == nargs) {
+ result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals);
+ goto done;
+ }
+ else if (nargs == 0 && argdefs != NULL
+ && co->co_argcount == Py_SIZE(argdefs)) {
+ /* function called with no arguments, but all parameters have
+ a default value: use the default values as arguments. */
+ args = &PyTuple_GET_ITEM(argdefs, 0);
+ result = __Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals);
+ goto done;
+ }
+ }
+ if (kwargs != NULL) {
+ Py_ssize_t pos, i;
+ kwtuple = PyTuple_New(2 * nk);
+ if (kwtuple == NULL) {
+ result = NULL;
+ goto done;
+ }
+ k = &PyTuple_GET_ITEM(kwtuple, 0);
+ pos = i = 0;
+ while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) {
+ Py_INCREF(k[i]);
+ Py_INCREF(k[i+1]);
+ i += 2;
+ }
+ nk = i / 2;
+ }
+ else {
+ kwtuple = NULL;
+ k = NULL;
+ }
+ closure = PyFunction_GET_CLOSURE(func);
+#if PY_MAJOR_VERSION >= 3
+ kwdefs = PyFunction_GET_KW_DEFAULTS(func);
+#endif
+ if (argdefs != NULL) {
+ d = &PyTuple_GET_ITEM(argdefs, 0);
+ nd = Py_SIZE(argdefs);
+ }
+ else {
+ d = NULL;
+ nd = 0;
+ }
+#if PY_MAJOR_VERSION >= 3
+ result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL,
+ args, (int)nargs,
+ k, (int)nk,
+ d, (int)nd, kwdefs, closure);
+#else
+ result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL,
+ args, (int)nargs,
+ k, (int)nk,
+ d, (int)nd, closure);
+#endif
+ Py_XDECREF(kwtuple);
+done:
+ Py_LeaveRecursiveCall();
+ return result;
+}
+#endif
+
+/* PyObjectCall */
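+/* Invokes func's tp_call slot directly under the interpreter's recursion
+   guard, and turns a NULL result without a pending exception into a
+   SystemError. */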
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) {
+ PyObject *result;
+ ternaryfunc call = Py_TYPE(func)->tp_call;
+ if (unlikely(!call))
+ return PyObject_Call(func, arg, kw);
+ #if PY_MAJOR_VERSION < 3
+ if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object")))
+ return NULL;
+ #else
+ if (unlikely(Py_EnterRecursiveCall(" while calling a Python object")))
+ return NULL;
+ #endif
+ result = (*call)(func, arg, kw);
+ Py_LeaveRecursiveCall();
+ if (unlikely(!result) && unlikely(!PyErr_Occurred())) {
+ PyErr_SetString(
+ PyExc_SystemError,
+ "NULL result without error in PyObject_Call");
+ }
+ return result;
+}
+#endif
+
+/* PyObjectCallMethO */
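+/* Calls the PyCFunction wrapped by a METH_O/METH_NOARGS (Cy)function object
+   directly with a single argument (or NULL), skipping tuple packing. */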
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) {
+ PyObject *self, *result;
+ PyCFunction cfunc;
+ cfunc = __Pyx_CyOrPyCFunction_GET_FUNCTION(func);
+ self = __Pyx_CyOrPyCFunction_GET_SELF(func);
+ #if PY_MAJOR_VERSION < 3
+ if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object")))
+ return NULL;
+ #else
+ if (unlikely(Py_EnterRecursiveCall(" while calling a Python object")))
+ return NULL;
+ #endif
+ result = cfunc(self, arg);
+ Py_LeaveRecursiveCall();
+ if (unlikely(!result) && unlikely(!PyErr_Occurred())) {
+ PyErr_SetString(
+ PyExc_SystemError,
+ "NULL result without error in PyObject_Call");
+ }
+ return result;
+}
+#endif
+
+/* PyObjectFastCall */
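+/* Central call dispatcher: tries the METH_NOARGS/METH_O shortcuts, then the
+   legacy _PyCFunction fast-call API on older CPython, then vectorcall where
+   supported, and finally falls back to building an argument tuple for
+   PyObject_Call. */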
+#if PY_VERSION_HEX < 0x03090000 || CYTHON_COMPILING_IN_LIMITED_API
+static PyObject* __Pyx_PyObject_FastCall_fallback(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs) {
+ PyObject *argstuple;
+ PyObject *result = 0;
+ size_t i;
+ argstuple = PyTuple_New((Py_ssize_t)nargs);
+ if (unlikely(!argstuple)) return NULL;
+ for (i = 0; i < nargs; i++) {
+ Py_INCREF(args[i]);
+ if (__Pyx_PyTuple_SET_ITEM(argstuple, (Py_ssize_t)i, args[i]) < 0) goto bad;
+ }
+ result = __Pyx_PyObject_Call(func, argstuple, kwargs);
+ bad:
+ Py_DECREF(argstuple);
+ return result;
+}
+#endif
+static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t _nargs, PyObject *kwargs) {
+ Py_ssize_t nargs = __Pyx_PyVectorcall_NARGS(_nargs);
+#if CYTHON_COMPILING_IN_CPYTHON
+ if (nargs == 0 && kwargs == NULL) {
+ if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_NOARGS))
+ return __Pyx_PyObject_CallMethO(func, NULL);
+ }
+ else if (nargs == 1 && kwargs == NULL) {
+ if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_O))
+ return __Pyx_PyObject_CallMethO(func, args[0]);
+ }
+#endif
+ #if PY_VERSION_HEX < 0x030800B1
+ #if CYTHON_FAST_PYCCALL
+ if (PyCFunction_Check(func)) {
+ if (kwargs) {
+ return _PyCFunction_FastCallDict(func, args, nargs, kwargs);
+ } else {
+ return _PyCFunction_FastCallKeywords(func, args, nargs, NULL);
+ }
+ }
+ #if PY_VERSION_HEX >= 0x030700A1
+ if (!kwargs && __Pyx_IS_TYPE(func, &PyMethodDescr_Type)) {
+ return _PyMethodDescr_FastCallKeywords(func, args, nargs, NULL);
+ }
+ #endif
+ #endif
+ #if CYTHON_FAST_PYCALL
+ if (PyFunction_Check(func)) {
+ return __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs);
+ }
+ #endif
+ #endif
+ if (kwargs == NULL) {
+ #if CYTHON_VECTORCALL
+ #if PY_VERSION_HEX < 0x03090000
+ vectorcallfunc f = _PyVectorcall_Function(func);
+ #else
+ vectorcallfunc f = PyVectorcall_Function(func);
+ #endif
+ if (f) {
+ return f(func, args, (size_t)nargs, NULL);
+ }
+ #elif defined(__Pyx_CyFunction_USED) && CYTHON_BACKPORT_VECTORCALL
+ if (__Pyx_CyFunction_CheckExact(func)) {
+ __pyx_vectorcallfunc f = __Pyx_CyFunction_func_vectorcall(func);
+ if (f) return f(func, args, (size_t)nargs, NULL);
+ }
+ #endif
+ }
+ if (nargs == 0) {
+ return __Pyx_PyObject_Call(func, __pyx_empty_tuple, kwargs);
+ }
+ #if PY_VERSION_HEX >= 0x03090000 && !CYTHON_COMPILING_IN_LIMITED_API
+ return PyObject_VectorcallDict(func, args, (size_t)nargs, kwargs);
+ #else
+ return __Pyx_PyObject_FastCall_fallback(func, args, (size_t)nargs, kwargs);
+ #endif
+}
+
+/* IterFinish */
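+/* Called once an iterator is exhausted: returns 0 if no exception is pending
+   or a StopIteration was raised (which it clears), and -1 for any other
+   pending exception. */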
+static CYTHON_INLINE int __Pyx_IterFinish(void) {
+ PyObject* exc_type;
+ __Pyx_PyThreadState_declare
+ __Pyx_PyThreadState_assign
+ exc_type = __Pyx_PyErr_CurrentExceptionType();
+ if (unlikely(exc_type)) {
+ if (unlikely(!__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration)))
+ return -1;
+ __Pyx_PyErr_Clear();
+ return 0;
+ }
+ return 0;
+}
+
+/* PyObjectCallNoArg */
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) {
+ PyObject *arg[2] = {NULL, NULL};
+ return __Pyx_PyObject_FastCall(func, arg + 1, 0 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET);
+}
+
+/* PyObjectCallOneArg */
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) {
+ PyObject *args[2] = {NULL, arg};
+ return __Pyx_PyObject_FastCall(func, args+1, 1 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET);
+}
+
+/* PyObjectGetMethod */
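+/* Looks up a method without binding it when possible: returns 1 together
+   with the plain function if the caller should pass obj explicitly as self,
+   or 0 together with an ordinary (possibly bound) attribute otherwise. */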
+static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method) {
+ PyObject *attr;
+#if CYTHON_UNPACK_METHODS && CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_PYTYPE_LOOKUP
+ __Pyx_TypeName type_name;
+ PyTypeObject *tp = Py_TYPE(obj);
+ PyObject *descr;
+ descrgetfunc f = NULL;
+ PyObject **dictptr, *dict;
+ int meth_found = 0;
+ assert (*method == NULL);
+ if (unlikely(tp->tp_getattro != PyObject_GenericGetAttr)) {
+ attr = __Pyx_PyObject_GetAttrStr(obj, name);
+ goto try_unpack;
+ }
+ if (unlikely(tp->tp_dict == NULL) && unlikely(PyType_Ready(tp) < 0)) {
+ return 0;
+ }
+ descr = _PyType_Lookup(tp, name);
+ if (likely(descr != NULL)) {
+ Py_INCREF(descr);
+#if defined(Py_TPFLAGS_METHOD_DESCRIPTOR) && Py_TPFLAGS_METHOD_DESCRIPTOR
+ if (__Pyx_PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_METHOD_DESCRIPTOR))
+#elif PY_MAJOR_VERSION >= 3
+ #ifdef __Pyx_CyFunction_USED
+ if (likely(PyFunction_Check(descr) || __Pyx_IS_TYPE(descr, &PyMethodDescr_Type) || __Pyx_CyFunction_Check(descr)))
+ #else
+ if (likely(PyFunction_Check(descr) || __Pyx_IS_TYPE(descr, &PyMethodDescr_Type)))
+ #endif
+#else
+ #ifdef __Pyx_CyFunction_USED
+ if (likely(PyFunction_Check(descr) || __Pyx_CyFunction_Check(descr)))
+ #else
+ if (likely(PyFunction_Check(descr)))
+ #endif
+#endif
+ {
+ meth_found = 1;
+ } else {
+ f = Py_TYPE(descr)->tp_descr_get;
+ if (f != NULL && PyDescr_IsData(descr)) {
+ attr = f(descr, obj, (PyObject *)Py_TYPE(obj));
+ Py_DECREF(descr);
+ goto try_unpack;
+ }
+ }
+ }
+ dictptr = _PyObject_GetDictPtr(obj);
+ if (dictptr != NULL && (dict = *dictptr) != NULL) {
+ Py_INCREF(dict);
+ attr = __Pyx_PyDict_GetItemStr(dict, name);
+ if (attr != NULL) {
+ Py_INCREF(attr);
+ Py_DECREF(dict);
+ Py_XDECREF(descr);
+ goto try_unpack;
+ }
+ Py_DECREF(dict);
+ }
+ if (meth_found) {
+ *method = descr;
+ return 1;
+ }
+ if (f != NULL) {
+ attr = f(descr, obj, (PyObject *)Py_TYPE(obj));
+ Py_DECREF(descr);
+ goto try_unpack;
+ }
+ if (likely(descr != NULL)) {
+ *method = descr;
+ return 0;
+ }
+ type_name = __Pyx_PyType_GetName(tp);
+ PyErr_Format(PyExc_AttributeError,
+#if PY_MAJOR_VERSION >= 3
+ "'" __Pyx_FMT_TYPENAME "' object has no attribute '%U'",
+ type_name, name);
+#else
+ "'" __Pyx_FMT_TYPENAME "' object has no attribute '%.400s'",
+ type_name, PyString_AS_STRING(name));
+#endif
+ __Pyx_DECREF_TypeName(type_name);
+ return 0;
+#else
+ attr = __Pyx_PyObject_GetAttrStr(obj, name);
+ goto try_unpack;
+#endif
+try_unpack:
+#if CYTHON_UNPACK_METHODS
+ if (likely(attr) && PyMethod_Check(attr) && likely(PyMethod_GET_SELF(attr) == obj)) {
+ PyObject *function = PyMethod_GET_FUNCTION(attr);
+ Py_INCREF(function);
+ Py_DECREF(attr);
+ *method = function;
+ return 1;
+ }
+#endif
+ *method = attr;
+ return 0;
+}
+
+/* PyObjectCallMethod0 */
+static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) {
+ PyObject *method = NULL, *result = NULL;
+ int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method);
+ if (likely(is_method)) {
+ result = __Pyx_PyObject_CallOneArg(method, obj);
+ Py_DECREF(method);
+ return result;
+ }
+ if (unlikely(!method)) goto bad;
+ result = __Pyx_PyObject_CallNoArg(method);
+ Py_DECREF(method);
+bad:
+ return result;
+}
+
+/* RaiseNeedMoreValuesToUnpack */
+static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) {
+ PyErr_Format(PyExc_ValueError,
+ "need more than %" CYTHON_FORMAT_SSIZE_T "d value%.1s to unpack",
+ index, (index == 1) ? "" : "s");
+}
+
+/* RaiseTooManyValuesToUnpack */
+static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) {
+ PyErr_Format(PyExc_ValueError,
+ "too many values to unpack (expected %" CYTHON_FORMAT_SSIZE_T "d)", expected);
+}
+
+/* UnpackItemEndCheck */
+static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected) {
+ if (unlikely(retval)) {
+ Py_DECREF(retval);
+ __Pyx_RaiseTooManyValuesError(expected);
+ return -1;
+ }
+ return __Pyx_IterFinish();
+}
+
+/* RaiseNoneIterError */
+static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) {
+ PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable");
+}
+
+/* UnpackTupleError */
+static void __Pyx_UnpackTupleError(PyObject *t, Py_ssize_t index) {
+ if (t == Py_None) {
+ __Pyx_RaiseNoneNotIterableError();
+ } else if (PyTuple_GET_SIZE(t) < index) {
+ __Pyx_RaiseNeedMoreValuesError(PyTuple_GET_SIZE(t));
+ } else {
+ __Pyx_RaiseTooManyValuesError(index);
+ }
+}
+
+/* UnpackTuple2 */
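+/* Unpacks a pair into two new references: the exact variant indexes a real
+   2-tuple directly, while the generic variant iterates any 2-element
+   iterable and raises the usual too-few/too-many-values errors on mismatch. */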
+static CYTHON_INLINE int __Pyx_unpack_tuple2_exact(
+ PyObject* tuple, PyObject** pvalue1, PyObject** pvalue2, int decref_tuple) {
+ PyObject *value1 = NULL, *value2 = NULL;
+#if CYTHON_COMPILING_IN_PYPY
+ value1 = PySequence_ITEM(tuple, 0); if (unlikely(!value1)) goto bad;
+ value2 = PySequence_ITEM(tuple, 1); if (unlikely(!value2)) goto bad;
+#else
+ value1 = PyTuple_GET_ITEM(tuple, 0); Py_INCREF(value1);
+ value2 = PyTuple_GET_ITEM(tuple, 1); Py_INCREF(value2);
+#endif
+ if (decref_tuple) {
+ Py_DECREF(tuple);
+ }
+ *pvalue1 = value1;
+ *pvalue2 = value2;
+ return 0;
+#if CYTHON_COMPILING_IN_PYPY
+bad:
+ Py_XDECREF(value1);
+ Py_XDECREF(value2);
+ if (decref_tuple) { Py_XDECREF(tuple); }
+ return -1;
+#endif
+}
+static int __Pyx_unpack_tuple2_generic(PyObject* tuple, PyObject** pvalue1, PyObject** pvalue2,
+ int has_known_size, int decref_tuple) {
+ Py_ssize_t index;
+ PyObject *value1 = NULL, *value2 = NULL, *iter = NULL;
+ iternextfunc iternext;
+ iter = PyObject_GetIter(tuple);
+ if (unlikely(!iter)) goto bad;
+ if (decref_tuple) { Py_DECREF(tuple); tuple = NULL; }
+ iternext = __Pyx_PyObject_GetIterNextFunc(iter);
+ value1 = iternext(iter); if (unlikely(!value1)) { index = 0; goto unpacking_failed; }
+ value2 = iternext(iter); if (unlikely(!value2)) { index = 1; goto unpacking_failed; }
+ if (!has_known_size && unlikely(__Pyx_IternextUnpackEndCheck(iternext(iter), 2))) goto bad;
+ Py_DECREF(iter);
+ *pvalue1 = value1;
+ *pvalue2 = value2;
+ return 0;
+unpacking_failed:
+ if (!has_known_size && __Pyx_IterFinish() == 0)
+ __Pyx_RaiseNeedMoreValuesError(index);
+bad:
+ Py_XDECREF(iter);
+ Py_XDECREF(value1);
+ Py_XDECREF(value2);
+ if (decref_tuple) { Py_XDECREF(tuple); }
+ return -1;
+}
+
+/* dict_iter */
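+/* Uniform iteration over dicts and generic iterables: real dicts use
+   PyDict_Next with a guard against size changes during iteration, tuples and
+   lists are indexed directly, and everything else goes through PyIter_Next. */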
+#if CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
+#include <string.h>
+#endif
+static CYTHON_INLINE PyObject* __Pyx_dict_iterator(PyObject* iterable, int is_dict, PyObject* method_name,
+ Py_ssize_t* p_orig_length, int* p_source_is_dict) {
+ is_dict = is_dict || likely(PyDict_CheckExact(iterable));
+ *p_source_is_dict = is_dict;
+ if (is_dict) {
+#if !CYTHON_COMPILING_IN_PYPY
+ *p_orig_length = PyDict_Size(iterable);
+ Py_INCREF(iterable);
+ return iterable;
+#elif PY_MAJOR_VERSION >= 3
+ static PyObject *py_items = NULL, *py_keys = NULL, *py_values = NULL;
+ PyObject **pp = NULL;
+ if (method_name) {
+ const char *name = PyUnicode_AsUTF8(method_name);
+ if (strcmp(name, "iteritems") == 0) pp = &py_items;
+ else if (strcmp(name, "iterkeys") == 0) pp = &py_keys;
+ else if (strcmp(name, "itervalues") == 0) pp = &py_values;
+ if (pp) {
+ if (!*pp) {
+ *pp = PyUnicode_FromString(name + 4);
+ if (!*pp)
+ return NULL;
+ }
+ method_name = *pp;
+ }
+ }
+#endif
+ }
+ *p_orig_length = 0;
+ if (method_name) {
+ PyObject* iter;
+ iterable = __Pyx_PyObject_CallMethod0(iterable, method_name);
+ if (!iterable)
+ return NULL;
+#if !CYTHON_COMPILING_IN_PYPY
+ if (PyTuple_CheckExact(iterable) || PyList_CheckExact(iterable))
+ return iterable;
+#endif
+ iter = PyObject_GetIter(iterable);
+ Py_DECREF(iterable);
+ return iter;
+ }
+ return PyObject_GetIter(iterable);
+}
+static CYTHON_INLINE int __Pyx_dict_iter_next(
+ PyObject* iter_obj, CYTHON_NCP_UNUSED Py_ssize_t orig_length, CYTHON_NCP_UNUSED Py_ssize_t* ppos,
+ PyObject** pkey, PyObject** pvalue, PyObject** pitem, int source_is_dict) {
+ PyObject* next_item;
+#if !CYTHON_COMPILING_IN_PYPY
+ if (source_is_dict) {
+ PyObject *key, *value;
+ if (unlikely(orig_length != PyDict_Size(iter_obj))) {
+ PyErr_SetString(PyExc_RuntimeError, "dictionary changed size during iteration");
+ return -1;
+ }
+ if (unlikely(!PyDict_Next(iter_obj, ppos, &key, &value))) {
+ return 0;
+ }
+ if (pitem) {
+ PyObject* tuple = PyTuple_New(2);
+ if (unlikely(!tuple)) {
+ return -1;
+ }
+ Py_INCREF(key);
+ Py_INCREF(value);
+ PyTuple_SET_ITEM(tuple, 0, key);
+ PyTuple_SET_ITEM(tuple, 1, value);
+ *pitem = tuple;
+ } else {
+ if (pkey) {
+ Py_INCREF(key);
+ *pkey = key;
+ }
+ if (pvalue) {
+ Py_INCREF(value);
+ *pvalue = value;
+ }
+ }
+ return 1;
+ } else if (PyTuple_CheckExact(iter_obj)) {
+ Py_ssize_t pos = *ppos;
+ if (unlikely(pos >= PyTuple_GET_SIZE(iter_obj))) return 0;
+ *ppos = pos + 1;
+ next_item = PyTuple_GET_ITEM(iter_obj, pos);
+ Py_INCREF(next_item);
+ } else if (PyList_CheckExact(iter_obj)) {
+ Py_ssize_t pos = *ppos;
+ if (unlikely(pos >= PyList_GET_SIZE(iter_obj))) return 0;
+ *ppos = pos + 1;
+ next_item = PyList_GET_ITEM(iter_obj, pos);
+ Py_INCREF(next_item);
+ } else
+#endif
+ {
+ next_item = PyIter_Next(iter_obj);
+ if (unlikely(!next_item)) {
+ return __Pyx_IterFinish();
+ }
+ }
+ if (pitem) {
+ *pitem = next_item;
+ } else if (pkey && pvalue) {
+ if (__Pyx_unpack_tuple2(next_item, pkey, pvalue, source_is_dict, source_is_dict, 1))
+ return -1;
+ } else if (pkey) {
+ *pkey = next_item;
+ } else {
+ *pvalue = next_item;
+ }
+ return 1;
+}
+
+/* UnicodeAsUCS4 */
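+/* Converts a length-1 unicode string to its Py_UCS4 code point, combining
+   surrogate pairs on pre-PEP-393 narrow builds; raises ValueError for any
+   other length. */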
+static CYTHON_INLINE Py_UCS4 __Pyx_PyUnicode_AsPy_UCS4(PyObject* x) {
+ Py_ssize_t length;
+ #if CYTHON_PEP393_ENABLED
+ length = PyUnicode_GET_LENGTH(x);
+ if (likely(length == 1)) {
+ return PyUnicode_READ_CHAR(x, 0);
+ }
+ #else
+ length = PyUnicode_GET_SIZE(x);
+ if (likely(length == 1)) {
+ return PyUnicode_AS_UNICODE(x)[0];
+ }
+ #if Py_UNICODE_SIZE == 2
+ else if (PyUnicode_GET_SIZE(x) == 2) {
+ Py_UCS4 high_val = PyUnicode_AS_UNICODE(x)[0];
+ if (high_val >= 0xD800 && high_val <= 0xDBFF) {
+ Py_UCS4 low_val = PyUnicode_AS_UNICODE(x)[1];
+ if (low_val >= 0xDC00 && low_val <= 0xDFFF) {
+ return 0x10000 + (((high_val & ((1<<10)-1)) << 10) | (low_val & ((1<<10)-1)));
+ }
+ }
+ }
+ #endif
+ #endif
+ PyErr_Format(PyExc_ValueError,
+ "only single character unicode strings can be converted to Py_UCS4, "
+ "got length %" CYTHON_FORMAT_SSIZE_T "d", length);
+ return (Py_UCS4)-1;
+}
+
+/* object_ord */
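+/* Implements ord() for bytes, unicode and bytearray objects of length 1,
+   raising a TypeError for any other type or length. */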
+static long __Pyx__PyObject_Ord(PyObject* c) {
+ Py_ssize_t size;
+ if (PyBytes_Check(c)) {
+ size = PyBytes_GET_SIZE(c);
+ if (likely(size == 1)) {
+ return (unsigned char) PyBytes_AS_STRING(c)[0];
+ }
+#if PY_MAJOR_VERSION < 3
+ } else if (PyUnicode_Check(c)) {
+ return (long)__Pyx_PyUnicode_AsPy_UCS4(c);
+#endif
+#if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE))
+ } else if (PyByteArray_Check(c)) {
+ size = PyByteArray_GET_SIZE(c);
+ if (likely(size == 1)) {
+ return (unsigned char) PyByteArray_AS_STRING(c)[0];
+ }
+#endif
+ } else {
+ __Pyx_TypeName c_type_name = __Pyx_PyType_GetName(Py_TYPE(c));
+ PyErr_Format(PyExc_TypeError,
+ "ord() expected string of length 1, but " __Pyx_FMT_TYPENAME " found",
+ c_type_name);
+ __Pyx_DECREF_TypeName(c_type_name);
+ return (long)(Py_UCS4)-1;
+ }
+ PyErr_Format(PyExc_TypeError,
+ "ord() expected a character, but string of length %zd found", size);
+ return (long)(Py_UCS4)-1;
+}
+
+/* PyDictVersioning */
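+/* Helpers around CPython's dict version tags: they let generated code skip
+   repeated attribute/global lookups when neither the type dict nor the
+   instance dict has changed since the cached lookup. */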
+#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS
+static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) {
+ PyObject *dict = Py_TYPE(obj)->tp_dict;
+ return likely(dict) ? __PYX_GET_DICT_VERSION(dict) : 0;
+}
+static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj) {
+ PyObject **dictptr = NULL;
+ Py_ssize_t offset = Py_TYPE(obj)->tp_dictoffset;
+ if (offset) {
+#if CYTHON_COMPILING_IN_CPYTHON
+ dictptr = (likely(offset > 0)) ? (PyObject **) ((char *)obj + offset) : _PyObject_GetDictPtr(obj);
+#else
+ dictptr = _PyObject_GetDictPtr(obj);
+#endif
+ }
+ return (dictptr && *dictptr) ? __PYX_GET_DICT_VERSION(*dictptr) : 0;
+}
+static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version) {
+ PyObject *dict = Py_TYPE(obj)->tp_dict;
+ if (unlikely(!dict) || unlikely(tp_dict_version != __PYX_GET_DICT_VERSION(dict)))
+ return 0;
+ return obj_dict_version == __Pyx_get_object_dict_version(obj);
+}
+#endif
+
+/* GetModuleGlobalName */
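+/* Looks up a global in the module dict (using the dict-version cache where
+   enabled) and falls back to the builtins via __Pyx_GetBuiltinName. */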
+#if CYTHON_USE_DICT_VERSIONS
+static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value)
+#else
+static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name)
+#endif
+{
+ PyObject *result;
+#if !CYTHON_AVOID_BORROWED_REFS
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 && PY_VERSION_HEX < 0x030d0000
+ result = _PyDict_GetItem_KnownHash(__pyx_d, name, ((PyASCIIObject *) name)->hash);
+ __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version)
+ if (likely(result)) {
+ return __Pyx_NewRef(result);
+ } else if (unlikely(PyErr_Occurred())) {
+ return NULL;
+ }
+#elif CYTHON_COMPILING_IN_LIMITED_API
+ if (unlikely(!__pyx_m)) {
+ return NULL;
+ }
+ result = PyObject_GetAttr(__pyx_m, name);
+ if (likely(result)) {
+ return result;
+ }
+#else
+ result = PyDict_GetItem(__pyx_d, name);
+ __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version)
+ if (likely(result)) {
+ return __Pyx_NewRef(result);
+ }
+#endif
+#else
+ result = PyObject_GetItem(__pyx_d, name);
+ __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version)
+ if (likely(result)) {
+ return __Pyx_NewRef(result);
+ }
+ PyErr_Clear();
+#endif
+ return __Pyx_GetBuiltinName(name);
+}
+
+/* FixUpExtensionType */
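+/* On CPython older than 3.9, PyType_FromSpec does not honour the special
+   double-underscore members (__dictoffset__, __weaklistoffset__,
+   __vectorcalloffset__, __module__), so this copies them from the
+   Py_tp_members slot into the corresponding PyTypeObject fields by hand. */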
+#if CYTHON_USE_TYPE_SPECS
+static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type) {
+#if PY_VERSION_HEX > 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API
+ CYTHON_UNUSED_VAR(spec);
+ CYTHON_UNUSED_VAR(type);
+#else
+ const PyType_Slot *slot = spec->slots;
+ while (slot && slot->slot && slot->slot != Py_tp_members)
+ slot++;
+ if (slot && slot->slot == Py_tp_members) {
+ int changed = 0;
+#if !(PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON)
+ const
+#endif
+ PyMemberDef *memb = (PyMemberDef*) slot->pfunc;
+ while (memb && memb->name) {
+ if (memb->name[0] == '_' && memb->name[1] == '_') {
+#if PY_VERSION_HEX < 0x030900b1
+ if (strcmp(memb->name, "__weaklistoffset__") == 0) {
+ assert(memb->type == T_PYSSIZET);
+ assert(memb->flags == READONLY);
+ type->tp_weaklistoffset = memb->offset;
+ changed = 1;
+ }
+ else if (strcmp(memb->name, "__dictoffset__") == 0) {
+ assert(memb->type == T_PYSSIZET);
+ assert(memb->flags == READONLY);
+ type->tp_dictoffset = memb->offset;
+ changed = 1;
+ }
+#if CYTHON_METH_FASTCALL
+ else if (strcmp(memb->name, "__vectorcalloffset__") == 0) {
+ assert(memb->type == T_PYSSIZET);
+ assert(memb->flags == READONLY);
+#if PY_VERSION_HEX >= 0x030800b4
+ type->tp_vectorcall_offset = memb->offset;
+#else
+ type->tp_print = (printfunc) memb->offset;
+#endif
+ changed = 1;
+ }
+#endif
+#else
+ if ((0));
+#endif
+#if PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON
+ else if (strcmp(memb->name, "__module__") == 0) {
+ PyObject *descr;
+ assert(memb->type == T_OBJECT);
+ assert(memb->flags == 0 || memb->flags == READONLY);
+ descr = PyDescr_NewMember(type, memb);
+ if (unlikely(!descr))
+ return -1;
+ if (unlikely(PyDict_SetItem(type->tp_dict, PyDescr_NAME(descr), descr) < 0)) {
+ Py_DECREF(descr);
+ return -1;
+ }
+ Py_DECREF(descr);
+ changed = 1;
+ }
+#endif
+ }
+ memb++;
+ }
+ if (changed)
+ PyType_Modified(type);
+ }
+#endif
+ return 0;
+}
+#endif
+
+/* FetchSharedCythonModule */
+static PyObject *__Pyx_FetchSharedCythonABIModule(void) {
+ return __Pyx_PyImport_AddModuleRef((char*) __PYX_ABI_MODULE_NAME);
+}
+
+/* FetchCommonType */
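+/* Fetches a shared helper type from the common Cython ABI module, creating
+   and registering it on first use so that all Cython modules in the process
+   share a single type object; the basicsize check guards against ABI
+   mismatches between modules built with different toolkit versions. */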
+static int __Pyx_VerifyCachedType(PyObject *cached_type,
+ const char *name,
+ Py_ssize_t basicsize,
+ Py_ssize_t expected_basicsize) {
+ if (!PyType_Check(cached_type)) {
+ PyErr_Format(PyExc_TypeError,
+ "Shared Cython type %.200s is not a type object", name);
+ return -1;
+ }
+ if (basicsize != expected_basicsize) {
+ PyErr_Format(PyExc_TypeError,
+ "Shared Cython type %.200s has the wrong size, try recompiling",
+ name);
+ return -1;
+ }
+ return 0;
+}
+#if !CYTHON_USE_TYPE_SPECS
+static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type) {
+ PyObject* abi_module;
+ const char* object_name;
+ PyTypeObject *cached_type = NULL;
+ abi_module = __Pyx_FetchSharedCythonABIModule();
+ if (!abi_module) return NULL;
+ object_name = strrchr(type->tp_name, '.');
+ object_name = object_name ? object_name+1 : type->tp_name;
+ cached_type = (PyTypeObject*) PyObject_GetAttrString(abi_module, object_name);
+ if (cached_type) {
+ if (__Pyx_VerifyCachedType(
+ (PyObject *)cached_type,
+ object_name,
+ cached_type->tp_basicsize,
+ type->tp_basicsize) < 0) {
+ goto bad;
+ }
+ goto done;
+ }
+ if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad;
+ PyErr_Clear();
+ if (PyType_Ready(type) < 0) goto bad;
+ if (PyObject_SetAttrString(abi_module, object_name, (PyObject *)type) < 0)
+ goto bad;
+ Py_INCREF(type);
+ cached_type = type;
+done:
+ Py_DECREF(abi_module);
+ return cached_type;
+bad:
+ Py_XDECREF(cached_type);
+ cached_type = NULL;
+ goto done;
+}
+#else
+static PyTypeObject *__Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) {
+ PyObject *abi_module, *cached_type = NULL;
+ const char* object_name = strrchr(spec->name, '.');
+ object_name = object_name ? object_name+1 : spec->name;
+ abi_module = __Pyx_FetchSharedCythonABIModule();
+ if (!abi_module) return NULL;
+ cached_type = PyObject_GetAttrString(abi_module, object_name);
+ if (cached_type) {
+ Py_ssize_t basicsize;
+#if CYTHON_COMPILING_IN_LIMITED_API
+ PyObject *py_basicsize;
+ py_basicsize = PyObject_GetAttrString(cached_type, "__basicsize__");
+ if (unlikely(!py_basicsize)) goto bad;
+ basicsize = PyLong_AsSsize_t(py_basicsize);
+ Py_DECREF(py_basicsize);
+ py_basicsize = 0;
+ if (unlikely(basicsize == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad;
+#else
+ basicsize = likely(PyType_Check(cached_type)) ? ((PyTypeObject*) cached_type)->tp_basicsize : -1;
+#endif
+ if (__Pyx_VerifyCachedType(
+ cached_type,
+ object_name,
+ basicsize,
+ spec->basicsize) < 0) {
+ goto bad;
+ }
+ goto done;
+ }
+ if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad;
+ PyErr_Clear();
+ CYTHON_UNUSED_VAR(module);
+ cached_type = __Pyx_PyType_FromModuleAndSpec(abi_module, spec, bases);
+ if (unlikely(!cached_type)) goto bad;
+ if (unlikely(__Pyx_fix_up_extension_type_from_spec(spec, (PyTypeObject *) cached_type) < 0)) goto bad;
+ if (PyObject_SetAttrString(abi_module, object_name, cached_type) < 0) goto bad;
+done:
+ Py_DECREF(abi_module);
+ assert(cached_type == NULL || PyType_Check(cached_type));
+ return (PyTypeObject *) cached_type;
+bad:
+ Py_XDECREF(cached_type);
+ cached_type = NULL;
+ goto done;
+}
+#endif
+
+/* PyVectorcallFastCallDict */
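+/* Adapts a (args-array, kwargs-dict) call to the vectorcall convention:
+   keyword values are appended to a flat argument array and their keys are
+   collected into a kwnames tuple before invoking the vectorcall slot. */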
+#if CYTHON_METH_FASTCALL
+static PyObject *__Pyx_PyVectorcall_FastCallDict_kw(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw)
+{
+ PyObject *res = NULL;
+ PyObject *kwnames;
+ PyObject **newargs;
+ PyObject **kwvalues;
+ Py_ssize_t i, pos;
+ size_t j;
+ PyObject *key, *value;
+ unsigned long keys_are_strings;
+ Py_ssize_t nkw = PyDict_GET_SIZE(kw);
+ newargs = (PyObject **)PyMem_Malloc((nargs + (size_t)nkw) * sizeof(args[0]));
+ if (unlikely(newargs == NULL)) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ for (j = 0; j < nargs; j++) newargs[j] = args[j];
+ kwnames = PyTuple_New(nkw);
+ if (unlikely(kwnames == NULL)) {
+ PyMem_Free(newargs);
+ return NULL;
+ }
+ kwvalues = newargs + nargs;
+ pos = i = 0;
+ keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS;
+ while (PyDict_Next(kw, &pos, &key, &value)) {
+ keys_are_strings &= Py_TYPE(key)->tp_flags;
+ Py_INCREF(key);
+ Py_INCREF(value);
+ PyTuple_SET_ITEM(kwnames, i, key);
+ kwvalues[i] = value;
+ i++;
+ }
+ if (unlikely(!keys_are_strings)) {
+ PyErr_SetString(PyExc_TypeError, "keywords must be strings");
+ goto cleanup;
+ }
+ res = vc(func, newargs, nargs, kwnames);
+cleanup:
+ Py_DECREF(kwnames);
+ for (i = 0; i < nkw; i++)
+ Py_DECREF(kwvalues[i]);
+ PyMem_Free(newargs);
+ return res;
+}
+static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw)
+{
+ if (likely(kw == NULL) || PyDict_GET_SIZE(kw) == 0) {
+ return vc(func, args, nargs, NULL);
+ }
+ return __Pyx_PyVectorcall_FastCallDict_kw(func, vc, args, nargs, kw);
+}
+#endif
+
+/* CythonFunctionShared */
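+/* The shared implementation of Cython's function type: introspection
+   getters/setters (__doc__, __name__, __defaults__, ...), GC support, and
+   per-signature vectorcall entry points selected from the METH_* flags in
+   __Pyx_CyFunction_Init below. */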
+#if CYTHON_COMPILING_IN_LIMITED_API
+static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) {
+ if (__Pyx_CyFunction_Check(func)) {
+ return PyCFunction_GetFunction(((__pyx_CyFunctionObject*)func)->func) == (PyCFunction) cfunc;
+ } else if (PyCFunction_Check(func)) {
+ return PyCFunction_GetFunction(func) == (PyCFunction) cfunc;
+ }
+ return 0;
+}
+#else
+static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) {
+ return __Pyx_CyOrPyCFunction_Check(func) && __Pyx_CyOrPyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc;
+}
+#endif
+static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj) {
+#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API
+ __Pyx_Py_XDECREF_SET(
+ __Pyx_CyFunction_GetClassObj(f),
+ ((classobj) ? __Pyx_NewRef(classobj) : NULL));
+#else
+ __Pyx_Py_XDECREF_SET(
+ ((PyCMethodObject *) (f))->mm_class,
+ (PyTypeObject*)((classobj) ? __Pyx_NewRef(classobj) : NULL));
+#endif
+}
+static PyObject *
+__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, void *closure)
+{
+ CYTHON_UNUSED_VAR(closure);
+ if (unlikely(op->func_doc == NULL)) {
+#if CYTHON_COMPILING_IN_LIMITED_API
+ op->func_doc = PyObject_GetAttrString(op->func, "__doc__");
+ if (unlikely(!op->func_doc)) return NULL;
+#else
+ if (((PyCFunctionObject*)op)->m_ml->ml_doc) {
+#if PY_MAJOR_VERSION >= 3
+ op->func_doc = PyUnicode_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc);
+#else
+ op->func_doc = PyString_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc);
+#endif
+ if (unlikely(op->func_doc == NULL))
+ return NULL;
+ } else {
+ Py_INCREF(Py_None);
+ return Py_None;
+ }
+#endif
+ }
+ Py_INCREF(op->func_doc);
+ return op->func_doc;
+}
+static int
+__Pyx_CyFunction_set_doc(__pyx_CyFunctionObject *op, PyObject *value, void *context)
+{
+ CYTHON_UNUSED_VAR(context);
+ if (value == NULL) {
+ value = Py_None;
+ }
+ Py_INCREF(value);
+ __Pyx_Py_XDECREF_SET(op->func_doc, value);
+ return 0;
+}
+static PyObject *
+__Pyx_CyFunction_get_name(__pyx_CyFunctionObject *op, void *context)
+{
+ CYTHON_UNUSED_VAR(context);
+ if (unlikely(op->func_name == NULL)) {
+#if CYTHON_COMPILING_IN_LIMITED_API
+ op->func_name = PyObject_GetAttrString(op->func, "__name__");
+#elif PY_MAJOR_VERSION >= 3
+ op->func_name = PyUnicode_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name);
+#else
+ op->func_name = PyString_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name);
+#endif
+ if (unlikely(op->func_name == NULL))
+ return NULL;
+ }
+ Py_INCREF(op->func_name);
+ return op->func_name;
+}
+static int
+__Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value, void *context)
+{
+ CYTHON_UNUSED_VAR(context);
+#if PY_MAJOR_VERSION >= 3
+ if (unlikely(value == NULL || !PyUnicode_Check(value)))
+#else
+ if (unlikely(value == NULL || !PyString_Check(value)))
+#endif
+ {
+ PyErr_SetString(PyExc_TypeError,
+ "__name__ must be set to a string object");
+ return -1;
+ }
+ Py_INCREF(value);
+ __Pyx_Py_XDECREF_SET(op->func_name, value);
+ return 0;
+}
+static PyObject *
+__Pyx_CyFunction_get_qualname(__pyx_CyFunctionObject *op, void *context)
+{
+ CYTHON_UNUSED_VAR(context);
+ Py_INCREF(op->func_qualname);
+ return op->func_qualname;
+}
+static int
+__Pyx_CyFunction_set_qualname(__pyx_CyFunctionObject *op, PyObject *value, void *context)
+{
+ CYTHON_UNUSED_VAR(context);
+#if PY_MAJOR_VERSION >= 3
+ if (unlikely(value == NULL || !PyUnicode_Check(value)))
+#else
+ if (unlikely(value == NULL || !PyString_Check(value)))
+#endif
+ {
+ PyErr_SetString(PyExc_TypeError,
+ "__qualname__ must be set to a string object");
+ return -1;
+ }
+ Py_INCREF(value);
+ __Pyx_Py_XDECREF_SET(op->func_qualname, value);
+ return 0;
+}
+static PyObject *
+__Pyx_CyFunction_get_dict(__pyx_CyFunctionObject *op, void *context)
+{
+ CYTHON_UNUSED_VAR(context);
+ if (unlikely(op->func_dict == NULL)) {
+ op->func_dict = PyDict_New();
+ if (unlikely(op->func_dict == NULL))
+ return NULL;
+ }
+ Py_INCREF(op->func_dict);
+ return op->func_dict;
+}
+static int
+__Pyx_CyFunction_set_dict(__pyx_CyFunctionObject *op, PyObject *value, void *context)
+{
+ CYTHON_UNUSED_VAR(context);
+ if (unlikely(value == NULL)) {
+ PyErr_SetString(PyExc_TypeError,
+ "function's dictionary may not be deleted");
+ return -1;
+ }
+ if (unlikely(!PyDict_Check(value))) {
+ PyErr_SetString(PyExc_TypeError,
+ "setting function's dictionary to a non-dict");
+ return -1;
+ }
+ Py_INCREF(value);
+ __Pyx_Py_XDECREF_SET(op->func_dict, value);
+ return 0;
+}
+static PyObject *
+__Pyx_CyFunction_get_globals(__pyx_CyFunctionObject *op, void *context)
+{
+ CYTHON_UNUSED_VAR(context);
+ Py_INCREF(op->func_globals);
+ return op->func_globals;
+}
+static PyObject *
+__Pyx_CyFunction_get_closure(__pyx_CyFunctionObject *op, void *context)
+{
+ CYTHON_UNUSED_VAR(op);
+ CYTHON_UNUSED_VAR(context);
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+static PyObject *
+__Pyx_CyFunction_get_code(__pyx_CyFunctionObject *op, void *context)
+{
+ PyObject* result = (op->func_code) ? op->func_code : Py_None;
+ CYTHON_UNUSED_VAR(context);
+ Py_INCREF(result);
+ return result;
+}
+static int
+__Pyx_CyFunction_init_defaults(__pyx_CyFunctionObject *op) {
+ int result = 0;
+ PyObject *res = op->defaults_getter((PyObject *) op);
+ if (unlikely(!res))
+ return -1;
+ #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+ op->defaults_tuple = PyTuple_GET_ITEM(res, 0);
+ Py_INCREF(op->defaults_tuple);
+ op->defaults_kwdict = PyTuple_GET_ITEM(res, 1);
+ Py_INCREF(op->defaults_kwdict);
+ #else
+ op->defaults_tuple = __Pyx_PySequence_ITEM(res, 0);
+ if (unlikely(!op->defaults_tuple)) result = -1;
+ else {
+ op->defaults_kwdict = __Pyx_PySequence_ITEM(res, 1);
+ if (unlikely(!op->defaults_kwdict)) result = -1;
+ }
+ #endif
+ Py_DECREF(res);
+ return result;
+}
+static int
+__Pyx_CyFunction_set_defaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) {
+ CYTHON_UNUSED_VAR(context);
+ if (!value) {
+ value = Py_None;
+ } else if (unlikely(value != Py_None && !PyTuple_Check(value))) {
+ PyErr_SetString(PyExc_TypeError,
+ "__defaults__ must be set to a tuple object");
+ return -1;
+ }
+ PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__defaults__ will not "
+ "currently affect the values used in function calls", 1);
+ Py_INCREF(value);
+ __Pyx_Py_XDECREF_SET(op->defaults_tuple, value);
+ return 0;
+}
+static PyObject *
+__Pyx_CyFunction_get_defaults(__pyx_CyFunctionObject *op, void *context) {
+ PyObject* result = op->defaults_tuple;
+ CYTHON_UNUSED_VAR(context);
+ if (unlikely(!result)) {
+ if (op->defaults_getter) {
+ if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL;
+ result = op->defaults_tuple;
+ } else {
+ result = Py_None;
+ }
+ }
+ Py_INCREF(result);
+ return result;
+}
+static int
+__Pyx_CyFunction_set_kwdefaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) {
+ CYTHON_UNUSED_VAR(context);
+ if (!value) {
+ value = Py_None;
+ } else if (unlikely(value != Py_None && !PyDict_Check(value))) {
+ PyErr_SetString(PyExc_TypeError,
+ "__kwdefaults__ must be set to a dict object");
+ return -1;
+ }
+ PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__kwdefaults__ will not "
+ "currently affect the values used in function calls", 1);
+ Py_INCREF(value);
+ __Pyx_Py_XDECREF_SET(op->defaults_kwdict, value);
+ return 0;
+}
+static PyObject *
+__Pyx_CyFunction_get_kwdefaults(__pyx_CyFunctionObject *op, void *context) {
+ PyObject* result = op->defaults_kwdict;
+ CYTHON_UNUSED_VAR(context);
+ if (unlikely(!result)) {
+ if (op->defaults_getter) {
+ if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL;
+ result = op->defaults_kwdict;
+ } else {
+ result = Py_None;
+ }
+ }
+ Py_INCREF(result);
+ return result;
+}
+static int
+__Pyx_CyFunction_set_annotations(__pyx_CyFunctionObject *op, PyObject* value, void *context) {
+ CYTHON_UNUSED_VAR(context);
+ if (!value || value == Py_None) {
+ value = NULL;
+ } else if (unlikely(!PyDict_Check(value))) {
+ PyErr_SetString(PyExc_TypeError,
+ "__annotations__ must be set to a dict object");
+ return -1;
+ }
+ Py_XINCREF(value);
+ __Pyx_Py_XDECREF_SET(op->func_annotations, value);
+ return 0;
+}
+static PyObject *
+__Pyx_CyFunction_get_annotations(__pyx_CyFunctionObject *op, void *context) {
+ PyObject* result = op->func_annotations;
+ CYTHON_UNUSED_VAR(context);
+ if (unlikely(!result)) {
+ result = PyDict_New();
+ if (unlikely(!result)) return NULL;
+ op->func_annotations = result;
+ }
+ Py_INCREF(result);
+ return result;
+}
+static PyObject *
+__Pyx_CyFunction_get_is_coroutine(__pyx_CyFunctionObject *op, void *context) {
+ int is_coroutine;
+ CYTHON_UNUSED_VAR(context);
+ if (op->func_is_coroutine) {
+ return __Pyx_NewRef(op->func_is_coroutine);
+ }
+ is_coroutine = op->flags & __Pyx_CYFUNCTION_COROUTINE;
+#if PY_VERSION_HEX >= 0x03050000
+ if (is_coroutine) {
+ PyObject *module, *fromlist, *marker = __pyx_n_s_is_coroutine;
+ fromlist = PyList_New(1);
+ if (unlikely(!fromlist)) return NULL;
+ Py_INCREF(marker);
+#if CYTHON_ASSUME_SAFE_MACROS
+ PyList_SET_ITEM(fromlist, 0, marker);
+#else
+ if (unlikely(PyList_SetItem(fromlist, 0, marker) < 0)) {
+ Py_DECREF(marker);
+ Py_DECREF(fromlist);
+ return NULL;
+ }
+#endif
+ module = PyImport_ImportModuleLevelObject(__pyx_n_s_asyncio_coroutines, NULL, NULL, fromlist, 0);
+ Py_DECREF(fromlist);
+ if (unlikely(!module)) goto ignore;
+ op->func_is_coroutine = __Pyx_PyObject_GetAttrStr(module, marker);
+ Py_DECREF(module);
+ if (likely(op->func_is_coroutine)) {
+ return __Pyx_NewRef(op->func_is_coroutine);
+ }
+ignore:
+ PyErr_Clear();
+ }
+#endif
+ op->func_is_coroutine = __Pyx_PyBool_FromLong(is_coroutine);
+ return __Pyx_NewRef(op->func_is_coroutine);
+}
+#if CYTHON_COMPILING_IN_LIMITED_API
+static PyObject *
+__Pyx_CyFunction_get_module(__pyx_CyFunctionObject *op, void *context) {
+ CYTHON_UNUSED_VAR(context);
+ return PyObject_GetAttrString(op->func, "__module__");
+}
+static int
+__Pyx_CyFunction_set_module(__pyx_CyFunctionObject *op, PyObject* value, void *context) {
+ CYTHON_UNUSED_VAR(context);
+ return PyObject_SetAttrString(op->func, "__module__", value);
+}
+#endif
+static PyGetSetDef __pyx_CyFunction_getsets[] = {
+ {(char *) "func_doc", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0},
+ {(char *) "__doc__", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0},
+ {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0},
+ {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0},
+ {(char *) "__qualname__", (getter)__Pyx_CyFunction_get_qualname, (setter)__Pyx_CyFunction_set_qualname, 0, 0},
+ {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0},
+ {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0},
+ {(char *) "func_globals", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0},
+ {(char *) "__globals__", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0},
+ {(char *) "func_closure", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0},
+ {(char *) "__closure__", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0},
+ {(char *) "func_code", (getter)__Pyx_CyFunction_get_code, 0, 0, 0},
+ {(char *) "__code__", (getter)__Pyx_CyFunction_get_code, 0, 0, 0},
+ {(char *) "func_defaults", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0},
+ {(char *) "__defaults__", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0},
+ {(char *) "__kwdefaults__", (getter)__Pyx_CyFunction_get_kwdefaults, (setter)__Pyx_CyFunction_set_kwdefaults, 0, 0},
+ {(char *) "__annotations__", (getter)__Pyx_CyFunction_get_annotations, (setter)__Pyx_CyFunction_set_annotations, 0, 0},
+ {(char *) "_is_coroutine", (getter)__Pyx_CyFunction_get_is_coroutine, 0, 0, 0},
+#if CYTHON_COMPILING_IN_LIMITED_API
+ {"__module__", (getter)__Pyx_CyFunction_get_module, (setter)__Pyx_CyFunction_set_module, 0, 0},
+#endif
+ {0, 0, 0, 0, 0}
+};
+static PyMemberDef __pyx_CyFunction_members[] = {
+#if !CYTHON_COMPILING_IN_LIMITED_API
+ {(char *) "__module__", T_OBJECT, offsetof(PyCFunctionObject, m_module), 0, 0},
+#endif
+#if CYTHON_USE_TYPE_SPECS
+ {(char *) "__dictoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_dict), READONLY, 0},
+#if CYTHON_METH_FASTCALL
+#if CYTHON_BACKPORT_VECTORCALL
+ {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_vectorcall), READONLY, 0},
+#else
+#if !CYTHON_COMPILING_IN_LIMITED_API
+ {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(PyCFunctionObject, vectorcall), READONLY, 0},
+#endif
+#endif
+#endif
+#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API
+ {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_weakreflist), READONLY, 0},
+#else
+ {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(PyCFunctionObject, m_weakreflist), READONLY, 0},
+#endif
+#endif
+ {0, 0, 0, 0, 0}
+};
+static PyObject *
+__Pyx_CyFunction_reduce(__pyx_CyFunctionObject *m, PyObject *args)
+{
+ CYTHON_UNUSED_VAR(args);
+#if PY_MAJOR_VERSION >= 3
+ Py_INCREF(m->func_qualname);
+ return m->func_qualname;
+#else
+ return PyString_FromString(((PyCFunctionObject*)m)->m_ml->ml_name);
+#endif
+}
+static PyMethodDef __pyx_CyFunction_methods[] = {
+ {"__reduce__", (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0},
+ {0, 0, 0, 0}
+};
+#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API
+#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func_weakreflist)
+#else
+#define __Pyx_CyFunction_weakreflist(cyfunc) (((PyCFunctionObject*)cyfunc)->m_weakreflist)
+#endif
+static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname,
+ PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) {
+#if !CYTHON_COMPILING_IN_LIMITED_API
+ PyCFunctionObject *cf = (PyCFunctionObject*) op;
+#endif
+ if (unlikely(op == NULL))
+ return NULL;
+#if CYTHON_COMPILING_IN_LIMITED_API
+ op->func = PyCFunction_NewEx(ml, (PyObject*)op, module);
+ if (unlikely(!op->func)) return NULL;
+#endif
+ op->flags = flags;
+ __Pyx_CyFunction_weakreflist(op) = NULL;
+#if !CYTHON_COMPILING_IN_LIMITED_API
+ cf->m_ml = ml;
+ cf->m_self = (PyObject *) op;
+#endif
+ Py_XINCREF(closure);
+ op->func_closure = closure;
+#if !CYTHON_COMPILING_IN_LIMITED_API
+ Py_XINCREF(module);
+ cf->m_module = module;
+#endif
+ op->func_dict = NULL;
+ op->func_name = NULL;
+ Py_INCREF(qualname);
+ op->func_qualname = qualname;
+ op->func_doc = NULL;
+#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API
+ op->func_classobj = NULL;
+#else
+ ((PyCMethodObject*)op)->mm_class = NULL;
+#endif
+ op->func_globals = globals;
+ Py_INCREF(op->func_globals);
+ Py_XINCREF(code);
+ op->func_code = code;
+ op->defaults_pyobjects = 0;
+ op->defaults_size = 0;
+ op->defaults = NULL;
+ op->defaults_tuple = NULL;
+ op->defaults_kwdict = NULL;
+ op->defaults_getter = NULL;
+ op->func_annotations = NULL;
+ op->func_is_coroutine = NULL;
+#if CYTHON_METH_FASTCALL
+ switch (ml->ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) {
+ case METH_NOARGS:
+ __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_NOARGS;
+ break;
+ case METH_O:
+ __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_O;
+ break;
+ case METH_METHOD | METH_FASTCALL | METH_KEYWORDS:
+ __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD;
+ break;
+ case METH_FASTCALL | METH_KEYWORDS:
+ __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS;
+ break;
+ case METH_VARARGS | METH_KEYWORDS:
+ __Pyx_CyFunction_func_vectorcall(op) = NULL;
+ break;
+ default:
+ PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction");
+ Py_DECREF(op);
+ return NULL;
+ }
+#endif
+ return (PyObject *) op;
+}
+static int
+__Pyx_CyFunction_clear(__pyx_CyFunctionObject *m)
+{
+ Py_CLEAR(m->func_closure);
+#if CYTHON_COMPILING_IN_LIMITED_API
+ Py_CLEAR(m->func);
+#else
+ Py_CLEAR(((PyCFunctionObject*)m)->m_module);
+#endif
+ Py_CLEAR(m->func_dict);
+ Py_CLEAR(m->func_name);
+ Py_CLEAR(m->func_qualname);
+ Py_CLEAR(m->func_doc);
+ Py_CLEAR(m->func_globals);
+ Py_CLEAR(m->func_code);
+#if !CYTHON_COMPILING_IN_LIMITED_API
+#if PY_VERSION_HEX < 0x030900B1
+ Py_CLEAR(__Pyx_CyFunction_GetClassObj(m));
+#else
+ {
+ PyObject *cls = (PyObject*) ((PyCMethodObject *) (m))->mm_class;
+ ((PyCMethodObject *) (m))->mm_class = NULL;
+ Py_XDECREF(cls);
+ }
+#endif
+#endif
+ Py_CLEAR(m->defaults_tuple);
+ Py_CLEAR(m->defaults_kwdict);
+ Py_CLEAR(m->func_annotations);
+ Py_CLEAR(m->func_is_coroutine);
+ if (m->defaults) {
+ PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m);
+ int i;
+ for (i = 0; i < m->defaults_pyobjects; i++)
+ Py_XDECREF(pydefaults[i]);
+ PyObject_Free(m->defaults);
+ m->defaults = NULL;
+ }
+ return 0;
+}
+static void __Pyx__CyFunction_dealloc(__pyx_CyFunctionObject *m)
+{
+ if (__Pyx_CyFunction_weakreflist(m) != NULL)
+ PyObject_ClearWeakRefs((PyObject *) m);
+ __Pyx_CyFunction_clear(m);
+ __Pyx_PyHeapTypeObject_GC_Del(m);
+}
+static void __Pyx_CyFunction_dealloc(__pyx_CyFunctionObject *m)
+{
+ PyObject_GC_UnTrack(m);
+ __Pyx__CyFunction_dealloc(m);
+}
+static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit, void *arg)
+{
+ Py_VISIT(m->func_closure);
+#if CYTHON_COMPILING_IN_LIMITED_API
+ Py_VISIT(m->func);
+#else
+ Py_VISIT(((PyCFunctionObject*)m)->m_module);
+#endif
+ Py_VISIT(m->func_dict);
+ Py_VISIT(m->func_name);
+ Py_VISIT(m->func_qualname);
+ Py_VISIT(m->func_doc);
+ Py_VISIT(m->func_globals);
+ Py_VISIT(m->func_code);
+#if !CYTHON_COMPILING_IN_LIMITED_API
+ Py_VISIT(__Pyx_CyFunction_GetClassObj(m));
+#endif
+ Py_VISIT(m->defaults_tuple);
+ Py_VISIT(m->defaults_kwdict);
+ Py_VISIT(m->func_is_coroutine);
+ if (m->defaults) {
+ PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m);
+ int i;
+ for (i = 0; i < m->defaults_pyobjects; i++)
+ Py_VISIT(pydefaults[i]);
+ }
+ return 0;
+}
+static PyObject*
+__Pyx_CyFunction_repr(__pyx_CyFunctionObject *op)
+{
+#if PY_MAJOR_VERSION >= 3
+ return PyUnicode_FromFormat("",
+ op->func_qualname, (void *)op);
+#else
+ return PyString_FromFormat("",
+ PyString_AsString(op->func_qualname), (void *)op);
+#endif
+}
+static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, PyObject *arg, PyObject *kw) {
+#if CYTHON_COMPILING_IN_LIMITED_API
+ PyObject *f = ((__pyx_CyFunctionObject*)func)->func;
+ PyObject *py_name = NULL;
+ PyCFunction meth;
+ int flags;
+ meth = PyCFunction_GetFunction(f);
+ if (unlikely(!meth)) return NULL;
+ flags = PyCFunction_GetFlags(f);
+ if (unlikely(flags < 0)) return NULL;
+#else
+ PyCFunctionObject* f = (PyCFunctionObject*)func;
+ PyCFunction meth = f->m_ml->ml_meth;
+ int flags = f->m_ml->ml_flags;
+#endif
+ Py_ssize_t size;
+ switch (flags & (METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O)) {
+ case METH_VARARGS:
+ if (likely(kw == NULL || PyDict_Size(kw) == 0))
+ return (*meth)(self, arg);
+ break;
+ case METH_VARARGS | METH_KEYWORDS:
+ return (*(PyCFunctionWithKeywords)(void*)meth)(self, arg, kw);
+ case METH_NOARGS:
+ if (likely(kw == NULL || PyDict_Size(kw) == 0)) {
+#if CYTHON_ASSUME_SAFE_MACROS
+ size = PyTuple_GET_SIZE(arg);
+#else
+ size = PyTuple_Size(arg);
+ if (unlikely(size < 0)) return NULL;
+#endif
+ if (likely(size == 0))
+ return (*meth)(self, NULL);
+#if CYTHON_COMPILING_IN_LIMITED_API
+ py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL);
+ if (!py_name) return NULL;
+ PyErr_Format(PyExc_TypeError,
+ "%.200S() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)",
+ py_name, size);
+ Py_DECREF(py_name);
+#else
+ PyErr_Format(PyExc_TypeError,
+ "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)",
+ f->m_ml->ml_name, size);
+#endif
+ return NULL;
+ }
+ break;
+ case METH_O:
+ if (likely(kw == NULL || PyDict_Size(kw) == 0)) {
+#if CYTHON_ASSUME_SAFE_MACROS
+ size = PyTuple_GET_SIZE(arg);
+#else
+ size = PyTuple_Size(arg);
+ if (unlikely(size < 0)) return NULL;
+#endif
+ if (likely(size == 1)) {
+ PyObject *result, *arg0;
+ #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+ arg0 = PyTuple_GET_ITEM(arg, 0);
+ #else
+ arg0 = __Pyx_PySequence_ITEM(arg, 0); if (unlikely(!arg0)) return NULL;
+ #endif
+ result = (*meth)(self, arg0);
+ #if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS)
+ Py_DECREF(arg0);
+ #endif
+ return result;
+ }
+#if CYTHON_COMPILING_IN_LIMITED_API
+ py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL);
+ if (!py_name) return NULL;
+ PyErr_Format(PyExc_TypeError,
+ "%.200S() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)",
+ py_name, size);
+ Py_DECREF(py_name);
+#else
+ PyErr_Format(PyExc_TypeError,
+ "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)",
+ f->m_ml->ml_name, size);
+#endif
+ return NULL;
+ }
+ break;
+ default:
+ PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction");
+ return NULL;
+ }
+#if CYTHON_COMPILING_IN_LIMITED_API
+ py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL);
+ if (!py_name) return NULL;
+ PyErr_Format(PyExc_TypeError, "%.200S() takes no keyword arguments",
+ py_name);
+ Py_DECREF(py_name);
+#else
+ PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments",
+ f->m_ml->ml_name);
+#endif
+ return NULL;
+}
+static CYTHON_INLINE PyObject *__Pyx_CyFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) {
+ PyObject *self, *result;
+#if CYTHON_COMPILING_IN_LIMITED_API
+ self = PyCFunction_GetSelf(((__pyx_CyFunctionObject*)func)->func);
+ if (unlikely(!self) && PyErr_Occurred()) return NULL;
+#else
+ self = ((PyCFunctionObject*)func)->m_self;
+#endif
+ result = __Pyx_CyFunction_CallMethod(func, self, arg, kw);
+ return result;
+}
+static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, PyObject *kw) {
+ PyObject *result;
+ __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func;
+#if CYTHON_METH_FASTCALL
+ __pyx_vectorcallfunc vc = __Pyx_CyFunction_func_vectorcall(cyfunc);
+ if (vc) {
+#if CYTHON_ASSUME_SAFE_MACROS
+ return __Pyx_PyVectorcall_FastCallDict(func, vc, &PyTuple_GET_ITEM(args, 0), (size_t)PyTuple_GET_SIZE(args), kw);
+#else
+ (void) &__Pyx_PyVectorcall_FastCallDict;
+ return PyVectorcall_Call(func, args, kw);
+#endif
+ }
+#endif
+ if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) {
+ Py_ssize_t argc;
+ PyObject *new_args;
+ PyObject *self;
+#if CYTHON_ASSUME_SAFE_MACROS
+ argc = PyTuple_GET_SIZE(args);
+#else
+ argc = PyTuple_Size(args);
+ if (unlikely(!argc) < 0) return NULL;
+#endif
+ new_args = PyTuple_GetSlice(args, 1, argc);
+ if (unlikely(!new_args))
+ return NULL;
+ self = PyTuple_GetItem(args, 0);
+ if (unlikely(!self)) {
+ Py_DECREF(new_args);
+#if PY_MAJOR_VERSION > 2
+ PyErr_Format(PyExc_TypeError,
+ "unbound method %.200S() needs an argument",
+ cyfunc->func_qualname);
+#else
+ PyErr_SetString(PyExc_TypeError,
+ "unbound method needs an argument");
+#endif
+ return NULL;
+ }
+ result = __Pyx_CyFunction_CallMethod(func, self, new_args, kw);
+ Py_DECREF(new_args);
+ } else {
+ result = __Pyx_CyFunction_Call(func, args, kw);
+ }
+ return result;
+}
+#if CYTHON_METH_FASTCALL
+static CYTHON_INLINE int __Pyx_CyFunction_Vectorcall_CheckArgs(__pyx_CyFunctionObject *cyfunc, Py_ssize_t nargs, PyObject *kwnames)
+{
+ int ret = 0;
+ if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) {
+ if (unlikely(nargs < 1)) {
+ PyErr_Format(PyExc_TypeError, "%.200s() needs an argument",
+ ((PyCFunctionObject*)cyfunc)->m_ml->ml_name);
+ return -1;
+ }
+ ret = 1;
+ }
+ if (unlikely(kwnames) && unlikely(PyTuple_GET_SIZE(kwnames))) {
+ PyErr_Format(PyExc_TypeError,
+ "%.200s() takes no keyword arguments", ((PyCFunctionObject*)cyfunc)->m_ml->ml_name);
+ return -1;
+ }
+ return ret;
+}
+static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames)
+{
+ __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func;
+ PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml;
+#if CYTHON_BACKPORT_VECTORCALL
+ Py_ssize_t nargs = (Py_ssize_t)nargsf;
+#else
+ Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
+#endif
+ PyObject *self;
+ switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) {
+ case 1:
+ self = args[0];
+ args += 1;
+ nargs -= 1;
+ break;
+ case 0:
+ self = ((PyCFunctionObject*)cyfunc)->m_self;
+ break;
+ default:
+ return NULL;
+ }
+ if (unlikely(nargs != 0)) {
+ PyErr_Format(PyExc_TypeError,
+ "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)",
+ def->ml_name, nargs);
+ return NULL;
+ }
+ return def->ml_meth(self, NULL);
+}
+static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames)
+{
+ __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func;
+ PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml;
+#if CYTHON_BACKPORT_VECTORCALL
+ Py_ssize_t nargs = (Py_ssize_t)nargsf;
+#else
+ Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
+#endif
+ PyObject *self;
+ switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) {
+ case 1:
+ self = args[0];
+ args += 1;
+ nargs -= 1;
+ break;
+ case 0:
+ self = ((PyCFunctionObject*)cyfunc)->m_self;
+ break;
+ default:
+ return NULL;
+ }
+ if (unlikely(nargs != 1)) {
+ PyErr_Format(PyExc_TypeError,
+ "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)",
+ def->ml_name, nargs);
+ return NULL;
+ }
+ return def->ml_meth(self, args[0]);
+}
+static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames)
+{
+ __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func;
+ PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml;
+#if CYTHON_BACKPORT_VECTORCALL
+ Py_ssize_t nargs = (Py_ssize_t)nargsf;
+#else
+ Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
+#endif
+ PyObject *self;
+ switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) {
+ case 1:
+ self = args[0];
+ args += 1;
+ nargs -= 1;
+ break;
+ case 0:
+ self = ((PyCFunctionObject*)cyfunc)->m_self;
+ break;
+ default:
+ return NULL;
+ }
+ return ((__Pyx_PyCFunctionFastWithKeywords)(void(*)(void))def->ml_meth)(self, args, nargs, kwnames);
+}
+static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames)
+{
+ __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func;
+ PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml;
+ PyTypeObject *cls = (PyTypeObject *) __Pyx_CyFunction_GetClassObj(cyfunc);
+#if CYTHON_BACKPORT_VECTORCALL
+ Py_ssize_t nargs = (Py_ssize_t)nargsf;
+#else
+ Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
+#endif
+ PyObject *self;
+ switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) {
+ case 1:
+ self = args[0];
+ args += 1;
+ nargs -= 1;
+ break;
+ case 0:
+ self = ((PyCFunctionObject*)cyfunc)->m_self;
+ break;
+ default:
+ return NULL;
+ }
+ return ((__Pyx_PyCMethod)(void(*)(void))def->ml_meth)(self, cls, args, (size_t)nargs, kwnames);
+}
+#endif
+#if CYTHON_USE_TYPE_SPECS
+static PyType_Slot __pyx_CyFunctionType_slots[] = {
+ {Py_tp_dealloc, (void *)__Pyx_CyFunction_dealloc},
+ {Py_tp_repr, (void *)__Pyx_CyFunction_repr},
+ {Py_tp_call, (void *)__Pyx_CyFunction_CallAsMethod},
+ {Py_tp_traverse, (void *)__Pyx_CyFunction_traverse},
+ {Py_tp_clear, (void *)__Pyx_CyFunction_clear},
+ {Py_tp_methods, (void *)__pyx_CyFunction_methods},
+ {Py_tp_members, (void *)__pyx_CyFunction_members},
+ {Py_tp_getset, (void *)__pyx_CyFunction_getsets},
+ {Py_tp_descr_get, (void *)__Pyx_PyMethod_New},
+ {0, 0},
+};
+static PyType_Spec __pyx_CyFunctionType_spec = {
+ __PYX_TYPE_MODULE_PREFIX "cython_function_or_method",
+ sizeof(__pyx_CyFunctionObject),
+ 0,
+#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR
+ Py_TPFLAGS_METHOD_DESCRIPTOR |
+#endif
+#if (defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL)
+ _Py_TPFLAGS_HAVE_VECTORCALL |
+#endif
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE,
+ __pyx_CyFunctionType_slots
+};
+#else
+static PyTypeObject __pyx_CyFunctionType_type = {
+ PyVarObject_HEAD_INIT(0, 0)
+ __PYX_TYPE_MODULE_PREFIX "cython_function_or_method",
+ sizeof(__pyx_CyFunctionObject),
+ 0,
+ (destructor) __Pyx_CyFunction_dealloc,
+#if !CYTHON_METH_FASTCALL
+ 0,
+#elif CYTHON_BACKPORT_VECTORCALL
+ (printfunc)offsetof(__pyx_CyFunctionObject, func_vectorcall),
+#else
+ offsetof(PyCFunctionObject, vectorcall),
+#endif
+ 0,
+ 0,
+#if PY_MAJOR_VERSION < 3
+ 0,
+#else
+ 0,
+#endif
+ (reprfunc) __Pyx_CyFunction_repr,
+ 0,
+ 0,
+ 0,
+ 0,
+ __Pyx_CyFunction_CallAsMethod,
+ 0,
+ 0,
+ 0,
+ 0,
+#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR
+ Py_TPFLAGS_METHOD_DESCRIPTOR |
+#endif
+#if defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL
+ _Py_TPFLAGS_HAVE_VECTORCALL |
+#endif
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE,
+ 0,
+ (traverseproc) __Pyx_CyFunction_traverse,
+ (inquiry) __Pyx_CyFunction_clear,
+ 0,
+#if PY_VERSION_HEX < 0x030500A0
+ offsetof(__pyx_CyFunctionObject, func_weakreflist),
+#else
+ offsetof(PyCFunctionObject, m_weakreflist),
+#endif
+ 0,
+ 0,
+ __pyx_CyFunction_methods,
+ __pyx_CyFunction_members,
+ __pyx_CyFunction_getsets,
+ 0,
+ 0,
+ __Pyx_PyMethod_New,
+ 0,
+ offsetof(__pyx_CyFunctionObject, func_dict),
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+#if PY_VERSION_HEX >= 0x030400a1
+ 0,
+#endif
+#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800)
+ 0,
+#endif
+#if __PYX_NEED_TP_PRINT_SLOT
+ 0,
+#endif
+#if PY_VERSION_HEX >= 0x030C0000
+ 0,
+#endif
+#if PY_VERSION_HEX >= 0x030d00A4
+ 0,
+#endif
+#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000
+ 0,
+#endif
+};
+#endif
+static int __pyx_CyFunction_init(PyObject *module) {
+#if CYTHON_USE_TYPE_SPECS
+ __pyx_CyFunctionType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_CyFunctionType_spec, NULL);
+#else
+ CYTHON_UNUSED_VAR(module);
+ __pyx_CyFunctionType = __Pyx_FetchCommonType(&__pyx_CyFunctionType_type);
+#endif
+ if (unlikely(__pyx_CyFunctionType == NULL)) {
+ return -1;
+ }
+ return 0;
+}
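+/* Default-argument helpers: InitDefaults allocates zeroed storage for the
+   C-level default values, and the setters below install the defaults tuple,
+   keyword-defaults dict and annotations dict (taking a new reference each). */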
+static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t size, int pyobjects) {
+ __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
+ m->defaults = PyObject_Malloc(size);
+ if (unlikely(!m->defaults))
+ return PyErr_NoMemory();
+ memset(m->defaults, 0, size);
+ m->defaults_pyobjects = pyobjects;
+ m->defaults_size = size;
+ return m->defaults;
+}
+static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) {
+ __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
+ m->defaults_tuple = tuple;
+ Py_INCREF(tuple);
+}
+static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *func, PyObject *dict) {
+ __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
+ m->defaults_kwdict = dict;
+ Py_INCREF(dict);
+}
+static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, PyObject *dict) {
+ __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
+ m->func_annotations = dict;
+ Py_INCREF(dict);
+}
+
+/* CythonFunction */
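+/* Creates a cyfunction object via the shared initialiser and only starts GC
+   tracking once the object has been fully set up. */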
+static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qualname,
+ PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) {
+ PyObject *op = __Pyx_CyFunction_Init(
+ PyObject_GC_New(__pyx_CyFunctionObject, __pyx_CyFunctionType),
+ ml, flags, qualname, closure, module, globals, code
+ );
+ if (likely(op)) {
+ PyObject_GC_Track(op);
+ }
+ return op;
+}
+
+/* RaiseUnexpectedTypeError */
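+/* Raises a TypeError naming the expected and the actual type; always returns
+   0 so it can be used directly in error-return expressions. */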
+static int
+__Pyx_RaiseUnexpectedTypeError(const char *expected, PyObject *obj)
+{
+ __Pyx_TypeName obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj));
+ PyErr_Format(PyExc_TypeError, "Expected %s, got " __Pyx_FMT_TYPENAME,
+ expected, obj_type_name);
+ __Pyx_DECREF_TypeName(obj_type_name);
+ return 0;
+}
+
+/* UnpackUnboundCMethod */
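+/* Resolves and caches an unbound C method: on CPython the raw ml_meth pointer
+   and flags are lifted from the method descriptor so later calls can bypass
+   the generic call machinery; a builtin that is already bound gets wrapped so
+   the leading 'self' argument is stripped off again. */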
+static PyObject *__Pyx_SelflessCall(PyObject *method, PyObject *args, PyObject *kwargs) {
+ PyObject *result;
+ PyObject *selfless_args = PyTuple_GetSlice(args, 1, PyTuple_Size(args));
+ if (unlikely(!selfless_args)) return NULL;
+ result = PyObject_Call(method, selfless_args, kwargs);
+ Py_DECREF(selfless_args);
+ return result;
+}
+static PyMethodDef __Pyx_UnboundCMethod_Def = {
+ "CythonUnboundCMethod",
+ __PYX_REINTERPRET_FUNCION(PyCFunction, __Pyx_SelflessCall),
+ METH_VARARGS | METH_KEYWORDS,
+ NULL
+};
+static int __Pyx_TryUnpackUnboundCMethod(__Pyx_CachedCFunction* target) {
+ PyObject *method;
+ method = __Pyx_PyObject_GetAttrStr(target->type, *target->method_name);
+ if (unlikely(!method))
+ return -1;
+ target->method = method;
+#if CYTHON_COMPILING_IN_CPYTHON
+ #if PY_MAJOR_VERSION >= 3
+ if (likely(__Pyx_TypeCheck(method, &PyMethodDescr_Type)))
+ #else
+ if (likely(!__Pyx_CyOrPyCFunction_Check(method)))
+ #endif
+ {
+ PyMethodDescrObject *descr = (PyMethodDescrObject*) method;
+ target->func = descr->d_method->ml_meth;
+ target->flag = descr->d_method->ml_flags & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_STACKLESS);
+ } else
+#endif
+#if CYTHON_COMPILING_IN_PYPY
+#else
+ if (PyCFunction_Check(method))
+#endif
+ {
+ PyObject *self;
+ int self_found;
+#if CYTHON_COMPILING_IN_LIMITED_API || CYTHON_COMPILING_IN_PYPY
+ self = PyObject_GetAttrString(method, "__self__");
+ if (!self) {
+ PyErr_Clear();
+ }
+#else
+ self = PyCFunction_GET_SELF(method);
+#endif
+ self_found = (self && self != Py_None);
+#if CYTHON_COMPILING_IN_LIMITED_API || CYTHON_COMPILING_IN_PYPY
+ Py_XDECREF(self);
+#endif
+ if (self_found) {
+ PyObject *unbound_method = PyCFunction_New(&__Pyx_UnboundCMethod_Def, method);
+ if (unlikely(!unbound_method)) return -1;
+ Py_DECREF(method);
+ target->method = unbound_method;
+ }
+ }
+ return 0;
+}
+
+/* CallUnboundCMethod0 */
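+/* Calls a cached unbound C method with 'self' as the only argument, packed
+   into a 1-tuple for the generic call path. */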
+static PyObject* __Pyx__CallUnboundCMethod0(__Pyx_CachedCFunction* cfunc, PyObject* self) {
+ PyObject *args, *result = NULL;
+ if (unlikely(!cfunc->method) && unlikely(__Pyx_TryUnpackUnboundCMethod(cfunc) < 0)) return NULL;
+#if CYTHON_ASSUME_SAFE_MACROS
+ args = PyTuple_New(1);
+ if (unlikely(!args)) goto bad;
+ Py_INCREF(self);
+ PyTuple_SET_ITEM(args, 0, self);
+#else
+ args = PyTuple_Pack(1, self);
+ if (unlikely(!args)) goto bad;
+#endif
+ result = __Pyx_PyObject_Call(cfunc->method, args, NULL);
+ Py_DECREF(args);
+bad:
+ return result;
+}
+
+/* set_iter */
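+/* Fast set iteration: on CPython below 3.13 the entries are walked directly
+   with _PySet_NextEntry, re-checking the original size on every step to catch
+   mutation during iteration; otherwise a generic iterator is used. */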
+static CYTHON_INLINE PyObject* __Pyx_set_iterator(PyObject* iterable, int is_set,
+ Py_ssize_t* p_orig_length, int* p_source_is_set) {
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000
+ is_set = is_set || likely(PySet_CheckExact(iterable) || PyFrozenSet_CheckExact(iterable));
+ *p_source_is_set = is_set;
+ if (likely(is_set)) {
+ *p_orig_length = PySet_Size(iterable);
+ Py_INCREF(iterable);
+ return iterable;
+ }
+#else
+ CYTHON_UNUSED_VAR(is_set);
+ *p_source_is_set = 0;
+#endif
+ *p_orig_length = 0;
+ return PyObject_GetIter(iterable);
+}
+static CYTHON_INLINE int __Pyx_set_iter_next(
+ PyObject* iter_obj, Py_ssize_t orig_length,
+ Py_ssize_t* ppos, PyObject **value,
+ int source_is_set) {
+ if (!CYTHON_COMPILING_IN_CPYTHON || PY_VERSION_HEX >= 0x030d0000 || unlikely(!source_is_set)) {
+ *value = PyIter_Next(iter_obj);
+ if (unlikely(!*value)) {
+ return __Pyx_IterFinish();
+ }
+ CYTHON_UNUSED_VAR(orig_length);
+ CYTHON_UNUSED_VAR(ppos);
+ return 1;
+ }
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000
+ if (unlikely(PySet_GET_SIZE(iter_obj) != orig_length)) {
+ PyErr_SetString(
+ PyExc_RuntimeError,
+ "set changed size during iteration");
+ return -1;
+ }
+ {
+ Py_hash_t hash;
+ int ret = _PySet_NextEntry(iter_obj, ppos, value, &hash);
+ assert (ret != -1);
+ if (likely(ret)) {
+ Py_INCREF(*value);
+ return 1;
+ }
+ }
+#endif
+ return 0;
+}
+
+/* CIntToDigits */
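+/* Digit lookup tables for integer formatting: emitting two digits per
+   division for bases 10 and 8 halves the number of divisions; the hex table
+   stores the lower- and upper-case alphabets back to back. */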
+static const char DIGIT_PAIRS_10[2*10*10+1] = {
+ "00010203040506070809"
+ "10111213141516171819"
+ "20212223242526272829"
+ "30313233343536373839"
+ "40414243444546474849"
+ "50515253545556575859"
+ "60616263646566676869"
+ "70717273747576777879"
+ "80818283848586878889"
+ "90919293949596979899"
+};
+static const char DIGIT_PAIRS_8[2*8*8+1] = {
+ "0001020304050607"
+ "1011121314151617"
+ "2021222324252627"
+ "3031323334353637"
+ "4041424344454647"
+ "5051525354555657"
+ "6061626364656667"
+ "7071727374757677"
+};
+static const char DIGITS_HEX[2*16+1] = {
+ "0123456789abcdef"
+ "0123456789ABCDEF"
+};
+
+/* BuildPyUnicode */
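+/* Builds a unicode object from a buffer of ASCII characters, prepending an
+   optional minus sign and padding up to the requested total length. */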
+static PyObject* __Pyx_PyUnicode_BuildFromAscii(Py_ssize_t ulength, char* chars, int clength,
+ int prepend_sign, char padding_char) {
+ PyObject *uval;
+ Py_ssize_t uoffset = ulength - clength;
+#if CYTHON_USE_UNICODE_INTERNALS
+ Py_ssize_t i;
+#if CYTHON_PEP393_ENABLED
+ void *udata;
+ uval = PyUnicode_New(ulength, 127);
+ if (unlikely(!uval)) return NULL;
+ udata = PyUnicode_DATA(uval);
+#else
+ Py_UNICODE *udata;
+ uval = PyUnicode_FromUnicode(NULL, ulength);
+ if (unlikely(!uval)) return NULL;
+ udata = PyUnicode_AS_UNICODE(uval);
+#endif
+ if (uoffset > 0) {
+ i = 0;
+ if (prepend_sign) {
+ __Pyx_PyUnicode_WRITE(PyUnicode_1BYTE_KIND, udata, 0, '-');
+ i++;
+ }
+ for (; i < uoffset; i++) {
+ __Pyx_PyUnicode_WRITE(PyUnicode_1BYTE_KIND, udata, i, padding_char);
+ }
+ }
+ for (i=0; i < clength; i++) {
+ __Pyx_PyUnicode_WRITE(PyUnicode_1BYTE_KIND, udata, uoffset+i, chars[i]);
+ }
+#else
+ {
+ PyObject *sign = NULL, *padding = NULL;
+ uval = NULL;
+ if (uoffset > 0) {
+ prepend_sign = !!prepend_sign;
+ if (uoffset > prepend_sign) {
+ padding = PyUnicode_FromOrdinal(padding_char);
+ if (likely(padding) && uoffset > prepend_sign + 1) {
+ PyObject *tmp;
+ PyObject *repeat = PyInt_FromSsize_t(uoffset - prepend_sign);
+ if (unlikely(!repeat)) goto done_or_error;
+ tmp = PyNumber_Multiply(padding, repeat);
+ Py_DECREF(repeat);
+ Py_DECREF(padding);
+ padding = tmp;
+ }
+ if (unlikely(!padding)) goto done_or_error;
+ }
+ if (prepend_sign) {
+ sign = PyUnicode_FromOrdinal('-');
+ if (unlikely(!sign)) goto done_or_error;
+ }
+ }
+ uval = PyUnicode_DecodeASCII(chars, clength, NULL);
+ if (likely(uval) && padding) {
+ PyObject *tmp = PyNumber_Add(padding, uval);
+ Py_DECREF(uval);
+ uval = tmp;
+ }
+ if (likely(uval) && sign) {
+ PyObject *tmp = PyNumber_Add(sign, uval);
+ Py_DECREF(uval);
+ uval = tmp;
+ }
+done_or_error:
+ Py_XDECREF(padding);
+ Py_XDECREF(sign);
+ }
+#endif
+ return uval;
+}
+
+/* CIntToPyUnicode */
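+/* Formats a C int as a unicode object: digits are written backwards into a
+   small stack buffer (two at a time for 'd' and 'o') and then passed on to
+   __Pyx_PyUnicode_BuildFromAscii together with sign and padding. */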
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_From_int(int value, Py_ssize_t width, char padding_char, char format_char) {
+ char digits[sizeof(int)*3+2];
+ char *dpos, *end = digits + sizeof(int)*3+2;
+ const char *hex_digits = DIGITS_HEX;
+ Py_ssize_t length, ulength;
+ int prepend_sign, last_one_off;
+ int remaining;
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+ const int neg_one = (int) -1, const_zero = (int) 0;
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic pop
+#endif
+ const int is_unsigned = neg_one > const_zero;
+ if (format_char == 'X') {
+ hex_digits += 16;
+ format_char = 'x';
+ }
+ remaining = value;
+ last_one_off = 0;
+ dpos = end;
+ do {
+ int digit_pos;
+ switch (format_char) {
+ case 'o':
+ digit_pos = abs((int)(remaining % (8*8)));
+ remaining = (int) (remaining / (8*8));
+ dpos -= 2;
+ memcpy(dpos, DIGIT_PAIRS_8 + digit_pos * 2, 2);
+ last_one_off = (digit_pos < 8);
+ break;
+ case 'd':
+ digit_pos = abs((int)(remaining % (10*10)));
+ remaining = (int) (remaining / (10*10));
+ dpos -= 2;
+ memcpy(dpos, DIGIT_PAIRS_10 + digit_pos * 2, 2);
+ last_one_off = (digit_pos < 10);
+ break;
+ case 'x':
+ *(--dpos) = hex_digits[abs((int)(remaining % 16))];
+ remaining = (int) (remaining / 16);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ } while (unlikely(remaining != 0));
+ assert(!last_one_off || *dpos == '0');
+ dpos += last_one_off;
+ length = end - dpos;
+ ulength = length;
+ prepend_sign = 0;
+ if (!is_unsigned && value <= neg_one) {
+ if (padding_char == ' ' || width <= length + 1) {
+ *(--dpos) = '-';
+ ++length;
+ } else {
+ prepend_sign = 1;
+ }
+ ++ulength;
+ }
+ if (width > ulength) {
+ ulength = width;
+ }
+ if (ulength == 1) {
+ return PyUnicode_FromOrdinal(*dpos);
+ }
+ return __Pyx_PyUnicode_BuildFromAscii(ulength, dpos, (int) length, prepend_sign, padding_char);
+}
+
+/* JoinPyUnicode */
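+/* str.join() specialisation: with access to the unicode internals the result
+   is preallocated from the known total length and maximum character, and each
+   value is copied straight into the result buffer (a plain memcpy when the
+   character kinds match). */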
+static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength,
+ Py_UCS4 max_char) {
+#if CYTHON_USE_UNICODE_INTERNALS && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+ PyObject *result_uval;
+ int result_ukind, kind_shift;
+ Py_ssize_t i, char_pos;
+ void *result_udata;
+ CYTHON_MAYBE_UNUSED_VAR(max_char);
+#if CYTHON_PEP393_ENABLED
+ result_uval = PyUnicode_New(result_ulength, max_char);
+ if (unlikely(!result_uval)) return NULL;
+ result_ukind = (max_char <= 255) ? PyUnicode_1BYTE_KIND : (max_char <= 65535) ? PyUnicode_2BYTE_KIND : PyUnicode_4BYTE_KIND;
+ kind_shift = (result_ukind == PyUnicode_4BYTE_KIND) ? 2 : result_ukind - 1;
+ result_udata = PyUnicode_DATA(result_uval);
+#else
+ result_uval = PyUnicode_FromUnicode(NULL, result_ulength);
+ if (unlikely(!result_uval)) return NULL;
+ result_ukind = sizeof(Py_UNICODE);
+ kind_shift = (result_ukind == 4) ? 2 : result_ukind - 1;
+ result_udata = PyUnicode_AS_UNICODE(result_uval);
+#endif
+ assert(kind_shift == 2 || kind_shift == 1 || kind_shift == 0);
+ char_pos = 0;
+ for (i=0; i < value_count; i++) {
+ int ukind;
+ Py_ssize_t ulength;
+ void *udata;
+ PyObject *uval = PyTuple_GET_ITEM(value_tuple, i);
+ if (unlikely(__Pyx_PyUnicode_READY(uval)))
+ goto bad;
+ ulength = __Pyx_PyUnicode_GET_LENGTH(uval);
+ if (unlikely(!ulength))
+ continue;
+ if (unlikely((PY_SSIZE_T_MAX >> kind_shift) - ulength < char_pos))
+ goto overflow;
+ ukind = __Pyx_PyUnicode_KIND(uval);
+ udata = __Pyx_PyUnicode_DATA(uval);
+ if (!CYTHON_PEP393_ENABLED || ukind == result_ukind) {
+ memcpy((char *)result_udata + (char_pos << kind_shift), udata, (size_t) (ulength << kind_shift));
+ } else {
+ #if PY_VERSION_HEX >= 0x030d0000
+ if (unlikely(PyUnicode_CopyCharacters(result_uval, char_pos, uval, 0, ulength) < 0)) goto bad;
+ #elif CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030300F0 || defined(_PyUnicode_FastCopyCharacters)
+ _PyUnicode_FastCopyCharacters(result_uval, char_pos, uval, 0, ulength);
+ #else
+ Py_ssize_t j;
+ for (j=0; j < ulength; j++) {
+ Py_UCS4 uchar = __Pyx_PyUnicode_READ(ukind, udata, j);
+ __Pyx_PyUnicode_WRITE(result_ukind, result_udata, char_pos+j, uchar);
+ }
+ #endif
+ }
+ char_pos += ulength;
+ }
+ return result_uval;
+overflow:
+ PyErr_SetString(PyExc_OverflowError, "join() result is too long for a Python string");
+bad:
+ Py_DECREF(result_uval);
+ return NULL;
+#else
+ CYTHON_UNUSED_VAR(max_char);
+ CYTHON_UNUSED_VAR(result_ulength);
+ CYTHON_UNUSED_VAR(value_count);
+ return PyUnicode_Join(__pyx_empty_unicode, value_tuple);
+#endif
+}
+
+/* UnicodeConcatInPlace */
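+/* In-place '+=' for str: when the left operand has refcount 1, is an exact,
+   non-interned unicode object and can represent the right operand's
+   characters, it is resized and the right-hand characters are copied in;
+   otherwise this degrades to a regular concatenation. */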
+# if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
+static int
+__Pyx_unicode_modifiable(PyObject *unicode)
+{
+ if (Py_REFCNT(unicode) != 1)
+ return 0;
+ if (!PyUnicode_CheckExact(unicode))
+ return 0;
+ if (PyUnicode_CHECK_INTERNED(unicode))
+ return 0;
+ return 1;
+}
+static CYTHON_INLINE PyObject *__Pyx_PyUnicode_ConcatInPlaceImpl(PyObject **p_left, PyObject *right
+ #if CYTHON_REFNANNY
+ , void* __pyx_refnanny
+ #endif
+ ) {
+ PyObject *left = *p_left;
+ Py_ssize_t left_len, right_len, new_len;
+ if (unlikely(__Pyx_PyUnicode_READY(left) == -1))
+ return NULL;
+ if (unlikely(__Pyx_PyUnicode_READY(right) == -1))
+ return NULL;
+ left_len = PyUnicode_GET_LENGTH(left);
+ if (left_len == 0) {
+ Py_INCREF(right);
+ return right;
+ }
+ right_len = PyUnicode_GET_LENGTH(right);
+ if (right_len == 0) {
+ Py_INCREF(left);
+ return left;
+ }
+ if (unlikely(left_len > PY_SSIZE_T_MAX - right_len)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "strings are too large to concat");
+ return NULL;
+ }
+ new_len = left_len + right_len;
+ if (__Pyx_unicode_modifiable(left)
+ && PyUnicode_CheckExact(right)
+ && PyUnicode_KIND(right) <= PyUnicode_KIND(left)
+ && !(PyUnicode_IS_ASCII(left) && !PyUnicode_IS_ASCII(right))) {
+ int ret;
+ __Pyx_GIVEREF(*p_left);
+ ret = PyUnicode_Resize(p_left, new_len);
+ __Pyx_GOTREF(*p_left);
+ if (unlikely(ret != 0))
+ return NULL;
+ #if PY_VERSION_HEX >= 0x030d0000
+ if (unlikely(PyUnicode_CopyCharacters(*p_left, left_len, right, 0, right_len) < 0)) return NULL;
+ #else
+ _PyUnicode_FastCopyCharacters(*p_left, left_len, right, 0, right_len);
+ #endif
+ __Pyx_INCREF(*p_left);
+ __Pyx_GIVEREF(*p_left);
+ return *p_left;
+ } else {
+ return __Pyx_PyUnicode_Concat(left, right);
+ }
+ }
+#endif
+
+/* CallUnboundCMethod1 */
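+/* One-argument variant: dispatches directly on the cached ml_meth pointer for
+   METH_O and METH_FASTCALL signatures and otherwise falls back to a
+   tuple-based call through the method object. */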
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg) {
+ if (likely(cfunc->func)) {
+ int flag = cfunc->flag;
+ if (flag == METH_O) {
+ return (*(cfunc->func))(self, arg);
+ } else if ((PY_VERSION_HEX >= 0x030600B1) && flag == METH_FASTCALL) {
+ #if PY_VERSION_HEX >= 0x030700A0
+ return (*(__Pyx_PyCFunctionFast)(void*)(PyCFunction)cfunc->func)(self, &arg, 1);
+ #else
+ return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, &arg, 1, NULL);
+ #endif
+ } else if ((PY_VERSION_HEX >= 0x030700A0) && flag == (METH_FASTCALL | METH_KEYWORDS)) {
+ return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, &arg, 1, NULL);
+ }
+ }
+ return __Pyx__CallUnboundCMethod1(cfunc, self, arg);
+}
+#endif
+static PyObject* __Pyx__CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg){
+ PyObject *args, *result = NULL;
+ if (unlikely(!cfunc->func && !cfunc->method) && unlikely(__Pyx_TryUnpackUnboundCMethod(cfunc) < 0)) return NULL;
+#if CYTHON_COMPILING_IN_CPYTHON
+ if (cfunc->func && (cfunc->flag & METH_VARARGS)) {
+ args = PyTuple_New(1);
+ if (unlikely(!args)) goto bad;
+ Py_INCREF(arg);
+ PyTuple_SET_ITEM(args, 0, arg);
+ if (cfunc->flag & METH_KEYWORDS)
+ result = (*(PyCFunctionWithKeywords)(void*)(PyCFunction)cfunc->func)(self, args, NULL);
+ else
+ result = (*cfunc->func)(self, args);
+ } else {
+ args = PyTuple_New(2);
+ if (unlikely(!args)) goto bad;
+ Py_INCREF(self);
+ PyTuple_SET_ITEM(args, 0, self);
+ Py_INCREF(arg);
+ PyTuple_SET_ITEM(args, 1, arg);
+ result = __Pyx_PyObject_Call(cfunc->method, args, NULL);
+ }
+#else
+ args = PyTuple_Pack(2, self, arg);
+ if (unlikely(!args)) goto bad;
+ result = __Pyx_PyObject_Call(cfunc->method, args, NULL);
+#endif
+bad:
+ Py_XDECREF(args);
+ return result;
+}
+
+/* CallUnboundCMethod2 */
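+/* Two-argument counterpart of the helper above, with the same fastcall
+   dispatch on the cached ml_meth pointer. */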
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030600B1
+static CYTHON_INLINE PyObject *__Pyx_CallUnboundCMethod2(__Pyx_CachedCFunction *cfunc, PyObject *self, PyObject *arg1, PyObject *arg2) {
+ if (likely(cfunc->func)) {
+ PyObject *args[2] = {arg1, arg2};
+ if (cfunc->flag == METH_FASTCALL) {
+ #if PY_VERSION_HEX >= 0x030700A0
+ return (*(__Pyx_PyCFunctionFast)(void*)(PyCFunction)cfunc->func)(self, args, 2);
+ #else
+ return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, args, 2, NULL);
+ #endif
+ }
+ #if PY_VERSION_HEX >= 0x030700A0
+ if (cfunc->flag == (METH_FASTCALL | METH_KEYWORDS))
+ return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, args, 2, NULL);
+ #endif
+ }
+ return __Pyx__CallUnboundCMethod2(cfunc, self, arg1, arg2);
+}
+#endif
+static PyObject* __Pyx__CallUnboundCMethod2(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg1, PyObject* arg2){
+ PyObject *args, *result = NULL;
+ if (unlikely(!cfunc->func && !cfunc->method) && unlikely(__Pyx_TryUnpackUnboundCMethod(cfunc) < 0)) return NULL;
+#if CYTHON_COMPILING_IN_CPYTHON
+ if (cfunc->func && (cfunc->flag & METH_VARARGS)) {
+ args = PyTuple_New(2);
+ if (unlikely(!args)) goto bad;
+ Py_INCREF(arg1);
+ PyTuple_SET_ITEM(args, 0, arg1);
+ Py_INCREF(arg2);
+ PyTuple_SET_ITEM(args, 1, arg2);
+ if (cfunc->flag & METH_KEYWORDS)
+ result = (*(PyCFunctionWithKeywords)(void*)(PyCFunction)cfunc->func)(self, args, NULL);
+ else
+ result = (*cfunc->func)(self, args);
+ } else {
+ args = PyTuple_New(3);
+ if (unlikely(!args)) goto bad;
+ Py_INCREF(self);
+ PyTuple_SET_ITEM(args, 0, self);
+ Py_INCREF(arg1);
+ PyTuple_SET_ITEM(args, 1, arg1);
+ Py_INCREF(arg2);
+ PyTuple_SET_ITEM(args, 2, arg2);
+ result = __Pyx_PyObject_Call(cfunc->method, args, NULL);
+ }
+#else
+ args = PyTuple_Pack(3, self, arg1, arg2);
+ if (unlikely(!args)) goto bad;
+ result = __Pyx_PyObject_Call(cfunc->method, args, NULL);
+#endif
+bad:
+ Py_XDECREF(args);
+ return result;
+}
+
+/* dict_getitem_default */
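+/* dict.get(key, default): uses PyDict_GetItemWithError where available and
+   otherwise calls the cached unbound dict.get method. */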
+static PyObject* __Pyx_PyDict_GetItemDefault(PyObject* d, PyObject* key, PyObject* default_value) {
+ PyObject* value;
+#if PY_MAJOR_VERSION >= 3 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07020000)
+ value = PyDict_GetItemWithError(d, key);
+ if (unlikely(!value)) {
+ if (unlikely(PyErr_Occurred()))
+ return NULL;
+ value = default_value;
+ }
+ Py_INCREF(value);
+ if ((1));
+#else
+ if (PyString_CheckExact(key) || PyUnicode_CheckExact(key) || PyInt_CheckExact(key)) {
+ value = PyDict_GetItem(d, key);
+ if (unlikely(!value)) {
+ value = default_value;
+ }
+ Py_INCREF(value);
+ }
+#endif
+ else {
+ if (default_value == Py_None)
+ value = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PyDict_Type_get, d, key);
+ else
+ value = __Pyx_CallUnboundCMethod2(&__pyx_umethod_PyDict_Type_get, d, key, default_value);
+ }
+ return value;
+}
+
+/* GetItemInt */
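+/* Indexing with a C integer: list and tuple fast paths use the direct item
+   macros with optional wraparound and bounds checking; other objects go
+   through the mapping/sequence slots and finally PyObject_GetItem. */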
+static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) {
+ PyObject *r;
+ if (unlikely(!j)) return NULL;
+ r = PyObject_GetItem(o, j);
+ Py_DECREF(j);
+ return r;
+}
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i,
+ CYTHON_NCP_UNUSED int wraparound,
+ CYTHON_NCP_UNUSED int boundscheck) {
+#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+ Py_ssize_t wrapped_i = i;
+ if (wraparound & unlikely(i < 0)) {
+ wrapped_i += PyList_GET_SIZE(o);
+ }
+ if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyList_GET_SIZE(o)))) {
+ PyObject *r = PyList_GET_ITEM(o, wrapped_i);
+ Py_INCREF(r);
+ return r;
+ }
+ return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
+#else
+ return PySequence_GetItem(o, i);
+#endif
+}
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i,
+ CYTHON_NCP_UNUSED int wraparound,
+ CYTHON_NCP_UNUSED int boundscheck) {
+#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+ Py_ssize_t wrapped_i = i;
+ if (wraparound & unlikely(i < 0)) {
+ wrapped_i += PyTuple_GET_SIZE(o);
+ }
+ if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyTuple_GET_SIZE(o)))) {
+ PyObject *r = PyTuple_GET_ITEM(o, wrapped_i);
+ Py_INCREF(r);
+ return r;
+ }
+ return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
+#else
+ return PySequence_GetItem(o, i);
+#endif
+}
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list,
+ CYTHON_NCP_UNUSED int wraparound,
+ CYTHON_NCP_UNUSED int boundscheck) {
+#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS
+ if (is_list || PyList_CheckExact(o)) {
+ Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o);
+ if ((!boundscheck) || (likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o))))) {
+ PyObject *r = PyList_GET_ITEM(o, n);
+ Py_INCREF(r);
+ return r;
+ }
+ }
+ else if (PyTuple_CheckExact(o)) {
+ Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o);
+ if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyTuple_GET_SIZE(o)))) {
+ PyObject *r = PyTuple_GET_ITEM(o, n);
+ Py_INCREF(r);
+ return r;
+ }
+ } else {
+ PyMappingMethods *mm = Py_TYPE(o)->tp_as_mapping;
+ PySequenceMethods *sm = Py_TYPE(o)->tp_as_sequence;
+ if (mm && mm->mp_subscript) {
+ PyObject *r, *key = PyInt_FromSsize_t(i);
+ if (unlikely(!key)) return NULL;
+ r = mm->mp_subscript(o, key);
+ Py_DECREF(key);
+ return r;
+ }
+ if (likely(sm && sm->sq_item)) {
+ if (wraparound && unlikely(i < 0) && likely(sm->sq_length)) {
+ Py_ssize_t l = sm->sq_length(o);
+ if (likely(l >= 0)) {
+ i += l;
+ } else {
+ if (!PyErr_ExceptionMatches(PyExc_OverflowError))
+ return NULL;
+ PyErr_Clear();
+ }
+ }
+ return sm->sq_item(o, i);
+ }
+ }
+#else
+ if (is_list || !PyMapping_Check(o)) {
+ return PySequence_GetItem(o, i);
+ }
+#endif
+ return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
+}
+
+/* PyUnicode_Unicode */
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Unicode(PyObject *obj) {
+ if (unlikely(obj == Py_None))
+ obj = __pyx_kp_u_None;
+ return __Pyx_NewRef(obj);
+}
+
+/* ArgTypeTest */
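+/* Slow path of the typed-argument check: raises a TypeError naming both the
+   expected and the actual type and returns 0; the remaining accepting cases
+   return 1. */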
+static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact)
+{
+ __Pyx_TypeName type_name;
+ __Pyx_TypeName obj_type_name;
+ if (unlikely(!type)) {
+ PyErr_SetString(PyExc_SystemError, "Missing type object");
+ return 0;
+ }
+ else if (exact) {
+ #if PY_MAJOR_VERSION == 2
+ if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1;
+ #endif
+ }
+ else {
+ if (likely(__Pyx_TypeCheck(obj, type))) return 1;
+ }
+ type_name = __Pyx_PyType_GetName(type);
+ obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj));
+ PyErr_Format(PyExc_TypeError,
+ "Argument '%.200s' has incorrect type (expected " __Pyx_FMT_TYPENAME
+ ", got " __Pyx_FMT_TYPENAME ")", name, type_name, obj_type_name);
+ __Pyx_DECREF_TypeName(type_name);
+ __Pyx_DECREF_TypeName(obj_type_name);
+ return 0;
+}
+
+/* KeywordStringCheck */
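+/* Verifies that all keyword argument names are strings (and, when kw_allowed
+   is false, that no keywords were passed at all); kw may be a dict or, under
+   the fastcall convention, a tuple of keyword names. */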
+static int __Pyx_CheckKeywordStrings(
+ PyObject *kw,
+ const char* function_name,
+ int kw_allowed)
+{
+ PyObject* key = 0;
+ Py_ssize_t pos = 0;
+#if CYTHON_COMPILING_IN_PYPY
+ if (!kw_allowed && PyDict_Next(kw, &pos, &key, 0))
+ goto invalid_keyword;
+ return 1;
+#else
+ if (CYTHON_METH_FASTCALL && likely(PyTuple_Check(kw))) {
+ Py_ssize_t kwsize;
+#if CYTHON_ASSUME_SAFE_MACROS
+ kwsize = PyTuple_GET_SIZE(kw);
+#else
+ kwsize = PyTuple_Size(kw);
+ if (kwsize < 0) return 0;
+#endif
+ if (unlikely(kwsize == 0))
+ return 1;
+ if (!kw_allowed) {
+#if CYTHON_ASSUME_SAFE_MACROS
+ key = PyTuple_GET_ITEM(kw, 0);
+#else
+ key = PyTuple_GetItem(kw, pos);
+ if (!key) return 0;
+#endif
+ goto invalid_keyword;
+ }
+#if PY_VERSION_HEX < 0x03090000
+ for (pos = 0; pos < kwsize; pos++) {
+#if CYTHON_ASSUME_SAFE_MACROS
+ key = PyTuple_GET_ITEM(kw, pos);
+#else
+ key = PyTuple_GetItem(kw, pos);
+ if (!key) return 0;
+#endif
+ if (unlikely(!PyUnicode_Check(key)))
+ goto invalid_keyword_type;
+ }
+#endif
+ return 1;
+ }
+ while (PyDict_Next(kw, &pos, &key, 0)) {
+ #if PY_MAJOR_VERSION < 3
+ if (unlikely(!PyString_Check(key)))
+ #endif
+ if (unlikely(!PyUnicode_Check(key)))
+ goto invalid_keyword_type;
+ }
+ if (!kw_allowed && unlikely(key))
+ goto invalid_keyword;
+ return 1;
+invalid_keyword_type:
+ PyErr_Format(PyExc_TypeError,
+ "%.200s() keywords must be strings", function_name);
+ return 0;
+#endif
+invalid_keyword:
+ #if PY_MAJOR_VERSION < 3
+ PyErr_Format(PyExc_TypeError,
+ "%.200s() got an unexpected keyword argument '%.200s'",
+ function_name, PyString_AsString(key));
+ #else
+ PyErr_Format(PyExc_TypeError,
+ "%s() got an unexpected keyword argument '%U'",
+ function_name, key);
+ #endif
+ return 0;
+}
+
+/* RaiseException */
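+/* Implements 'raise' with separate type/value/traceback (and optional cause):
+   the accepted argument shapes are normalised into an exception instance, then
+   cause and traceback are attached before the exception is set. */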
+#if PY_MAJOR_VERSION < 3
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) {
+ __Pyx_PyThreadState_declare
+ CYTHON_UNUSED_VAR(cause);
+ Py_XINCREF(type);
+ if (!value || value == Py_None)
+ value = NULL;
+ else
+ Py_INCREF(value);
+ if (!tb || tb == Py_None)
+ tb = NULL;
+ else {
+ Py_INCREF(tb);
+ if (!PyTraceBack_Check(tb)) {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: arg 3 must be a traceback or None");
+ goto raise_error;
+ }
+ }
+ if (PyType_Check(type)) {
+#if CYTHON_COMPILING_IN_PYPY
+ if (!value) {
+ Py_INCREF(Py_None);
+ value = Py_None;
+ }
+#endif
+ PyErr_NormalizeException(&type, &value, &tb);
+ } else {
+ if (value) {
+ PyErr_SetString(PyExc_TypeError,
+ "instance exception may not have a separate value");
+ goto raise_error;
+ }
+ value = type;
+ type = (PyObject*) Py_TYPE(type);
+ Py_INCREF(type);
+ if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: exception class must be a subclass of BaseException");
+ goto raise_error;
+ }
+ }
+ __Pyx_PyThreadState_assign
+ __Pyx_ErrRestore(type, value, tb);
+ return;
+raise_error:
+ Py_XDECREF(value);
+ Py_XDECREF(type);
+ Py_XDECREF(tb);
+ return;
+}
+#else
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) {
+ PyObject* owned_instance = NULL;
+ if (tb == Py_None) {
+ tb = 0;
+ } else if (tb && !PyTraceBack_Check(tb)) {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: arg 3 must be a traceback or None");
+ goto bad;
+ }
+ if (value == Py_None)
+ value = 0;
+ if (PyExceptionInstance_Check(type)) {
+ if (value) {
+ PyErr_SetString(PyExc_TypeError,
+ "instance exception may not have a separate value");
+ goto bad;
+ }
+ value = type;
+ type = (PyObject*) Py_TYPE(value);
+ } else if (PyExceptionClass_Check(type)) {
+ PyObject *instance_class = NULL;
+ if (value && PyExceptionInstance_Check(value)) {
+ instance_class = (PyObject*) Py_TYPE(value);
+ if (instance_class != type) {
+ int is_subclass = PyObject_IsSubclass(instance_class, type);
+ if (!is_subclass) {
+ instance_class = NULL;
+ } else if (unlikely(is_subclass == -1)) {
+ goto bad;
+ } else {
+ type = instance_class;
+ }
+ }
+ }
+ if (!instance_class) {
+ PyObject *args;
+ if (!value)
+ args = PyTuple_New(0);
+ else if (PyTuple_Check(value)) {
+ Py_INCREF(value);
+ args = value;
+ } else
+ args = PyTuple_Pack(1, value);
+ if (!args)
+ goto bad;
+ owned_instance = PyObject_Call(type, args, NULL);
+ Py_DECREF(args);
+ if (!owned_instance)
+ goto bad;
+ value = owned_instance;
+ if (!PyExceptionInstance_Check(value)) {
+ PyErr_Format(PyExc_TypeError,
+ "calling %R should have returned an instance of "
+ "BaseException, not %R",
+ type, Py_TYPE(value));
+ goto bad;
+ }
+ }
+ } else {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: exception class must be a subclass of BaseException");
+ goto bad;
+ }
+ if (cause) {
+ PyObject *fixed_cause;
+ if (cause == Py_None) {
+ fixed_cause = NULL;
+ } else if (PyExceptionClass_Check(cause)) {
+ fixed_cause = PyObject_CallObject(cause, NULL);
+ if (fixed_cause == NULL)
+ goto bad;
+ } else if (PyExceptionInstance_Check(cause)) {
+ fixed_cause = cause;
+ Py_INCREF(fixed_cause);
+ } else {
+ PyErr_SetString(PyExc_TypeError,
+ "exception causes must derive from "
+ "BaseException");
+ goto bad;
+ }
+ PyException_SetCause(value, fixed_cause);
+ }
+ PyErr_SetObject(type, value);
+ if (tb) {
+ #if PY_VERSION_HEX >= 0x030C00A6
+ PyException_SetTraceback(value, tb);
+ #elif CYTHON_FAST_THREAD_STATE
+ PyThreadState *tstate = __Pyx_PyThreadState_Current;
+ PyObject* tmp_tb = tstate->curexc_traceback;
+ if (tb != tmp_tb) {
+ Py_INCREF(tb);
+ tstate->curexc_traceback = tb;
+ Py_XDECREF(tmp_tb);
+ }
+#else
+ PyObject *tmp_type, *tmp_value, *tmp_tb;
+ PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb);
+ Py_INCREF(tb);
+ PyErr_Restore(tmp_type, tmp_value, tb);
+ Py_XDECREF(tmp_tb);
+#endif
+ }
+bad:
+ Py_XDECREF(owned_instance);
+ return;
+}
+#endif
+
+/* ValidateBasesTuple */
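+/* Rejects base tuples that the C-level layout cannot support: non-heap base
+   types, and bases that add a __dict__ slot when the extension type itself
+   declares none. */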
+#if CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API || CYTHON_USE_TYPE_SPECS
+static int __Pyx_validate_bases_tuple(const char *type_name, Py_ssize_t dictoffset, PyObject *bases) {
+ Py_ssize_t i, n;
+#if CYTHON_ASSUME_SAFE_MACROS
+ n = PyTuple_GET_SIZE(bases);
+#else
+ n = PyTuple_Size(bases);
+ if (n < 0) return -1;
+#endif
+ for (i = 1; i < n; i++)
+ {
+#if CYTHON_AVOID_BORROWED_REFS
+ PyObject *b0 = PySequence_GetItem(bases, i);
+ if (!b0) return -1;
+#elif CYTHON_ASSUME_SAFE_MACROS
+ PyObject *b0 = PyTuple_GET_ITEM(bases, i);
+#else
+ PyObject *b0 = PyTuple_GetItem(bases, i);
+ if (!b0) return -1;
+#endif
+ PyTypeObject *b;
+#if PY_MAJOR_VERSION < 3
+ if (PyClass_Check(b0))
+ {
+ PyErr_Format(PyExc_TypeError, "base class '%.200s' is an old-style class",
+ PyString_AS_STRING(((PyClassObject*)b0)->cl_name));
+#if CYTHON_AVOID_BORROWED_REFS
+ Py_DECREF(b0);
+#endif
+ return -1;
+ }
+#endif
+ b = (PyTypeObject*) b0;
+ if (!__Pyx_PyType_HasFeature(b, Py_TPFLAGS_HEAPTYPE))
+ {
+ __Pyx_TypeName b_name = __Pyx_PyType_GetName(b);
+ PyErr_Format(PyExc_TypeError,
+ "base class '" __Pyx_FMT_TYPENAME "' is not a heap type", b_name);
+ __Pyx_DECREF_TypeName(b_name);
+#if CYTHON_AVOID_BORROWED_REFS
+ Py_DECREF(b0);
+#endif
+ return -1;
+ }
+ if (dictoffset == 0)
+ {
+ Py_ssize_t b_dictoffset = 0;
+#if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY
+ b_dictoffset = b->tp_dictoffset;
+#else
+ PyObject *py_b_dictoffset = PyObject_GetAttrString((PyObject*)b, "__dictoffset__");
+ if (!py_b_dictoffset) goto dictoffset_return;
+ b_dictoffset = PyLong_AsSsize_t(py_b_dictoffset);
+ Py_DECREF(py_b_dictoffset);
+ if (b_dictoffset == -1 && PyErr_Occurred()) goto dictoffset_return;
+#endif
+ if (b_dictoffset) {
+ {
+ __Pyx_TypeName b_name = __Pyx_PyType_GetName(b);
+ PyErr_Format(PyExc_TypeError,
+ "extension type '%.200s' has no __dict__ slot, "
+ "but base type '" __Pyx_FMT_TYPENAME "' has: "
+ "either add 'cdef dict __dict__' to the extension type "
+ "or add '__slots__ = [...]' to the base type",
+ type_name, b_name);
+ __Pyx_DECREF_TypeName(b_name);
+ }
+#if !(CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY)
+ dictoffset_return:
+#endif
+#if CYTHON_AVOID_BORROWED_REFS
+ Py_DECREF(b0);
+#endif
+ return -1;
+ }
+ }
+#if CYTHON_AVOID_BORROWED_REFS
+ Py_DECREF(b0);
+#endif
+ }
+ return 0;
+}
+#endif
+
+/* PyType_Ready */
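+/* Wrapper around PyType_Ready for statically declared types: where needed,
+   the GC is temporarily disabled and Py_TPFLAGS_HEAPTYPE is set only for the
+   duration of the call, then cleared again afterwards. */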
+static int __Pyx_PyType_Ready(PyTypeObject *t) {
+#if CYTHON_USE_TYPE_SPECS || !(CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API) || defined(PYSTON_MAJOR_VERSION)
+ (void)__Pyx_PyObject_CallMethod0;
+#if CYTHON_USE_TYPE_SPECS
+ (void)__Pyx_validate_bases_tuple;
+#endif
+ return PyType_Ready(t);
+#else
+ int r;
+ PyObject *bases = __Pyx_PyType_GetSlot(t, tp_bases, PyObject*);
+ if (bases && unlikely(__Pyx_validate_bases_tuple(t->tp_name, t->tp_dictoffset, bases) == -1))
+ return -1;
+#if PY_VERSION_HEX >= 0x03050000 && !defined(PYSTON_MAJOR_VERSION)
+ {
+ int gc_was_enabled;
+ #if PY_VERSION_HEX >= 0x030A00b1
+ gc_was_enabled = PyGC_Disable();
+ (void)__Pyx_PyObject_CallMethod0;
+ #else
+ PyObject *ret, *py_status;
+ PyObject *gc = NULL;
+ #if PY_VERSION_HEX >= 0x030700a1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM+0 >= 0x07030400)
+ gc = PyImport_GetModule(__pyx_kp_u_gc);
+ #endif
+ if (unlikely(!gc)) gc = PyImport_Import(__pyx_kp_u_gc);
+ if (unlikely(!gc)) return -1;
+ py_status = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_isenabled);
+ if (unlikely(!py_status)) {
+ Py_DECREF(gc);
+ return -1;
+ }
+ gc_was_enabled = __Pyx_PyObject_IsTrue(py_status);
+ Py_DECREF(py_status);
+ if (gc_was_enabled > 0) {
+ ret = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_disable);
+ if (unlikely(!ret)) {
+ Py_DECREF(gc);
+ return -1;
+ }
+ Py_DECREF(ret);
+ } else if (unlikely(gc_was_enabled == -1)) {
+ Py_DECREF(gc);
+ return -1;
+ }
+ #endif
+ t->tp_flags |= Py_TPFLAGS_HEAPTYPE;
+#if PY_VERSION_HEX >= 0x030A0000
+ t->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE;
+#endif
+#else
+ (void)__Pyx_PyObject_CallMethod0;
+#endif
+ r = PyType_Ready(t);
+#if PY_VERSION_HEX >= 0x03050000 && !defined(PYSTON_MAJOR_VERSION)
+ t->tp_flags &= ~Py_TPFLAGS_HEAPTYPE;
+ #if PY_VERSION_HEX >= 0x030A00b1
+ if (gc_was_enabled)
+ PyGC_Enable();
+ #else
+ if (gc_was_enabled) {
+ PyObject *tp, *v, *tb;
+ PyErr_Fetch(&tp, &v, &tb);
+ ret = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_enable);
+ if (likely(ret || r == -1)) {
+ Py_XDECREF(ret);
+ PyErr_Restore(tp, v, tb);
+ } else {
+ Py_XDECREF(tp);
+ Py_XDECREF(v);
+ Py_XDECREF(tb);
+ r = -1;
+ }
+ }
+ Py_DECREF(gc);
+ #endif
+ }
+#endif
+ return r;
+#endif
+}
+
+/* PyObject_GenericGetAttrNoDict */
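+/* Attribute lookup fast path for Python < 3.7 on objects without an instance
+   dict: the attribute is resolved through _PyType_Lookup and the descriptor
+   protocol only. */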
+#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000
+static PyObject *__Pyx_RaiseGenericGetAttributeError(PyTypeObject *tp, PyObject *attr_name) {
+ __Pyx_TypeName type_name = __Pyx_PyType_GetName(tp);
+ PyErr_Format(PyExc_AttributeError,
+#if PY_MAJOR_VERSION >= 3
+ "'" __Pyx_FMT_TYPENAME "' object has no attribute '%U'",
+ type_name, attr_name);
+#else
+ "'" __Pyx_FMT_TYPENAME "' object has no attribute '%.400s'",
+ type_name, PyString_AS_STRING(attr_name));
+#endif
+ __Pyx_DECREF_TypeName(type_name);
+ return NULL;
+}
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name) {
+ PyObject *descr;
+ PyTypeObject *tp = Py_TYPE(obj);
+ if (unlikely(!PyString_Check(attr_name))) {
+ return PyObject_GenericGetAttr(obj, attr_name);
+ }
+ assert(!tp->tp_dictoffset);
+ descr = _PyType_Lookup(tp, attr_name);
+ if (unlikely(!descr)) {
+ return __Pyx_RaiseGenericGetAttributeError(tp, attr_name);
+ }
+ Py_INCREF(descr);
+ #if PY_MAJOR_VERSION < 3
+ if (likely(PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_HAVE_CLASS)))
+ #endif
+ {
+ descrgetfunc f = Py_TYPE(descr)->tp_descr_get;
+ if (unlikely(f)) {
+ PyObject *res = f(descr, obj, (PyObject *)tp);
+ Py_DECREF(descr);
+ return res;
+ }
+ }
+ return descr;
+}
+#endif
+
+/* PyObject_GenericGetAttr */
+#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000
+static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name) {
+ if (unlikely(Py_TYPE(obj)->tp_dictoffset)) {
+ return PyObject_GenericGetAttr(obj, attr_name);
+ }
+ return __Pyx_PyObject_GenericGetAttrNoDict(obj, attr_name);
+}
+#endif
+
+/* SetVTable */
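+/* Stores the extension type's C-level vtable on the type as a PyCapsule (in
+   tp_dict, or as a type attribute under the Limited API). */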
+static int __Pyx_SetVtable(PyTypeObject *type, void *vtable) {
+ PyObject *ob = PyCapsule_New(vtable, 0, 0);
+ if (unlikely(!ob))
+ goto bad;
+#if CYTHON_COMPILING_IN_LIMITED_API
+ if (unlikely(PyObject_SetAttr((PyObject *) type, __pyx_n_s_pyx_vtable, ob) < 0))
+#else
+ if (unlikely(PyDict_SetItem(type->tp_dict, __pyx_n_s_pyx_vtable, ob) < 0))
+#endif
+ goto bad;
+ Py_DECREF(ob);
+ return 0;
+bad:
+ Py_XDECREF(ob);
+ return -1;
+}
+
+/* GetVTable */
+static void* __Pyx_GetVtable(PyTypeObject *type) {
+ void* ptr;
+#if CYTHON_COMPILING_IN_LIMITED_API
+ PyObject *ob = PyObject_GetAttr((PyObject *)type, __pyx_n_s_pyx_vtable);
+#else
+ PyObject *ob = PyObject_GetItem(type->tp_dict, __pyx_n_s_pyx_vtable);
+#endif
+ if (!ob)
+ goto bad;
+ ptr = PyCapsule_GetPointer(ob, 0);
+ if (!ptr && !PyErr_Occurred())
+ PyErr_SetString(PyExc_RuntimeError, "invalid vtable found for imported type");
+ Py_DECREF(ob);
+ return ptr;
+bad:
+ Py_XDECREF(ob);
+ return NULL;
+}
+
+/* MergeVTables */
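+/* Checks the vtables of all bases for compatibility: every base vtable must
+   occur somewhere along the primary tp_base chain, otherwise two bases carry
+   conflicting C vtables and a TypeError is raised. */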
+#if !CYTHON_COMPILING_IN_LIMITED_API
+static int __Pyx_MergeVtables(PyTypeObject *type) {
+ int i;
+ void** base_vtables;
+ __Pyx_TypeName tp_base_name;
+ __Pyx_TypeName base_name;
+ void* unknown = (void*)-1;
+ PyObject* bases = type->tp_bases;
+ int base_depth = 0;
+ {
+ PyTypeObject* base = type->tp_base;
+ while (base) {
+ base_depth += 1;
+ base = base->tp_base;
+ }
+ }
+ base_vtables = (void**) malloc(sizeof(void*) * (size_t)(base_depth + 1));
+ if (unlikely(!base_vtables)) { PyErr_NoMemory(); return -1; }
+ base_vtables[0] = unknown;
+ for (i = 1; i < PyTuple_GET_SIZE(bases); i++) {
+ void* base_vtable = __Pyx_GetVtable(((PyTypeObject*)PyTuple_GET_ITEM(bases, i)));
+ if (base_vtable != NULL) {
+ int j;
+ PyTypeObject* base = type->tp_base;
+ for (j = 0; j < base_depth; j++) {
+ if (base_vtables[j] == unknown) {
+ base_vtables[j] = __Pyx_GetVtable(base);
+ base_vtables[j + 1] = unknown;
+ }
+ if (base_vtables[j] == base_vtable) {
+ break;
+ } else if (base_vtables[j] == NULL) {
+ goto bad;
+ }
+ base = base->tp_base;
+ }
+ }
+ }
+ PyErr_Clear();
+ free(base_vtables);
+ return 0;
+bad:
+ tp_base_name = __Pyx_PyType_GetName(type->tp_base);
+ base_name = __Pyx_PyType_GetName((PyTypeObject*)PyTuple_GET_ITEM(bases, i));
+ PyErr_Format(PyExc_TypeError,
+ "multiple bases have vtable conflict: '" __Pyx_FMT_TYPENAME "' and '" __Pyx_FMT_TYPENAME "'", tp_base_name, base_name);
+ __Pyx_DECREF_TypeName(tp_base_name);
+ __Pyx_DECREF_TypeName(base_name);
+ free(base_vtables);
+ return -1;
+}
+#endif
+
+/* SetupReduce */
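+/* Wires up pickling: if the type still uses the default object reduce
+   protocol (and defines no custom __getstate__), the generated
+   __reduce_cython__ / __setstate_cython__ helpers are renamed into place as
+   __reduce__ and __setstate__. */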
+#if !CYTHON_COMPILING_IN_LIMITED_API
+static int __Pyx_setup_reduce_is_named(PyObject* meth, PyObject* name) {
+ int ret;
+ PyObject *name_attr;
+ name_attr = __Pyx_PyObject_GetAttrStrNoError(meth, __pyx_n_s_name);
+ if (likely(name_attr)) {
+ ret = PyObject_RichCompareBool(name_attr, name, Py_EQ);
+ } else {
+ ret = -1;
+ }
+ if (unlikely(ret < 0)) {
+ PyErr_Clear();
+ ret = 0;
+ }
+ Py_XDECREF(name_attr);
+ return ret;
+}
+static int __Pyx_setup_reduce(PyObject* type_obj) {
+ int ret = 0;
+ PyObject *object_reduce = NULL;
+ PyObject *object_getstate = NULL;
+ PyObject *object_reduce_ex = NULL;
+ PyObject *reduce = NULL;
+ PyObject *reduce_ex = NULL;
+ PyObject *reduce_cython = NULL;
+ PyObject *setstate = NULL;
+ PyObject *setstate_cython = NULL;
+ PyObject *getstate = NULL;
+#if CYTHON_USE_PYTYPE_LOOKUP
+ getstate = _PyType_Lookup((PyTypeObject*)type_obj, __pyx_n_s_getstate);
+#else
+ getstate = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_getstate);
+ if (!getstate && PyErr_Occurred()) {
+ goto __PYX_BAD;
+ }
+#endif
+ if (getstate) {
+#if CYTHON_USE_PYTYPE_LOOKUP
+ object_getstate = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_getstate);
+#else
+ object_getstate = __Pyx_PyObject_GetAttrStrNoError((PyObject*)&PyBaseObject_Type, __pyx_n_s_getstate);
+ if (!object_getstate && PyErr_Occurred()) {
+ goto __PYX_BAD;
+ }
+#endif
+ if (object_getstate != getstate) {
+ goto __PYX_GOOD;
+ }
+ }
+#if CYTHON_USE_PYTYPE_LOOKUP
+ object_reduce_ex = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD;
+#else
+ object_reduce_ex = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD;
+#endif
+ reduce_ex = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce_ex); if (unlikely(!reduce_ex)) goto __PYX_BAD;
+ if (reduce_ex == object_reduce_ex) {
+#if CYTHON_USE_PYTYPE_LOOKUP
+ object_reduce = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD;
+#else
+ object_reduce = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD;
+#endif
+ reduce = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce); if (unlikely(!reduce)) goto __PYX_BAD;
+ if (reduce == object_reduce || __Pyx_setup_reduce_is_named(reduce, __pyx_n_s_reduce_cython)) {
+ reduce_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_reduce_cython);
+ if (likely(reduce_cython)) {
+ ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce, reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD;
+ ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD;
+ } else if (reduce == object_reduce || PyErr_Occurred()) {
+ goto __PYX_BAD;
+ }
+ setstate = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_setstate);
+ if (!setstate) PyErr_Clear();
+ if (!setstate || __Pyx_setup_reduce_is_named(setstate, __pyx_n_s_setstate_cython)) {
+ setstate_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_setstate_cython);
+ if (likely(setstate_cython)) {
+ ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate, setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD;
+ ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD;
+ } else if (!setstate || PyErr_Occurred()) {
+ goto __PYX_BAD;
+ }
+ }
+ PyType_Modified((PyTypeObject*)type_obj);
+ }
+ }
+ goto __PYX_GOOD;
+__PYX_BAD:
+ if (!PyErr_Occurred()) {
+ __Pyx_TypeName type_obj_name =
+ __Pyx_PyType_GetName((PyTypeObject*)type_obj);
+ PyErr_Format(PyExc_RuntimeError,
+ "Unable to initialize pickling for " __Pyx_FMT_TYPENAME, type_obj_name);
+ __Pyx_DECREF_TypeName(type_obj_name);
+ }
+ ret = -1;
+__PYX_GOOD:
+#if !CYTHON_USE_PYTYPE_LOOKUP
+ Py_XDECREF(object_reduce);
+ Py_XDECREF(object_reduce_ex);
+ Py_XDECREF(object_getstate);
+ Py_XDECREF(getstate);
+#endif
+ Py_XDECREF(reduce);
+ Py_XDECREF(reduce_ex);
+ Py_XDECREF(reduce_cython);
+ Py_XDECREF(setstate);
+ Py_XDECREF(setstate_cython);
+ return ret;
+}
+#endif
+
+/* Import */
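+/* __import__ wrapper: when level is -1 and this module lives inside a
+   package, a package-relative import is attempted first, with a plain
+   absolute import as fallback. */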
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) {
+ PyObject *module = 0;
+ PyObject *empty_dict = 0;
+ PyObject *empty_list = 0;
+ #if PY_MAJOR_VERSION < 3
+ PyObject *py_import;
+ py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import);
+ if (unlikely(!py_import))
+ goto bad;
+ if (!from_list) {
+ empty_list = PyList_New(0);
+ if (unlikely(!empty_list))
+ goto bad;
+ from_list = empty_list;
+ }
+ #endif
+ empty_dict = PyDict_New();
+ if (unlikely(!empty_dict))
+ goto bad;
+ {
+ #if PY_MAJOR_VERSION >= 3
+ if (level == -1) {
+ if (strchr(__Pyx_MODULE_NAME, '.') != NULL) {
+ module = PyImport_ImportModuleLevelObject(
+ name, __pyx_d, empty_dict, from_list, 1);
+ if (unlikely(!module)) {
+ if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError)))
+ goto bad;
+ PyErr_Clear();
+ }
+ }
+ level = 0;
+ }
+ #endif
+ if (!module) {
+ #if PY_MAJOR_VERSION < 3
+ PyObject *py_level = PyInt_FromLong(level);
+ if (unlikely(!py_level))
+ goto bad;
+ module = PyObject_CallFunctionObjArgs(py_import,
+ name, __pyx_d, empty_dict, from_list, py_level, (PyObject *)NULL);
+ Py_DECREF(py_level);
+ #else
+ module = PyImport_ImportModuleLevelObject(
+ name, __pyx_d, empty_dict, from_list, level);
+ #endif
+ }
+ }
+bad:
+ Py_XDECREF(empty_dict);
+ Py_XDECREF(empty_list);
+ #if PY_MAJOR_VERSION < 3
+ Py_XDECREF(py_import);
+ #endif
+ return module;
+}
+
+/* ImportDottedModule */
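+/* Imports a dotted module name and returns the innermost submodule: the full
+   name is looked up in sys.modules first, then the name parts are walked as
+   attributes; modules whose spec is still '_initializing' are imported again
+   through the regular mechanism. */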
+#if PY_MAJOR_VERSION >= 3
+static PyObject *__Pyx__ImportDottedModule_Error(PyObject *name, PyObject *parts_tuple, Py_ssize_t count) {
+ PyObject *partial_name = NULL, *slice = NULL, *sep = NULL;
+ if (unlikely(PyErr_Occurred())) {
+ PyErr_Clear();
+ }
+ if (likely(PyTuple_GET_SIZE(parts_tuple) == count)) {
+ partial_name = name;
+ } else {
+ slice = PySequence_GetSlice(parts_tuple, 0, count);
+ if (unlikely(!slice))
+ goto bad;
+ sep = PyUnicode_FromStringAndSize(".", 1);
+ if (unlikely(!sep))
+ goto bad;
+ partial_name = PyUnicode_Join(sep, slice);
+ }
+ PyErr_Format(
+#if PY_MAJOR_VERSION < 3
+ PyExc_ImportError,
+ "No module named '%s'", PyString_AS_STRING(partial_name));
+#else
+#if PY_VERSION_HEX >= 0x030600B1
+ PyExc_ModuleNotFoundError,
+#else
+ PyExc_ImportError,
+#endif
+ "No module named '%U'", partial_name);
+#endif
+bad:
+ Py_XDECREF(sep);
+ Py_XDECREF(slice);
+ Py_XDECREF(partial_name);
+ return NULL;
+}
+#endif
+#if PY_MAJOR_VERSION >= 3
+static PyObject *__Pyx__ImportDottedModule_Lookup(PyObject *name) {
+ PyObject *imported_module;
+#if PY_VERSION_HEX < 0x030700A1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030400)
+ PyObject *modules = PyImport_GetModuleDict();
+ if (unlikely(!modules))
+ return NULL;
+ imported_module = __Pyx_PyDict_GetItemStr(modules, name);
+ Py_XINCREF(imported_module);
+#else
+ imported_module = PyImport_GetModule(name);
+#endif
+ return imported_module;
+}
+#endif
+#if PY_MAJOR_VERSION >= 3
+static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple) {
+ Py_ssize_t i, nparts;
+ nparts = PyTuple_GET_SIZE(parts_tuple);
+ for (i=1; i < nparts && module; i++) {
+ PyObject *part, *submodule;
+#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+ part = PyTuple_GET_ITEM(parts_tuple, i);
+#else
+ part = PySequence_ITEM(parts_tuple, i);
+#endif
+ submodule = __Pyx_PyObject_GetAttrStrNoError(module, part);
+#if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS)
+ Py_DECREF(part);
+#endif
+ Py_DECREF(module);
+ module = submodule;
+ }
+ if (unlikely(!module)) {
+ return __Pyx__ImportDottedModule_Error(name, parts_tuple, i);
+ }
+ return module;
+}
+#endif
+static PyObject *__Pyx__ImportDottedModule(PyObject *name, PyObject *parts_tuple) {
+#if PY_MAJOR_VERSION < 3
+ PyObject *module, *from_list, *star = __pyx_n_s__171;
+ CYTHON_UNUSED_VAR(parts_tuple);
+ from_list = PyList_New(1);
+ if (unlikely(!from_list))
+ return NULL;
+ Py_INCREF(star);
+ PyList_SET_ITEM(from_list, 0, star);
+ module = __Pyx_Import(name, from_list, 0);
+ Py_DECREF(from_list);
+ return module;
+#else
+ PyObject *imported_module;
+ PyObject *module = __Pyx_Import(name, NULL, 0);
+ if (!parts_tuple || unlikely(!module))
+ return module;
+ imported_module = __Pyx__ImportDottedModule_Lookup(name);
+ if (likely(imported_module)) {
+ Py_DECREF(module);
+ return imported_module;
+ }
+ PyErr_Clear();
+ return __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple);
+#endif
+}
+static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple) {
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030400B1
+ PyObject *module = __Pyx__ImportDottedModule_Lookup(name);
+ if (likely(module)) {
+ PyObject *spec = __Pyx_PyObject_GetAttrStrNoError(module, __pyx_n_s_spec);
+ if (likely(spec)) {
+ PyObject *unsafe = __Pyx_PyObject_GetAttrStrNoError(spec, __pyx_n_s_initializing);
+ if (likely(!unsafe || !__Pyx_PyObject_IsTrue(unsafe))) {
+ Py_DECREF(spec);
+ spec = NULL;
+ }
+ Py_XDECREF(unsafe);
+ }
+ if (likely(!spec)) {
+ PyErr_Clear();
+ return module;
+ }
+ Py_DECREF(spec);
+ Py_DECREF(module);
+ } else if (PyErr_Occurred()) {
+ PyErr_Clear();
+ }
+#endif
+ return __Pyx__ImportDottedModule(name, parts_tuple);
+}
+
+/* ImportFrom */
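+/* 'from module import name': when attribute lookup fails, 'module.name' is
+   additionally looked up in sys.modules, which covers submodules that are not
+   yet bound as attributes during circular imports. */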
+static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) {
+ PyObject* value = __Pyx_PyObject_GetAttrStr(module, name);
+ if (unlikely(!value) && PyErr_ExceptionMatches(PyExc_AttributeError)) {
+ const char* module_name_str = 0;
+ PyObject* module_name = 0;
+ PyObject* module_dot = 0;
+ PyObject* full_name = 0;
+ PyErr_Clear();
+ module_name_str = PyModule_GetName(module);
+ if (unlikely(!module_name_str)) { goto modbad; }
+ module_name = PyUnicode_FromString(module_name_str);
+ if (unlikely(!module_name)) { goto modbad; }
+ module_dot = PyUnicode_Concat(module_name, __pyx_kp_u__140);
+ if (unlikely(!module_dot)) { goto modbad; }
+ full_name = PyUnicode_Concat(module_dot, name);
+ if (unlikely(!full_name)) { goto modbad; }
+ #if PY_VERSION_HEX < 0x030700A1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030400)
+ {
+ PyObject *modules = PyImport_GetModuleDict();
+ if (unlikely(!modules))
+ goto modbad;
+ value = PyObject_GetItem(modules, full_name);
+ }
+ #else
+ value = PyImport_GetModule(full_name);
+ #endif
+ modbad:
+ Py_XDECREF(full_name);
+ Py_XDECREF(module_dot);
+ Py_XDECREF(module_name);
+ }
+ if (unlikely(!value)) {
+ PyErr_Format(PyExc_ImportError,
+ #if PY_MAJOR_VERSION < 3
+ "cannot import name %.230s", PyString_AS_STRING(name));
+ #else
+ "cannot import name %S", name);
+ #endif
+ }
+ return value;
+}
+
+/* CLineInTraceback */
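+/* Decides whether the C source line is shown in tracebacks, controlled by the
+   user-settable cython_runtime.cline_in_traceback attribute; if unset, it is
+   initialised to False and C lines are suppressed. */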
+#ifndef CYTHON_CLINE_IN_TRACEBACK
+static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line) {
+ PyObject *use_cline;
+ PyObject *ptype, *pvalue, *ptraceback;
+#if CYTHON_COMPILING_IN_CPYTHON
+ PyObject **cython_runtime_dict;
+#endif
+ CYTHON_MAYBE_UNUSED_VAR(tstate);
+ if (unlikely(!__pyx_cython_runtime)) {
+ return c_line;
+ }
+ __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback);
+#if CYTHON_COMPILING_IN_CPYTHON
+ cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime);
+ if (likely(cython_runtime_dict)) {
+ __PYX_PY_DICT_LOOKUP_IF_MODIFIED(
+ use_cline, *cython_runtime_dict,
+ __Pyx_PyDict_GetItemStr(*cython_runtime_dict, __pyx_n_s_cline_in_traceback))
+ } else
+#endif
+ {
+ PyObject *use_cline_obj = __Pyx_PyObject_GetAttrStrNoError(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback);
+ if (use_cline_obj) {
+ use_cline = PyObject_Not(use_cline_obj) ? Py_False : Py_True;
+ Py_DECREF(use_cline_obj);
+ } else {
+ PyErr_Clear();
+ use_cline = NULL;
+ }
+ }
+ if (!use_cline) {
+ c_line = 0;
+ (void) PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False);
+ }
+ else if (use_cline == Py_False || (use_cline != Py_True && PyObject_Not(use_cline) != 0)) {
+ c_line = 0;
+ }
+ __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback);
+ return c_line;
+}
+#endif
+
+/* CodeObjectCache */
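+/* Cache of the fake code objects used for tracebacks, kept as an array sorted
+   by line number, searched by bisection and grown in blocks of 64 entries. */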
+#if !CYTHON_COMPILING_IN_LIMITED_API
+static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) {
+ int start = 0, mid = 0, end = count - 1;
+ if (end >= 0 && code_line > entries[end].code_line) {
+ return count;
+ }
+ while (start < end) {
+ mid = start + (end - start) / 2;
+ if (code_line < entries[mid].code_line) {
+ end = mid;
+ } else if (code_line > entries[mid].code_line) {
+ start = mid + 1;
+ } else {
+ return mid;
+ }
+ }
+ if (code_line <= entries[mid].code_line) {
+ return mid;
+ } else {
+ return mid + 1;
+ }
+}
+static PyCodeObject *__pyx_find_code_object(int code_line) {
+ PyCodeObject* code_object;
+ int pos;
+ if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) {
+ return NULL;
+ }
+ pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
+ if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) {
+ return NULL;
+ }
+ code_object = __pyx_code_cache.entries[pos].code_object;
+ Py_INCREF(code_object);
+ return code_object;
+}
+static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) {
+ int pos, i;
+ __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries;
+ if (unlikely(!code_line)) {
+ return;
+ }
+ if (unlikely(!entries)) {
+ entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry));
+ if (likely(entries)) {
+ __pyx_code_cache.entries = entries;
+ __pyx_code_cache.max_count = 64;
+ __pyx_code_cache.count = 1;
+ entries[0].code_line = code_line;
+ entries[0].code_object = code_object;
+ Py_INCREF(code_object);
+ }
+ return;
+ }
+ pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
+ if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) {
+ PyCodeObject* tmp = entries[pos].code_object;
+ entries[pos].code_object = code_object;
+ Py_DECREF(tmp);
+ return;
+ }
+ if (__pyx_code_cache.count == __pyx_code_cache.max_count) {
+ int new_max = __pyx_code_cache.max_count + 64;
+ entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc(
+ __pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry));
+ if (unlikely(!entries)) {
+ return;
+ }
+ __pyx_code_cache.entries = entries;
+ __pyx_code_cache.max_count = new_max;
+ }
+ for (i=__pyx_code_cache.count; i>pos; i--) {
+ entries[i] = entries[i-1];
+ }
+ entries[pos].code_line = code_line;
+ entries[pos].code_object = code_object;
+ __pyx_code_cache.count++;
+ Py_INCREF(code_object);
+}
+#endif
+
+/* AddTraceback */
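+/* Builds a synthetic frame (from a code object, cached where possible, that
+   carries the right filename, function name and line number) and passes it to
+   PyTraceBack_Here so that this C code shows up in Python tracebacks. */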
+#include "compile.h"
+#include "frameobject.h"
+#include "traceback.h"
+#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API && !defined(PYPY_VERSION)
+ #ifndef Py_BUILD_CORE
+ #define Py_BUILD_CORE 1
+ #endif
+ #include "internal/pycore_frame.h"
+#endif
+#if CYTHON_COMPILING_IN_LIMITED_API
+static PyObject *__Pyx_PyCode_Replace_For_AddTraceback(PyObject *code, PyObject *scratch_dict,
+ PyObject *firstlineno, PyObject *name) {
+ PyObject *replace = NULL;
+ if (unlikely(PyDict_SetItemString(scratch_dict, "co_firstlineno", firstlineno))) return NULL;
+ if (unlikely(PyDict_SetItemString(scratch_dict, "co_name", name))) return NULL;
+ replace = PyObject_GetAttrString(code, "replace");
+ if (likely(replace)) {
+ PyObject *result;
+ result = PyObject_Call(replace, __pyx_empty_tuple, scratch_dict);
+ Py_DECREF(replace);
+ return result;
+ }
+ PyErr_Clear();
+ #if __PYX_LIMITED_VERSION_HEX < 0x030780000
+ {
+ PyObject *compiled = NULL, *result = NULL;
+ if (unlikely(PyDict_SetItemString(scratch_dict, "code", code))) return NULL;
+ if (unlikely(PyDict_SetItemString(scratch_dict, "type", (PyObject*)(&PyType_Type)))) return NULL;
+ compiled = Py_CompileString(
+ "out = type(code)(\n"
+ " code.co_argcount, code.co_kwonlyargcount, code.co_nlocals, code.co_stacksize,\n"
+ " code.co_flags, code.co_code, code.co_consts, code.co_names,\n"
+ " code.co_varnames, code.co_filename, co_name, co_firstlineno,\n"
+ " code.co_lnotab)\n", "", Py_file_input);
+ if (!compiled) return NULL;
+ result = PyEval_EvalCode(compiled, scratch_dict, scratch_dict);
+ Py_DECREF(compiled);
+ if (!result) PyErr_Print();
+ Py_XDECREF(result);
+ result = PyDict_GetItemString(scratch_dict, "out");
+ if (result) Py_INCREF(result);
+ return result;
+ }
+ #else
+ return NULL;
+ #endif
+}
+static void __Pyx_AddTraceback(const char *funcname, int c_line,
+ int py_line, const char *filename) {
+ PyObject *code_object = NULL, *py_py_line = NULL, *py_funcname = NULL, *dict = NULL;
+ PyObject *replace = NULL, *getframe = NULL, *frame = NULL;
+ PyObject *exc_type, *exc_value, *exc_traceback;
+ int success = 0;
+ if (c_line) {
+ (void) __pyx_cfilenm;
+ (void) __Pyx_CLineForTraceback(__Pyx_PyThreadState_Current, c_line);
+ }
+ PyErr_Fetch(&exc_type, &exc_value, &exc_traceback);
+ code_object = Py_CompileString("_getframe()", filename, Py_eval_input);
+ if (unlikely(!code_object)) goto bad;
+ py_py_line = PyLong_FromLong(py_line);
+ if (unlikely(!py_py_line)) goto bad;
+ py_funcname = PyUnicode_FromString(funcname);
+ if (unlikely(!py_funcname)) goto bad;
+ dict = PyDict_New();
+ if (unlikely(!dict)) goto bad;
+ {
+ PyObject *old_code_object = code_object;
+ code_object = __Pyx_PyCode_Replace_For_AddTraceback(code_object, dict, py_py_line, py_funcname);
+ Py_DECREF(old_code_object);
+ }
+ if (unlikely(!code_object)) goto bad;
+ getframe = PySys_GetObject("_getframe");
+ if (unlikely(!getframe)) goto bad;
+ if (unlikely(PyDict_SetItemString(dict, "_getframe", getframe))) goto bad;
+ frame = PyEval_EvalCode(code_object, dict, dict);
+ if (unlikely(!frame) || frame == Py_None) goto bad;
+ success = 1;
+ bad:
+ PyErr_Restore(exc_type, exc_value, exc_traceback);
+ Py_XDECREF(code_object);
+ Py_XDECREF(py_py_line);
+ Py_XDECREF(py_funcname);
+ Py_XDECREF(dict);
+ Py_XDECREF(replace);
+ if (success) {
+ PyTraceBack_Here(
+ (struct _frame*)frame);
+ }
+ Py_XDECREF(frame);
+}
+#else
+static PyCodeObject* __Pyx_CreateCodeObjectForTraceback(
+ const char *funcname, int c_line,
+ int py_line, const char *filename) {
+ PyCodeObject *py_code = NULL;
+ PyObject *py_funcname = NULL;
+ #if PY_MAJOR_VERSION < 3
+ PyObject *py_srcfile = NULL;
+ py_srcfile = PyString_FromString(filename);
+ if (!py_srcfile) goto bad;
+ #endif
+ if (c_line) {
+ #if PY_MAJOR_VERSION < 3
+ py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
+ if (!py_funcname) goto bad;
+ #else
+ py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
+ if (!py_funcname) goto bad;
+ funcname = PyUnicode_AsUTF8(py_funcname);
+ if (!funcname) goto bad;
+ #endif
+ }
+ else {
+ #if PY_MAJOR_VERSION < 3
+ py_funcname = PyString_FromString(funcname);
+ if (!py_funcname) goto bad;
+ #endif
+ }
+ #if PY_MAJOR_VERSION < 3
+ py_code = __Pyx_PyCode_New(
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ __pyx_empty_bytes, /*PyObject *code,*/
+ __pyx_empty_tuple, /*PyObject *consts,*/
+ __pyx_empty_tuple, /*PyObject *names,*/
+ __pyx_empty_tuple, /*PyObject *varnames,*/
+ __pyx_empty_tuple, /*PyObject *freevars,*/
+ __pyx_empty_tuple, /*PyObject *cellvars,*/
+ py_srcfile, /*PyObject *filename,*/
+ py_funcname, /*PyObject *name,*/
+ py_line,
+ __pyx_empty_bytes /*PyObject *lnotab*/
+ );
+ Py_DECREF(py_srcfile);
+ #else
+ py_code = PyCode_NewEmpty(filename, funcname, py_line);
+ #endif
+ Py_XDECREF(py_funcname);
+ return py_code;
+bad:
+ Py_XDECREF(py_funcname);
+ #if PY_MAJOR_VERSION < 3
+ Py_XDECREF(py_srcfile);
+ #endif
+ return NULL;
+}
+static void __Pyx_AddTraceback(const char *funcname, int c_line,
+ int py_line, const char *filename) {
+ PyCodeObject *py_code = 0;
+ PyFrameObject *py_frame = 0;
+ PyThreadState *tstate = __Pyx_PyThreadState_Current;
+ PyObject *ptype, *pvalue, *ptraceback;
+ if (c_line) {
+ c_line = __Pyx_CLineForTraceback(tstate, c_line);
+ }
+ py_code = __pyx_find_code_object(c_line ? -c_line : py_line);
+ if (!py_code) {
+ __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback);
+ py_code = __Pyx_CreateCodeObjectForTraceback(
+ funcname, c_line, py_line, filename);
+ if (!py_code) {
+ /* If the code object creation fails, then we should clear the
+ fetched exception references and propagate the new exception */
+ Py_XDECREF(ptype);
+ Py_XDECREF(pvalue);
+ Py_XDECREF(ptraceback);
+ goto bad;
+ }
+ __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback);
+ __pyx_insert_code_object(c_line ? -c_line : py_line, py_code);
+ }
+ py_frame = PyFrame_New(
+ tstate, /*PyThreadState *tstate,*/
+ py_code, /*PyCodeObject *code,*/
+ __pyx_d, /*PyObject *globals,*/
+ 0 /*PyObject *locals*/
+ );
+ if (!py_frame) goto bad;
+ __Pyx_PyFrame_SetLineNumber(py_frame, py_line);
+ PyTraceBack_Here(py_frame);
+bad:
+ Py_XDECREF(py_code);
+ Py_XDECREF(py_frame);
+}
+#endif
+
+/* CIntFromPyVerify */
+#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\
+ __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0)
+#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\
+ __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1)
+#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\
+ {\
+ func_type value = func_value;\
+ if (sizeof(target_type) < sizeof(func_type)) {\
+ if (unlikely(value != (func_type) (target_type) value)) {\
+ func_type zero = 0;\
+ if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\
+ return (target_type) -1;\
+ if (is_unsigned && unlikely(value < zero))\
+ goto raise_neg_overflow;\
+ else\
+ goto raise_overflow;\
+ }\
+ }\
+ return (target_type) value;\
+ }
+
+/* CIntToPy */
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) {
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+ const long neg_one = (long) -1, const_zero = (long) 0;
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic pop
+#endif
+ const int is_unsigned = neg_one > const_zero;
+ if (is_unsigned) {
+ if (sizeof(long) < sizeof(long)) {
+ return PyInt_FromLong((long) value);
+ } else if (sizeof(long) <= sizeof(unsigned long)) {
+ return PyLong_FromUnsignedLong((unsigned long) value);
+#ifdef HAVE_LONG_LONG
+ } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) {
+ return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value);
+#endif
+ }
+ } else {
+ if (sizeof(long) <= sizeof(long)) {
+ return PyInt_FromLong((long) value);
+#ifdef HAVE_LONG_LONG
+ } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) {
+ return PyLong_FromLongLong((PY_LONG_LONG) value);
+#endif
+ }
+ }
+ {
+ unsigned char *bytes = (unsigned char *)&value;
+#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4
+ if (is_unsigned) {
+ return PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1);
+ } else {
+ return PyLong_FromNativeBytes(bytes, sizeof(value), -1);
+ }
+#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000
+ int one = 1; int little = (int)*(unsigned char *)&one;
+ return _PyLong_FromByteArray(bytes, sizeof(long),
+ little, !is_unsigned);
+#else
+ int one = 1; int little = (int)*(unsigned char *)&one;
+ PyObject *from_bytes, *result = NULL;
+ PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL;
+ from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes");
+ if (!from_bytes) return NULL;
+ py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(long));
+ if (!py_bytes) goto limited_bad;
+ order_str = PyUnicode_FromString(little ? "little" : "big");
+ if (!order_str) goto limited_bad;
+ arg_tuple = PyTuple_Pack(2, py_bytes, order_str);
+ if (!arg_tuple) goto limited_bad;
+ if (!is_unsigned) {
+ kwds = PyDict_New();
+ if (!kwds) goto limited_bad;
+ if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad;
+ }
+ result = PyObject_Call(from_bytes, arg_tuple, kwds);
+ limited_bad:
+ Py_XDECREF(kwds);
+ Py_XDECREF(arg_tuple);
+ Py_XDECREF(order_str);
+ Py_XDECREF(py_bytes);
+ Py_XDECREF(from_bytes);
+ return result;
+#endif
+ }
+}
+
+/* CIntFromPy */
+static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) {
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+ const long neg_one = (long) -1, const_zero = (long) 0;
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic pop
+#endif
+ const int is_unsigned = neg_one > const_zero;
+#if PY_MAJOR_VERSION < 3
+ if (likely(PyInt_Check(x))) {
+ if ((sizeof(long) < sizeof(long))) {
+ __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x))
+ } else {
+ long val = PyInt_AS_LONG(x);
+ if (is_unsigned && unlikely(val < 0)) {
+ goto raise_neg_overflow;
+ }
+ return (long) val;
+ }
+ }
+#endif
+ if (unlikely(!PyLong_Check(x))) {
+ long val;
+ PyObject *tmp = __Pyx_PyNumber_IntOrLong(x);
+ if (!tmp) return (long) -1;
+ val = __Pyx_PyInt_As_long(tmp);
+ Py_DECREF(tmp);
+ return val;
+ }
+ if (is_unsigned) {
+#if CYTHON_USE_PYLONG_INTERNALS
+ if (unlikely(__Pyx_PyLong_IsNeg(x))) {
+ goto raise_neg_overflow;
+ } else if (__Pyx_PyLong_IsCompact(x)) {
+ __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x))
+ } else {
+ const digit* digits = __Pyx_PyLong_Digits(x);
+ assert(__Pyx_PyLong_DigitCount(x) > 1);
+ switch (__Pyx_PyLong_DigitCount(x)) {
+ case 2:
+ if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(long) >= 2 * PyLong_SHIFT)) {
+ return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]));
+ }
+ }
+ break;
+ case 3:
+ if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(long) >= 3 * PyLong_SHIFT)) {
+ return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]));
+ }
+ }
+ break;
+ case 4:
+ if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(long) >= 4 * PyLong_SHIFT)) {
+ return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]));
+ }
+ }
+ break;
+ }
+ }
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7
+ if (unlikely(Py_SIZE(x) < 0)) {
+ goto raise_neg_overflow;
+ }
+#else
+ {
+ int result = PyObject_RichCompareBool(x, Py_False, Py_LT);
+ if (unlikely(result < 0))
+ return (long) -1;
+ if (unlikely(result == 1))
+ goto raise_neg_overflow;
+ }
+#endif
+ if ((sizeof(long) <= sizeof(unsigned long))) {
+ __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x))
+#ifdef HAVE_LONG_LONG
+ } else if ((sizeof(long) <= sizeof(unsigned PY_LONG_LONG))) {
+ __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x))
+#endif
+ }
+ } else {
+#if CYTHON_USE_PYLONG_INTERNALS
+ if (__Pyx_PyLong_IsCompact(x)) {
+ __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x))
+ } else {
+ const digit* digits = __Pyx_PyLong_Digits(x);
+ assert(__Pyx_PyLong_DigitCount(x) > 1);
+ switch (__Pyx_PyLong_SignedDigitCount(x)) {
+ case -2:
+ if ((8 * sizeof(long) - 1 > 1 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) {
+ return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
+ }
+ }
+ break;
+ case 2:
+ if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) {
+ return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
+ }
+ }
+ break;
+ case -3:
+ if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) {
+ return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
+ }
+ }
+ break;
+ case 3:
+ if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) {
+ return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
+ }
+ }
+ break;
+ case -4:
+ if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) {
+ return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
+ }
+ }
+ break;
+ case 4:
+ if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) {
+ return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
+ }
+ }
+ break;
+ }
+ }
+#endif
+ if ((sizeof(long) <= sizeof(long))) {
+ __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x))
+#ifdef HAVE_LONG_LONG
+ } else if ((sizeof(long) <= sizeof(PY_LONG_LONG))) {
+ __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x))
+#endif
+ }
+ }
+ {
+ long val;
+ int ret = -1;
+#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API
+ Py_ssize_t bytes_copied = PyLong_AsNativeBytes(
+ x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0));
+ if (unlikely(bytes_copied == -1)) {
+ } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) {
+ goto raise_overflow;
+ } else {
+ ret = 0;
+ }
+#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray)
+ int one = 1; int is_little = (int)*(unsigned char *)&one;
+ unsigned char *bytes = (unsigned char *)&val;
+ ret = _PyLong_AsByteArray((PyLongObject *)x,
+ bytes, sizeof(val),
+ is_little, !is_unsigned);
+#else
+ PyObject *v;
+ PyObject *stepval = NULL, *mask = NULL, *shift = NULL;
+ int bits, remaining_bits, is_negative = 0;
+ int chunk_size = (sizeof(long) < 8) ? 30 : 62;
+ if (likely(PyLong_CheckExact(x))) {
+ v = __Pyx_NewRef(x);
+ } else {
+ v = PyNumber_Long(x);
+ if (unlikely(!v)) return (long) -1;
+ assert(PyLong_CheckExact(v));
+ }
+ {
+ int result = PyObject_RichCompareBool(v, Py_False, Py_LT);
+ if (unlikely(result < 0)) {
+ Py_DECREF(v);
+ return (long) -1;
+ }
+ is_negative = result == 1;
+ }
+ if (is_unsigned && unlikely(is_negative)) {
+ Py_DECREF(v);
+ goto raise_neg_overflow;
+ } else if (is_negative) {
+ stepval = PyNumber_Invert(v);
+ Py_DECREF(v);
+ if (unlikely(!stepval))
+ return (long) -1;
+ } else {
+ stepval = v;
+ }
+ v = NULL;
+ val = (long) 0;
+ mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done;
+ shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done;
+ for (bits = 0; bits < (int) sizeof(long) * 8 - chunk_size; bits += chunk_size) {
+ PyObject *tmp, *digit;
+ long idigit;
+ digit = PyNumber_And(stepval, mask);
+ if (unlikely(!digit)) goto done;
+ idigit = PyLong_AsLong(digit);
+ Py_DECREF(digit);
+ if (unlikely(idigit < 0)) goto done;
+ val |= ((long) idigit) << bits;
+ tmp = PyNumber_Rshift(stepval, shift);
+ if (unlikely(!tmp)) goto done;
+ Py_DECREF(stepval); stepval = tmp;
+ }
+ Py_DECREF(shift); shift = NULL;
+ Py_DECREF(mask); mask = NULL;
+ {
+ long idigit = PyLong_AsLong(stepval);
+ if (unlikely(idigit < 0)) goto done;
+ remaining_bits = ((int) sizeof(long) * 8) - bits - (is_unsigned ? 0 : 1);
+ if (unlikely(idigit >= (1L << remaining_bits)))
+ goto raise_overflow;
+ val |= ((long) idigit) << bits;
+ }
+ if (!is_unsigned) {
+ if (unlikely(val & (((long) 1) << (sizeof(long) * 8 - 1))))
+ goto raise_overflow;
+ if (is_negative)
+ val = ~val;
+ }
+ ret = 0;
+ done:
+ Py_XDECREF(shift);
+ Py_XDECREF(mask);
+ Py_XDECREF(stepval);
+#endif
+ if (unlikely(ret))
+ return (long) -1;
+ return val;
+ }
+raise_overflow:
+ PyErr_SetString(PyExc_OverflowError,
+ "value too large to convert to long");
+ return (long) -1;
+raise_neg_overflow:
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to long");
+ return (long) -1;
+}
+
+/* CIntFromPy */
+static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) {
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+ const int neg_one = (int) -1, const_zero = (int) 0;
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic pop
+#endif
+ const int is_unsigned = neg_one > const_zero;
+#if PY_MAJOR_VERSION < 3
+ if (likely(PyInt_Check(x))) {
+ if ((sizeof(int) < sizeof(long))) {
+ __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x))
+ } else {
+ long val = PyInt_AS_LONG(x);
+ if (is_unsigned && unlikely(val < 0)) {
+ goto raise_neg_overflow;
+ }
+ return (int) val;
+ }
+ }
+#endif
+ if (unlikely(!PyLong_Check(x))) {
+ int val;
+ PyObject *tmp = __Pyx_PyNumber_IntOrLong(x);
+ if (!tmp) return (int) -1;
+ val = __Pyx_PyInt_As_int(tmp);
+ Py_DECREF(tmp);
+ return val;
+ }
+ if (is_unsigned) {
+#if CYTHON_USE_PYLONG_INTERNALS
+ if (unlikely(__Pyx_PyLong_IsNeg(x))) {
+ goto raise_neg_overflow;
+ } else if (__Pyx_PyLong_IsCompact(x)) {
+ __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x))
+ } else {
+ const digit* digits = __Pyx_PyLong_Digits(x);
+ assert(__Pyx_PyLong_DigitCount(x) > 1);
+ switch (__Pyx_PyLong_DigitCount(x)) {
+ case 2:
+ if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(int) >= 2 * PyLong_SHIFT)) {
+ return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]));
+ }
+ }
+ break;
+ case 3:
+ if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(int) >= 3 * PyLong_SHIFT)) {
+ return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]));
+ }
+ }
+ break;
+ case 4:
+ if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(int) >= 4 * PyLong_SHIFT)) {
+ return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]));
+ }
+ }
+ break;
+ }
+ }
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7
+ if (unlikely(Py_SIZE(x) < 0)) {
+ goto raise_neg_overflow;
+ }
+#else
+ {
+ int result = PyObject_RichCompareBool(x, Py_False, Py_LT);
+ if (unlikely(result < 0))
+ return (int) -1;
+ if (unlikely(result == 1))
+ goto raise_neg_overflow;
+ }
+#endif
+ if ((sizeof(int) <= sizeof(unsigned long))) {
+ __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x))
+#ifdef HAVE_LONG_LONG
+ } else if ((sizeof(int) <= sizeof(unsigned PY_LONG_LONG))) {
+ __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x))
+#endif
+ }
+ } else {
+#if CYTHON_USE_PYLONG_INTERNALS
+ if (__Pyx_PyLong_IsCompact(x)) {
+ __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x))
+ } else {
+ const digit* digits = __Pyx_PyLong_Digits(x);
+ assert(__Pyx_PyLong_DigitCount(x) > 1);
+ switch (__Pyx_PyLong_SignedDigitCount(x)) {
+ case -2:
+ if ((8 * sizeof(int) - 1 > 1 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) {
+ return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
+ }
+ }
+ break;
+ case 2:
+ if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) {
+ return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
+ }
+ }
+ break;
+ case -3:
+ if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) {
+ return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
+ }
+ }
+ break;
+ case 3:
+ if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) {
+ return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
+ }
+ }
+ break;
+ case -4:
+ if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) {
+ return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
+ }
+ }
+ break;
+ case 4:
+ if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) {
+ if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) {
+ __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+ } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) {
+ return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
+ }
+ }
+ break;
+ }
+ }
+#endif
+ if ((sizeof(int) <= sizeof(long))) {
+ __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x))
+#ifdef HAVE_LONG_LONG
+ } else if ((sizeof(int) <= sizeof(PY_LONG_LONG))) {
+ __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x))
+#endif
+ }
+ }
+ {
+ int val;
+ int ret = -1;
+#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API
+ Py_ssize_t bytes_copied = PyLong_AsNativeBytes(
+ x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0));
+ if (unlikely(bytes_copied == -1)) {
+ } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) {
+ goto raise_overflow;
+ } else {
+ ret = 0;
+ }
+#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray)
+ int one = 1; int is_little = (int)*(unsigned char *)&one;
+ unsigned char *bytes = (unsigned char *)&val;
+ ret = _PyLong_AsByteArray((PyLongObject *)x,
+ bytes, sizeof(val),
+ is_little, !is_unsigned);
+#else
+ PyObject *v;
+ PyObject *stepval = NULL, *mask = NULL, *shift = NULL;
+ int bits, remaining_bits, is_negative = 0;
+ int chunk_size = (sizeof(long) < 8) ? 30 : 62;
+ if (likely(PyLong_CheckExact(x))) {
+ v = __Pyx_NewRef(x);
+ } else {
+ v = PyNumber_Long(x);
+ if (unlikely(!v)) return (int) -1;
+ assert(PyLong_CheckExact(v));
+ }
+ {
+ int result = PyObject_RichCompareBool(v, Py_False, Py_LT);
+ if (unlikely(result < 0)) {
+ Py_DECREF(v);
+ return (int) -1;
+ }
+ is_negative = result == 1;
+ }
+ if (is_unsigned && unlikely(is_negative)) {
+ Py_DECREF(v);
+ goto raise_neg_overflow;
+ } else if (is_negative) {
+ stepval = PyNumber_Invert(v);
+ Py_DECREF(v);
+ if (unlikely(!stepval))
+ return (int) -1;
+ } else {
+ stepval = v;
+ }
+ v = NULL;
+ val = (int) 0;
+ mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done;
+ shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done;
+ for (bits = 0; bits < (int) sizeof(int) * 8 - chunk_size; bits += chunk_size) {
+ PyObject *tmp, *digit;
+ long idigit;
+ digit = PyNumber_And(stepval, mask);
+ if (unlikely(!digit)) goto done;
+ idigit = PyLong_AsLong(digit);
+ Py_DECREF(digit);
+ if (unlikely(idigit < 0)) goto done;
+ val |= ((int) idigit) << bits;
+ tmp = PyNumber_Rshift(stepval, shift);
+ if (unlikely(!tmp)) goto done;
+ Py_DECREF(stepval); stepval = tmp;
+ }
+ Py_DECREF(shift); shift = NULL;
+ Py_DECREF(mask); mask = NULL;
+ {
+ long idigit = PyLong_AsLong(stepval);
+ if (unlikely(idigit < 0)) goto done;
+ remaining_bits = ((int) sizeof(int) * 8) - bits - (is_unsigned ? 0 : 1);
+ if (unlikely(idigit >= (1L << remaining_bits)))
+ goto raise_overflow;
+ val |= ((int) idigit) << bits;
+ }
+ if (!is_unsigned) {
+ if (unlikely(val & (((int) 1) << (sizeof(int) * 8 - 1))))
+ goto raise_overflow;
+ if (is_negative)
+ val = ~val;
+ }
+ ret = 0;
+ done:
+ Py_XDECREF(shift);
+ Py_XDECREF(mask);
+ Py_XDECREF(stepval);
+#endif
+ if (unlikely(ret))
+ return (int) -1;
+ return val;
+ }
+raise_overflow:
+ PyErr_SetString(PyExc_OverflowError,
+ "value too large to convert to int");
+ return (int) -1;
+raise_neg_overflow:
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to int");
+ return (int) -1;
+}
+
+/* CIntToPy */
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) {
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+ const int neg_one = (int) -1, const_zero = (int) 0;
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic pop
+#endif
+ const int is_unsigned = neg_one > const_zero;
+ if (is_unsigned) {
+ if (sizeof(int) < sizeof(long)) {
+ return PyInt_FromLong((long) value);
+ } else if (sizeof(int) <= sizeof(unsigned long)) {
+ return PyLong_FromUnsignedLong((unsigned long) value);
+#ifdef HAVE_LONG_LONG
+ } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) {
+ return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value);
+#endif
+ }
+ } else {
+ if (sizeof(int) <= sizeof(long)) {
+ return PyInt_FromLong((long) value);
+#ifdef HAVE_LONG_LONG
+ } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) {
+ return PyLong_FromLongLong((PY_LONG_LONG) value);
+#endif
+ }
+ }
+ {
+ unsigned char *bytes = (unsigned char *)&value;
+#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4
+ if (is_unsigned) {
+ return PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1);
+ } else {
+ return PyLong_FromNativeBytes(bytes, sizeof(value), -1);
+ }
+#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000
+ int one = 1; int little = (int)*(unsigned char *)&one;
+ return _PyLong_FromByteArray(bytes, sizeof(int),
+ little, !is_unsigned);
+#else
+ int one = 1; int little = (int)*(unsigned char *)&one;
+ PyObject *from_bytes, *result = NULL;
+ PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL;
+ from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes");
+ if (!from_bytes) return NULL;
+ py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(int));
+ if (!py_bytes) goto limited_bad;
+ order_str = PyUnicode_FromString(little ? "little" : "big");
+ if (!order_str) goto limited_bad;
+ arg_tuple = PyTuple_Pack(2, py_bytes, order_str);
+ if (!arg_tuple) goto limited_bad;
+ if (!is_unsigned) {
+ kwds = PyDict_New();
+ if (!kwds) goto limited_bad;
+ if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad;
+ }
+ result = PyObject_Call(from_bytes, arg_tuple, kwds);
+ limited_bad:
+ Py_XDECREF(kwds);
+ Py_XDECREF(arg_tuple);
+ Py_XDECREF(order_str);
+ Py_XDECREF(py_bytes);
+ Py_XDECREF(from_bytes);
+ return result;
+#endif
+ }
+}
+
+/* FormatTypeName */
+#if CYTHON_COMPILING_IN_LIMITED_API
+static __Pyx_TypeName
+__Pyx_PyType_GetName(PyTypeObject* tp)
+{
+ PyObject *name = __Pyx_PyObject_GetAttrStr((PyObject *)tp,
+ __pyx_n_s_name);
+ if (unlikely(name == NULL) || unlikely(!PyUnicode_Check(name))) {
+ PyErr_Clear();
+ Py_XDECREF(name);
+ name = __Pyx_NewRef(__pyx_n_s__182);
+ }
+ return name;
+}
+#endif
+
+/* FastTypeChecks */
+#if CYTHON_COMPILING_IN_CPYTHON
+static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) {
+ while (a) {
+ a = __Pyx_PyType_GetSlot(a, tp_base, PyTypeObject*);
+ if (a == b)
+ return 1;
+ }
+ return b == &PyBaseObject_Type;
+}
+static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) {
+ PyObject *mro;
+ if (a == b) return 1;
+ mro = a->tp_mro;
+ if (likely(mro)) {
+ Py_ssize_t i, n;
+ n = PyTuple_GET_SIZE(mro);
+ for (i = 0; i < n; i++) {
+ if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b)
+ return 1;
+ }
+ return 0;
+ }
+ return __Pyx_InBases(a, b);
+}
+static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b) {
+ PyObject *mro;
+ if (cls == a || cls == b) return 1;
+ mro = cls->tp_mro;
+ if (likely(mro)) {
+ Py_ssize_t i, n;
+ n = PyTuple_GET_SIZE(mro);
+ for (i = 0; i < n; i++) {
+ PyObject *base = PyTuple_GET_ITEM(mro, i);
+ if (base == (PyObject *)a || base == (PyObject *)b)
+ return 1;
+ }
+ return 0;
+ }
+ return __Pyx_InBases(cls, a) || __Pyx_InBases(cls, b);
+}
+#if PY_MAJOR_VERSION == 2
+static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) {
+ PyObject *exception, *value, *tb;
+ int res;
+ __Pyx_PyThreadState_declare
+ __Pyx_PyThreadState_assign
+ __Pyx_ErrFetch(&exception, &value, &tb);
+ res = exc_type1 ? PyObject_IsSubclass(err, exc_type1) : 0;
+ if (unlikely(res == -1)) {
+ PyErr_WriteUnraisable(err);
+ res = 0;
+ }
+ if (!res) {
+ res = PyObject_IsSubclass(err, exc_type2);
+ if (unlikely(res == -1)) {
+ PyErr_WriteUnraisable(err);
+ res = 0;
+ }
+ }
+ __Pyx_ErrRestore(exception, value, tb);
+ return res;
+}
+#else
+static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) {
+ if (exc_type1) {
+ return __Pyx_IsAnySubtype2((PyTypeObject*)err, (PyTypeObject*)exc_type1, (PyTypeObject*)exc_type2);
+ } else {
+ return __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2);
+ }
+}
+#endif
+static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) {
+ Py_ssize_t i, n;
+ assert(PyExceptionClass_Check(exc_type));
+ n = PyTuple_GET_SIZE(tuple);
+#if PY_MAJOR_VERSION >= 3
+ for (i=0; i<n; i++) {
+ if (exc_type == PyTuple_GET_ITEM(tuple, i)) return 1;
+ }
+#endif
+ for (i=0; i<n; i++) {
+ PyObject *t = PyTuple_GET_ITEM(tuple, i);
+ #if PY_MAJOR_VERSION < 3
+ if (likely(exc_type == t)) return 1;
+ #endif
+ if (likely(PyExceptionClass_Check(t))) {
+ if (__Pyx_inner_PyErr_GivenExceptionMatches2(exc_type, NULL, t)) return 1;
+ }
+ }
+ return 0;
+}
+static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject* exc_type) {
+ if (likely(err == exc_type)) return 1;
+ if (likely(PyExceptionClass_Check(err))) {
+ if (likely(PyExceptionClass_Check(exc_type))) {
+ return __Pyx_inner_PyErr_GivenExceptionMatches2(err, NULL, exc_type);
+ } else if (likely(PyTuple_Check(exc_type))) {
+ return __Pyx_PyErr_GivenExceptionMatchesTuple(err, exc_type);
+ }
+ }
+ return PyErr_GivenExceptionMatches(err, exc_type);
+}
+static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *exc_type1, PyObject *exc_type2) {
+ if (likely(err == exc_type1 || err == exc_type2)) return 1;
+ if (likely(PyExceptionClass_Check(err))) {
+ return __Pyx_inner_PyErr_GivenExceptionMatches2(err, exc_type1, exc_type2);
+ }
+ return (PyErr_GivenExceptionMatches(err, exc_type1) || PyErr_GivenExceptionMatches(err, exc_type2));
+}
+#else
+static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *exc_type) {
+ return PyErr_GivenExceptionMatches(err, exc_type);
+}
+static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *exc_type1, PyObject *exc_type2) {
+ return (PyErr_GivenExceptionMatches(err, exc_type1) || PyErr_GivenExceptionMatches(err, exc_type2));
+}
+#endif
+
+/* CheckBinaryVersion */
+static unsigned long __Pyx_get_runtime_version(void) {
+#if __PYX_LIMITED_VERSION_HEX >= 0x030B00A4
+ return Py_Version & ~0xFFUL;
+#else
+ const char* rt_version = Py_GetVersion();
+ unsigned long version = 0;
+ unsigned long factor = 0x01000000UL;
+ unsigned int digit = 0;
+ int i = 0;
+ while (factor) {
+ while ('0' <= rt_version[i] && rt_version[i] <= '9') {
+ digit = digit * 10 + (unsigned int) (rt_version[i] - '0');
+ ++i;
+ }
+ version += factor * digit;
+ if (rt_version[i] != '.')
+ break;
+ digit = 0;
+ factor >>= 8;
+ ++i;
+ }
+ return version;
+#endif
+}
+static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer) {
+ const unsigned long MAJOR_MINOR = 0xFFFF0000UL;
+ if ((rt_version & MAJOR_MINOR) == (ct_version & MAJOR_MINOR))
+ return 0;
+ if (likely(allow_newer && (rt_version & MAJOR_MINOR) > (ct_version & MAJOR_MINOR)))
+ return 1;
+ {
+ char message[200];
+ PyOS_snprintf(message, sizeof(message),
+ "compile time Python version %d.%d "
+ "of module '%.100s' "
+ "%s "
+ "runtime version %d.%d",
+ (int) (ct_version >> 24), (int) ((ct_version >> 16) & 0xFF),
+ __Pyx_MODULE_NAME,
+ (allow_newer) ? "was newer than" : "does not match",
+ (int) (rt_version >> 24), (int) ((rt_version >> 16) & 0xFF)
+ );
+ return PyErr_WarnEx(NULL, message, 1);
+ }
+}
+
+/* InitStrings */
+#if PY_MAJOR_VERSION >= 3
+static int __Pyx_InitString(__Pyx_StringTabEntry t, PyObject **str) {
+ if (t.is_unicode | t.is_str) {
+ if (t.intern) {
+ *str = PyUnicode_InternFromString(t.s);
+ } else if (t.encoding) {
+ *str = PyUnicode_Decode(t.s, t.n - 1, t.encoding, NULL);
+ } else {
+ *str = PyUnicode_FromStringAndSize(t.s, t.n - 1);
+ }
+ } else {
+ *str = PyBytes_FromStringAndSize(t.s, t.n - 1);
+ }
+ if (!*str)
+ return -1;
+ if (PyObject_Hash(*str) == -1)
+ return -1;
+ return 0;
+}
+#endif
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
+ while (t->p) {
+ #if PY_MAJOR_VERSION >= 3
+ __Pyx_InitString(*t, t->p);
+ #else
+ if (t->is_unicode) {
+ *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
+ } else if (t->intern) {
+ *t->p = PyString_InternFromString(t->s);
+ } else {
+ *t->p = PyString_FromStringAndSize(t->s, t->n - 1);
+ }
+ if (!*t->p)
+ return -1;
+ if (PyObject_Hash(*t->p) == -1)
+ return -1;
+ #endif
+ ++t;
+ }
+ return 0;
+}
+
+#include <string.h>
+static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s) {
+ size_t len = strlen(s);
+ if (unlikely(len > (size_t) PY_SSIZE_T_MAX)) {
+ PyErr_SetString(PyExc_OverflowError, "byte string is too long");
+ return -1;
+ }
+ return (Py_ssize_t) len;
+}
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) {
+ Py_ssize_t len = __Pyx_ssize_strlen(c_str);
+ if (unlikely(len < 0)) return NULL;
+ return __Pyx_PyUnicode_FromStringAndSize(c_str, len);
+}
+static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char* c_str) {
+ Py_ssize_t len = __Pyx_ssize_strlen(c_str);
+ if (unlikely(len < 0)) return NULL;
+ return PyByteArray_FromStringAndSize(c_str, len);
+}
+static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) {
+ Py_ssize_t ignore;
+ return __Pyx_PyObject_AsStringAndSize(o, &ignore);
+}
+#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
+#if !CYTHON_PEP393_ENABLED
+static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
+ char* defenc_c;
+ PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL);
+ if (!defenc) return NULL;
+ defenc_c = PyBytes_AS_STRING(defenc);
+#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+ {
+ char* end = defenc_c + PyBytes_GET_SIZE(defenc);
+ char* c;
+ for (c = defenc_c; c < end; c++) {
+ if ((unsigned char) (*c) >= 128) {
+ PyUnicode_AsASCIIString(o);
+ return NULL;
+ }
+ }
+ }
+#endif
+ *length = PyBytes_GET_SIZE(defenc);
+ return defenc_c;
+}
+#else
+static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
+ if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL;
+#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+ if (likely(PyUnicode_IS_ASCII(o))) {
+ *length = PyUnicode_GET_LENGTH(o);
+ return PyUnicode_AsUTF8(o);
+ } else {
+ PyUnicode_AsASCIIString(o);
+ return NULL;
+ }
+#else
+ return PyUnicode_AsUTF8AndSize(o, length);
+#endif
+}
+#endif
+#endif
+static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
+#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
+ if (
+#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+ __Pyx_sys_getdefaultencoding_not_ascii &&
+#endif
+ PyUnicode_Check(o)) {
+ return __Pyx_PyUnicode_AsStringAndSize(o, length);
+ } else
+#endif
+#if (!CYTHON_COMPILING_IN_PYPY && !CYTHON_COMPILING_IN_LIMITED_API) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE))
+ if (PyByteArray_Check(o)) {
+ *length = PyByteArray_GET_SIZE(o);
+ return PyByteArray_AS_STRING(o);
+ } else
+#endif
+ {
+ char* result;
+ int r = PyBytes_AsStringAndSize(o, &result, length);
+ if (unlikely(r < 0)) {
+ return NULL;
+ } else {
+ return result;
+ }
+ }
+}
+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
+ int is_true = x == Py_True;
+ if (is_true | (x == Py_False) | (x == Py_None)) return is_true;
+ else return PyObject_IsTrue(x);
+}
+static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) {
+ int retval;
+ if (unlikely(!x)) return -1;
+ retval = __Pyx_PyObject_IsTrue(x);
+ Py_DECREF(x);
+ return retval;
+}
+static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) {
+ __Pyx_TypeName result_type_name = __Pyx_PyType_GetName(Py_TYPE(result));
+#if PY_MAJOR_VERSION >= 3
+ if (PyLong_Check(result)) {
+ if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+ "__int__ returned non-int (type " __Pyx_FMT_TYPENAME "). "
+ "The ability to return an instance of a strict subclass of int is deprecated, "
+ "and may be removed in a future version of Python.",
+ result_type_name)) {
+ __Pyx_DECREF_TypeName(result_type_name);
+ Py_DECREF(result);
+ return NULL;
+ }
+ __Pyx_DECREF_TypeName(result_type_name);
+ return result;
+ }
+#endif
+ PyErr_Format(PyExc_TypeError,
+ "__%.4s__ returned non-%.4s (type " __Pyx_FMT_TYPENAME ")",
+ type_name, type_name, result_type_name);
+ __Pyx_DECREF_TypeName(result_type_name);
+ Py_DECREF(result);
+ return NULL;
+}
+static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) {
+#if CYTHON_USE_TYPE_SLOTS
+ PyNumberMethods *m;
+#endif
+ const char *name = NULL;
+ PyObject *res = NULL;
+#if PY_MAJOR_VERSION < 3
+ if (likely(PyInt_Check(x) || PyLong_Check(x)))
+#else
+ if (likely(PyLong_Check(x)))
+#endif
+ return __Pyx_NewRef(x);
+#if CYTHON_USE_TYPE_SLOTS
+ m = Py_TYPE(x)->tp_as_number;
+ #if PY_MAJOR_VERSION < 3
+ if (m && m->nb_int) {
+ name = "int";
+ res = m->nb_int(x);
+ }
+ else if (m && m->nb_long) {
+ name = "long";
+ res = m->nb_long(x);
+ }
+ #else
+ if (likely(m && m->nb_int)) {
+ name = "int";
+ res = m->nb_int(x);
+ }
+ #endif
+#else
+ if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) {
+ res = PyNumber_Int(x);
+ }
+#endif
+ if (likely(res)) {
+#if PY_MAJOR_VERSION < 3
+ if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) {
+#else
+ if (unlikely(!PyLong_CheckExact(res))) {
+#endif
+ return __Pyx_PyNumber_IntOrLongWrongResultType(res, name);
+ }
+ }
+ else if (!PyErr_Occurred()) {
+ PyErr_SetString(PyExc_TypeError,
+ "an integer is required");
+ }
+ return res;
+}
+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
+ Py_ssize_t ival;
+ PyObject *x;
+#if PY_MAJOR_VERSION < 3
+ if (likely(PyInt_CheckExact(b))) {
+ if (sizeof(Py_ssize_t) >= sizeof(long))
+ return PyInt_AS_LONG(b);
+ else
+ return PyInt_AsSsize_t(b);
+ }
+#endif
+ if (likely(PyLong_CheckExact(b))) {
+ #if CYTHON_USE_PYLONG_INTERNALS
+ if (likely(__Pyx_PyLong_IsCompact(b))) {
+ return __Pyx_PyLong_CompactValue(b);
+ } else {
+ const digit* digits = __Pyx_PyLong_Digits(b);
+ const Py_ssize_t size = __Pyx_PyLong_SignedDigitCount(b);
+ switch (size) {
+ case 2:
+ if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) {
+ return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
+ }
+ break;
+ case -2:
+ if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) {
+ return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
+ }
+ break;
+ case 3:
+ if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) {
+ return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
+ }
+ break;
+ case -3:
+ if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) {
+ return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
+ }
+ break;
+ case 4:
+ if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) {
+ return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
+ }
+ break;
+ case -4:
+ if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) {
+ return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
+ }
+ break;
+ }
+ }
+ #endif
+ return PyLong_AsSsize_t(b);
+ }
+ x = PyNumber_Index(b);
+ if (!x) return -1;
+ ival = PyInt_AsSsize_t(x);
+ Py_DECREF(x);
+ return ival;
+}
+static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject* o) {
+ if (sizeof(Py_hash_t) == sizeof(Py_ssize_t)) {
+ return (Py_hash_t) __Pyx_PyIndex_AsSsize_t(o);
+#if PY_MAJOR_VERSION < 3
+ } else if (likely(PyInt_CheckExact(o))) {
+ return PyInt_AS_LONG(o);
+#endif
+ } else {
+ Py_ssize_t ival;
+ PyObject *x;
+ x = PyNumber_Index(o);
+ if (!x) return -1;
+ ival = PyInt_AsLong(x);
+ Py_DECREF(x);
+ return ival;
+ }
+}
+static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) {
+ return b ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False);
+}
+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
+ return PyInt_FromSize_t(ival);
+}
+
+
+/* #### Code section: utility_code_pragmas_end ### */
+#ifdef _MSC_VER
+#pragma warning( pop )
+#endif
+
+
+
+/* #### Code section: end ### */
+#endif /* Py_PYTHON_H */
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/processor.cp310-win_amd64.pyd b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/processor.cp310-win_amd64.pyd
new file mode 100644
index 0000000000000000000000000000000000000000..e28feb0b49618fe4e97b13f9768244fed149eb6b
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/processor.cp310-win_amd64.pyd
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba760d57f3accf3b24d9cc331dcda273d0612998a034d9250eb8c9db5b9f908a
+size 141312
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..b387863ea7c20b83a05339b2f3182116c779646f
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1a101ecb27adaf367f00c90b3f8e96e7fbda3bf0560d48c368fec3750a040a4
+size 229200
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/processor.pyx b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/processor.pyx
new file mode 100644
index 0000000000000000000000000000000000000000..ee67546a79c1b8a21a338fc7f98f71ba63da7807
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/processor.pyx
@@ -0,0 +1,503 @@
+# cython: language_level=3, boundscheck=False, cdivision=True, wraparound=False
+"""
+Cython version of the IndicProcessor class with optimizations for performance.
+Only preprocess_batch and postprocess_batch are exposed as cpdef methods.
+All other methods are internal (cdef) for optimized Cython usage.
+"""
+
+import regex as re
+from tqdm import tqdm
+from queue import Queue
+from typing import List, Dict, Union
+
+# Importing Python objects since these libraries don't offer C-extensions
+from indicnlp.tokenize import indic_tokenize, indic_detokenize
+from indicnlp.normalize.indic_normalize import IndicNormalizerFactory
+from sacremoses import MosesPunctNormalizer, MosesTokenizer, MosesDetokenizer
+from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator
+
+
+cdef class IndicProcessor:
+ cdef public bint inference
+
+ # Precompiled regex patterns and placeholders
+ cdef object _MULTISPACE_REGEX
+ cdef object _DIGIT_SPACE_PERCENT
+ cdef object _DOUBLE_QUOT_PUNC
+ cdef object _DIGIT_NBSP_DIGIT
+ cdef object _END_BRACKET_SPACE_PUNC_REGEX
+
+ cdef object _URL_PATTERN
+ cdef object _NUMERAL_PATTERN
+ cdef object _EMAIL_PATTERN
+ cdef object _OTHER_PATTERN
+
+ cdef list _PUNC_REPLACEMENTS
+ cdef list _INDIC_FAILURE_CASES
+
+ cdef dict _flores_codes
+ cdef dict _digits_translation_table
+
+ # Placeholder maps stored in a Python Queue (treated as `object` for Cython)
+ cdef object _placeholder_entity_maps
+
+ # Tools (also Python objects)
+ cdef object _en_tok
+ cdef object _en_normalizer
+ cdef object _en_detok
+ cdef object _xliterator
+
+ def __cinit__(self, bint inference=True):
+ """
+ Constructor for IndicProcessor. Initializes all necessary components.
+ """
+ self.inference = inference
+
+ ##############################
+ # FLORES -> ISO CODES
+ ##############################
+ self._flores_codes = {
+ "asm_Beng": "as",
+ "awa_Deva": "hi",
+ "ben_Beng": "bn",
+ "bho_Deva": "hi",
+ "brx_Deva": "hi",
+ "doi_Deva": "hi",
+ "eng_Latn": "en",
+ "gom_Deva": "kK",
+ "gon_Deva": "hi",
+ "guj_Gujr": "gu",
+ "hin_Deva": "hi",
+ "hne_Deva": "hi",
+ "kan_Knda": "kn",
+ "kas_Arab": "ur",
+ "kas_Deva": "hi",
+ "kha_Latn": "en",
+ "lus_Latn": "en",
+ "mag_Deva": "hi",
+ "mai_Deva": "hi",
+ "mal_Mlym": "ml",
+ "mar_Deva": "mr",
+ "mni_Beng": "bn",
+ "mni_Mtei": "hi",
+ "npi_Deva": "ne",
+ "ory_Orya": "or",
+ "pan_Guru": "pa",
+ "san_Deva": "hi",
+ "sat_Olck": "or",
+ "snd_Arab": "ur",
+ "snd_Deva": "hi",
+ "tam_Taml": "ta",
+ "tel_Telu": "te",
+ "urd_Arab": "ur",
+ "unr_Deva": "hi",
+ }
+
+ ##############################
+ # INDIC DIGIT TRANSLATION (str.translate)
+ ##############################
+ self._digits_translation_table = {}
+ cdef dict digits_dict = {
+ "\u09e6": "0", "\u0ae6": "0", "\u0ce6": "0", "\u0966": "0",
+ "\u0660": "0", "\uabf0": "0", "\u0b66": "0", "\u0a66": "0",
+ "\u1c50": "0", "\u06f0": "0",
+
+ "\u09e7": "1", "\u0ae7": "1", "\u0967": "1", "\u0ce7": "1",
+ "\u06f1": "1", "\uabf1": "1", "\u0b67": "1", "\u0a67": "1",
+ "\u1c51": "1", "\u0c67": "1",
+
+ "\u09e8": "2", "\u0ae8": "2", "\u0968": "2", "\u0ce8": "2",
+ "\u06f2": "2", "\uabf2": "2", "\u0b68": "2", "\u0a68": "2",
+ "\u1c52": "2", "\u0c68": "2",
+
+ "\u09e9": "3", "\u0ae9": "3", "\u0969": "3", "\u0ce9": "3",
+ "\u06f3": "3", "\uabf3": "3", "\u0b69": "3", "\u0a69": "3",
+ "\u1c53": "3", "\u0c69": "3",
+
+ "\u09ea": "4", "\u0aea": "4", "\u096a": "4", "\u0cea": "4",
+ "\u06f4": "4", "\uabf4": "4", "\u0b6a": "4", "\u0a6a": "4",
+ "\u1c54": "4", "\u0c6a": "4",
+
+ "\u09eb": "5", "\u0aeb": "5", "\u096b": "5", "\u0ceb": "5",
+ "\u06f5": "5", "\uabf5": "5", "\u0b6b": "5", "\u0a6b": "5",
+ "\u1c55": "5", "\u0c6b": "5",
+
+ "\u09ec": "6", "\u0aec": "6", "\u096c": "6", "\u0cec": "6",
+ "\u06f6": "6", "\uabf6": "6", "\u0b6c": "6", "\u0a6c": "6",
+ "\u1c56": "6", "\u0c6c": "6",
+
+ "\u09ed": "7", "\u0aed": "7", "\u096d": "7", "\u0ced": "7",
+ "\u06f7": "7", "\uabf7": "7", "\u0b6d": "7", "\u0a6d": "7",
+ "\u1c57": "7", "\u0c6d": "7",
+
+ "\u09ee": "8", "\u0aee": "8", "\u096e": "8", "\u0cee": "8",
+ "\u06f8": "8", "\uabf8": "8", "\u0b6e": "8", "\u0a6e": "8",
+ "\u1c58": "8", "\u0c6e": "8",
+
+ "\u09ef": "9", "\u0aef": "9", "\u096f": "9", "\u0cef": "9",
+ "\u06f9": "9", "\uabf9": "9", "\u0b6f": "9", "\u0a6f": "9",
+ "\u1c59": "9", "\u0c6f": "9",
+ }
+ for k, v in digits_dict.items():
+ self._digits_translation_table[ord(k)] = v
+
+ # Also map ASCII '0'-'9'
+ for c in range(ord('0'), ord('9') + 1):
+ self._digits_translation_table[c] = chr(c)
+
+ ##############################
+ # PLACEHOLDER MAP QUEUE
+ ##############################
+ self._placeholder_entity_maps = Queue()
+
+ ##############################
+ # MOSES (as Python objects)
+ ##############################
+ self._en_tok = MosesTokenizer(lang="en")
+ self._en_normalizer = MosesPunctNormalizer()
+ self._en_detok = MosesDetokenizer(lang="en")
+
+ ##############################
+ # TRANSLITERATOR (Python object)
+ ##############################
+ self._xliterator = UnicodeIndicTransliterator()
+
+ ##############################
+ # Precompiled Patterns
+ ##############################
+ self._MULTISPACE_REGEX = re.compile(r"[ ]{2,}")
+ self._DIGIT_SPACE_PERCENT = re.compile(r"(\d) %")
+ self._DOUBLE_QUOT_PUNC = re.compile(r"\"([,\.]+)")
+ self._DIGIT_NBSP_DIGIT = re.compile(r"(\d) (\d)")
+ self._END_BRACKET_SPACE_PUNC_REGEX = re.compile(r"\) ([\.!:?;,])")
+
+ self._URL_PATTERN = re.compile(
+ r"\b(?<![\w/.])(?:(?:https?|ftp)://)?(?:(?:[\w-]+\.)+(?!\.))(?:[\w/\-?#&=%.]+)+(?!\.\w+)\b"
+ )
+ self._NUMERAL_PATTERN = re.compile(
+ r"(~?\d+\.?\d*\s?%?\s?-?\s?~?\d+\.?\d*\s?%|~?\d+%|\d+[-/.,:'+]\d+(?:\.\d+)?)"
+ )
+ self._EMAIL_PATTERN = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
+ self._OTHER_PATTERN = re.compile(r"[A-Za-z0-9]*[#|@]\w+")
+
+ ##############################
+ # PUNCTUATION REPLACEMENTS
+ ##############################
+ self._PUNC_REPLACEMENTS = [
+ (re.compile(r"\r"), ""),
+ (re.compile(r"\(\s*"), "("),
+ (re.compile(r"\s*\)"), ")"),
+ (re.compile(r"\s:\s?"), ": "),
+ (re.compile(r"\s;\s?"), "; "),
+ (re.compile(r"[`´‘‚’]"), "'"),
+ (re.compile(r"[„“”«»]"), '"'),
+ (re.compile(r"[–—]"), "-"),
+ (re.compile(r" %"), "%"),
+ ]
+
+ ##############################
+ # INDIC FAILURE CASES (script-specific renderings of the "ID" tag)
+ ##############################
+ self._INDIC_FAILURE_CASES = [
+ "आईडी", "आई. डी.", "आयडी", "ऐटि", "آئی ڈی", "ᱟᱭᱰᱤ", "ꯑꯥꯏꯗꯤ",
+ ]
+
+ # Internal Method: Punctuation Normalization
+ cdef str _punc_norm(self, str text) except *:
+ """
+ Normalizes punctuation using the precompiled replacement patterns.
+ """
+ for pattern, replacement in self._PUNC_REPLACEMENTS:
+ text = pattern.sub(replacement, text)
+ text = self._MULTISPACE_REGEX.sub(" ", text)
+ text = self._DIGIT_SPACE_PERCENT.sub(r"\1%", text)
+ text = self._DOUBLE_QUOT_PUNC.sub(r'\1"', text)
+ text = self._DIGIT_NBSP_DIGIT.sub(r"\1.\2", text)
+ text = self._END_BRACKET_SPACE_PUNC_REGEX.sub(r")\1", text)
+ return text.strip()
+
+ # Internal Method: Wrap Substrings with Placeholders
+ cdef str _wrap_with_placeholders(self, str text) except *:
+ """
+ Wraps URLs, emails, numerals, and hashtags/handles with <ID-n> placeholders
+ so they pass through translation unchanged.
+ """
+ cdef int serial_no = 1
+ cdef dict placeholder_entity_map = {}
+ cdef list patterns = [
+ self._EMAIL_PATTERN,
+ self._URL_PATTERN,
+ self._NUMERAL_PATTERN,
+ self._OTHER_PATTERN,
+ ]
+ cdef object pattern
+ cdef str match
+ cdef str base_placeholder
+ cdef str indic_case
+
+ for pattern in patterns:
+ for match in set(pattern.findall(text)):
+ # Skip short, likely-spurious matches
+ if pattern is self._URL_PATTERN and len(match.replace(".", "")) < 4:
+ continue
+ if pattern is self._NUMERAL_PATTERN and len(match.replace(" ", "").replace(".", "").replace(":", "")) < 4:
+ continue
+
+ base_placeholder = f"<ID{serial_no}>"
+ # Map various placeholder formats to the matched text
+ placeholder_entity_map[f"<ID{serial_no}>"] = match
+ placeholder_entity_map[f"< ID{serial_no} >"] = match
+ placeholder_entity_map[f"[ID{serial_no}]"] = match
+ placeholder_entity_map[f"[ ID{serial_no} ]"] = match
+ placeholder_entity_map[f"[ID {serial_no}]"] = match
+ # Handle script-specific renderings of the "ID" token
+ for indic_case in self._INDIC_FAILURE_CASES:
+ placeholder_entity_map[f"<{indic_case}{serial_no}>"] = match
+ placeholder_entity_map[f"< {indic_case}{serial_no} >"] = match
+ placeholder_entity_map[f"< {indic_case} {serial_no} >"] = match
+ placeholder_entity_map[f"<{indic_case} {serial_no}]"] = match
+ placeholder_entity_map[f"< {indic_case} {serial_no} ]"] = match
+ placeholder_entity_map[f"[{indic_case}{serial_no}]"] = match
+ placeholder_entity_map[f"[{indic_case} {serial_no}]"] = match
+ placeholder_entity_map[f"[ {indic_case}{serial_no} ]"] = match
+ placeholder_entity_map[f"[ {indic_case} {serial_no} ]"] = match
+ placeholder_entity_map[f"{indic_case} {serial_no}"] = match
+ placeholder_entity_map[f"{indic_case}{serial_no}"] = match
+
+ # Replace the match with the base placeholder
+ text = text.replace(match, base_placeholder)
+ serial_no += 1
+
+ # Clean up any remaining placeholder artifacts
+ text = re.sub(r"\s+", " ", text).replace(">/", ">").replace("]/", "]")
+ self._placeholder_entity_maps.put(placeholder_entity_map)
+ return text
+
+ # Internal Method: Normalize Text
+ cdef str _normalize(self, str text) except *:
+ """
+ Normalizes numerals and optionally wraps placeholders.
+ """
+ # Single-pass digit translation
+ text = text.translate(self._digits_translation_table)
+
+ if self.inference:
+ text = self._wrap_with_placeholders(text)
+ return text
+
+ # Internal Method: Indic Tokenize and Transliterate
+ cdef str _do_indic_tokenize_and_transliterate(
+ self,
+ str sentence,
+ object normalizer,
+ str iso_lang,
+ bint transliterate
+ ) except *:
+ """
+ Helper method: normalizes, tokenizes, optionally transliterates from iso_lang -> 'hi'.
+ """
+ cdef str normed
+ cdef list tokens
+ cdef str joined
+ cdef str xlated
+
+ normed = normalizer.normalize(sentence.strip())
+ tokens = indic_tokenize.trivial_tokenize(normed, iso_lang)
+ joined = " ".join(tokens)
+ xlated = joined
+ if transliterate:
+ xlated = self._xliterator.transliterate(joined, iso_lang, "hi")
+ xlated = xlated.replace(" ् ", "्")
+ return xlated
+
+ # Internal Method: Preprocess a Single Sentence
+ cdef str _preprocess(
+ self,
+ str sent,
+ str src_lang,
+ str tgt_lang,
+ object normalizer,
+ bint is_target
+ ) except *:
+ """
+ Preprocess a single sentence: punctuation normalization, numeral normalization,
+ tokenization, transliteration, and adding language tags if necessary.
+ """
+ cdef str iso_lang = self._flores_codes.get(src_lang, "hi")
+ cdef str script_part = src_lang.split("_")[1]
+ cdef bint do_transliterate = True
+ cdef str e_strip
+ cdef str e_norm
+ cdef list e_tokens
+ cdef str processed_sent
+
+ # 1) Punctuation normalization
+ sent = self._punc_norm(sent)
+
+ # 2) Numerals & placeholders
+ sent = self._normalize(sent)
+
+ if script_part in ["Arab", "Aran", "Olck", "Mtei", "Latn"]:
+ do_transliterate = False
+
+ if iso_lang == "en":
+ # English path
+ e_strip = sent.strip()
+ e_norm = self._en_normalizer.normalize(e_strip)
+ e_tokens = self._en_tok.tokenize(e_norm, escape=False)
+ processed_sent = " ".join(e_tokens)
+ else:
+ # Indic path
+ processed_sent = self._do_indic_tokenize_and_transliterate(sent, normalizer, iso_lang, do_transliterate)
+
+ processed_sent = processed_sent.strip()
+ if not is_target:
+ return f"{src_lang} {tgt_lang} {processed_sent}"
+ else:
+ return processed_sent
+
+ # Internal Method: Postprocess a Single Sentence
+ cdef str _postprocess(self, object sent, str lang) except *:
+ """
+ Postprocess a single sentence:
+ 1) Pull placeholder map from queue
+ 2) Fix scripts for Perso-Arabic
+ 3) Restore placeholders
+ 4) Detokenize
+ """
+ cdef dict placeholder_entity_map
+ cdef str lang_code
+ cdef str script_code
+ cdef str iso_lang
+ cdef str k
+ cdef str v
+ cdef str xlated
+
+ # Unwrap if sent is a tuple or list
+ if isinstance(sent, (tuple, list)):
+ sent = sent[0]
+
+ placeholder_entity_map = self._placeholder_entity_maps.get()
+ lang_code, script_code = lang.split("_", 1)
+ iso_lang = self._flores_codes.get(lang, "hi")
+
+ # Fix for Perso-Arabic scripts
+ if script_code in ["Arab", "Aran"]:
+ sent = (
+ sent.replace(" ؟", "؟")
+ .replace(" ۔", "۔")
+ .replace(" ،", "،")
+ .replace("ٮ۪", "ؠ")
+ )
+
+ # Oriya fix
+ if lang_code == "ory":
+ sent = sent.replace("ଯ଼", "ୟ")
+
+ # Restore placeholders
+ for k, v in placeholder_entity_map.items():
+ sent = sent.replace(k, v)
+
+ # Detokenize
+ if lang == "eng_Latn":
+ return self._en_detok.detokenize(sent.split(" "))
+ else:
+ xlated = self._xliterator.transliterate(sent, "hi", iso_lang)
+ return indic_detokenize.trivial_detokenize(xlated, iso_lang)
+
+ # Exposed Method: Preprocess a Batch of Sentences
+ cpdef list preprocess_batch(
+ self,
+ List[str] batch,
+ str src_lang,
+ str tgt_lang=None,
+ bint is_target=False,
+ bint visualize=False
+ ):
+ """
+ Preprocess an array of sentences (normalize, tokenize, transliterate).
+ This is exposed for external use.
+ """
+ cdef object normalizer = None
+ cdef str iso_code = self._flores_codes.get(src_lang, "hi")
+ cdef object iterator
+ cdef list results
+ cdef int i
+ cdef int n = len(batch)
+
+ if src_lang != "eng_Latn":
+ normalizer = IndicNormalizerFactory().get_normalizer(iso_code)
+
+ if visualize:
+ iterator = tqdm(batch, total=n, desc=f" | > Pre-processing {src_lang}", unit="line")
+ else:
+ iterator = batch
+
+ return [self._preprocess(s, src_lang, tgt_lang, normalizer, is_target) for s in iterator]
+
+ # Exposed Method: Postprocess a Batch of Sentences
+ cpdef list postprocess_batch(
+ self,
+ List[str] sents,
+ str lang="hin_Deva",
+ bint visualize=False
+ ):
+ """
+ Postprocess a batch of sentences:
+ Restore placeholders, fix script issues, and detokenize.
+ This is exposed for external use.
+ """
+ cdef object iterator
+ cdef list results
+ cdef int i
+ cdef int n = len(sents)
+
+ if visualize:
+ iterator = tqdm(sents, total=n, desc=f" | > Post-processing {lang}", unit="line")
+ else:
+ iterator = sents
+
+ results = [self._postprocess(s, lang) for s in iterator]
+ self._placeholder_entity_maps.queue.clear()
+
+ return results
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/version.py b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae174d2c7300c1eecc46072f4358e731667aed33
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/version.py
@@ -0,0 +1 @@
+__version__ = "1.0.3"
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/version.txt b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/version.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e4c0d46e55ffb2237c9e900aa77172886f6c8aa5
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/IndicTransToolkit/version.txt
@@ -0,0 +1 @@
+1.0.3
\ No newline at end of file
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/LICENSE b/IndicTrans2/huggingface_interface/IndicTransToolkit/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..498691d06e5d9f57356c6f7f6930df80ec49b15f
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) Varun Gumma.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/README.md b/IndicTrans2/huggingface_interface/IndicTransToolkit/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..692ece738b75029dfbf364fdeef428805db1e8b0
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/README.md
@@ -0,0 +1,97 @@
+# IndicTransToolkit
+
+## About
+The goal of this repository is to provide a simple, modular, and extendable toolkit for [IndicTrans2](https://github.com/AI4Bharat/IndicTrans2) that is compatible with the released HuggingFace models. Please refer to `CHANGELOG.md` for the latest developments.
+
+## Pre-requisites
+ - `Python 3.8+`
+ - [Indic NLP Library](https://github.com/VarunGumma/indic_nlp_library)
+ - Other requirements as listed in `requirements.txt`
+
+## Configuration
+ - Editable installation (note: this may take a while):
+```bash
+git clone https://github.com/VarunGumma/IndicTransToolkit
+cd IndicTransToolkit
+
+pip install --editable . --use-pep517 # required for pip >= 25.0
+
+# in case it fails, try:
+# pip install --editable . --use-pep517 --config-settings editable_mode=compat
+```
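+
+As a quick sanity check after installation (a minimal sketch; it assumes the Cython extension compiled successfully), import the processor and preprocess a sample sentence:
+
+```python
+from IndicTransToolkit.processor import IndicProcessor
+
+ip = IndicProcessor(inference=True)
+print(ip.preprocess_batch(["This is a test sentence."], src_lang="eng_Latn", tgt_lang="hin_Deva"))
+```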
+
+## Examples
+For the training use case, please refer to the [huggingface_interface](https://github.com/AI4Bharat/IndicTrans2/tree/main/huggingface_interface) section of IndicTrans2.
+
+### PreTrainedTokenizer
+```python
+import torch
+from IndicTransToolkit.processor import IndicProcessor # NOW IMPLEMENTED IN CYTHON !!
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+
+ip = IndicProcessor(inference=True)
+tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-en-indic-dist-200M", trust_remote_code=True)
+model = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-en-indic-dist-200M", trust_remote_code=True)
+
+sentences = [
+ "This is a test sentence.",
+ "This is another longer different test sentence.",
+ "Please send an SMS to 9876543210 and an email on newemail123@xyz.com by 15th October, 2023.",
+]
+
+batch = ip.preprocess_batch(sentences, src_lang="eng_Latn", tgt_lang="hin_Deva", visualize=False) # set it to visualize=True to print a progress bar
+batch = tokenizer(batch, padding="longest", truncation=True, max_length=256, return_tensors="pt")
+
+with torch.inference_mode():
+ outputs = model.generate(**batch, num_beams=5, num_return_sequences=1, max_length=256)
+
+with tokenizer.as_target_tokenizer():
+ # This scoping is absolutely necessary, as it will instruct the tokenizer to tokenize using the target vocabulary.
+ # Failure to use this scoping will result in gibberish/unexpected predictions as the output will be de-tokenized with the source vocabulary instead.
+ outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+
+outputs = ip.postprocess_batch(outputs, lang="hin_Deva")
+print(outputs)
+
+>>> ['यह एक परीक्षण वाक्य है।', 'यह एक और लंबा अलग परीक्षण वाक्य है।', 'कृपया 9876543210 पर एक एस. एम. एस. भेजें और 15 अक्टूबर, 2023 तक newemail123@xyz.com पर एक ईमेल भेजें।']
+```
+
+### Evaluation
+- `IndicEvaluator` is a Python implementation of [compute_metrics.sh](https://github.com/AI4Bharat/IndicTrans2/blob/main/compute_metrics.sh).
+- We have found that this Python implementation gives slightly lower scores than the original `compute_metrics.sh`. Please use it cautiously, and feel free to raise a PR if you find the bug or a fix.
+```python
+from IndicTransToolkit import IndicEvaluator
+
+# this method returns a dictionary with BLEU and ChrF2++ scores with appropriate signatures
+evaluator = IndicEvaluator()
+scores = evaluator.evaluate(tgt_lang=tgt_lang, preds=pred_file, refs=ref_file)
+
+# alternatively, you can pass the list of predictions and references instead of files
+# scores = evaluator.evaluate(tgt_lang=tgt_lang, preds=preds, refs=refs)
+```
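+
+A self-contained sketch of the list-based variant (the sentences are illustrative; identical predictions and references should yield near-maximal scores):
+
+```python
+from IndicTransToolkit import IndicEvaluator
+
+evaluator = IndicEvaluator()
+preds = ["यह एक परीक्षण वाक्य है।"]
+refs = ["यह एक परीक्षण वाक्य है।"]
+
+scores = evaluator.evaluate(tgt_lang="hin_Deva", preds=preds, refs=refs)
+print(scores)  # dictionary with BLEU and chrF2++ scores and their signatures
+```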
+
+## Authors
+ - Varun Gumma (varun230999@gmail.com)
+ - Jay Gala (jaygala24@gmail.com)
+ - Pranjal Agadh Chitale (pranjalchitale@gmail.com)
+ - Raj Dabre (prajdabre@gmail.com)
+
+
+## Bugs and Contribution
+Since this is a bleeding-edge module, you may occasionally run into breakage or import issues. If you encounter any bugs or want additional functionality, please feel free to raise `Issues`/`Pull Requests` or contact the authors.
+
+
+## Citation
+If you use our codebase or models, please cite the following paper:
+```bibtex
+@article{gala2023indictrans,
+ title={IndicTrans2: Towards High-Quality and Accessible Machine Translation Models for all 22 Scheduled Indian Languages},
+ author={Jay Gala and Pranjal A Chitale and A K Raghavan and Varun Gumma and Sumanth Doddapaneni and Aswanth Kumar M and Janki Atul Nawale and Anupama Sujatha and Ratish Puduppully and Vivek Raghavan and Pratyush Kumar and Mitesh M Khapra and Raj Dabre and Anoop Kunchukuttan},
+ journal={Transactions on Machine Learning Research},
+ issn={2835-8856},
+ year={2023},
+ url={https://openreview.net/forum?id=vfT4YuzAYA},
+ note={}
+}
+```
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/app.py b/IndicTrans2/huggingface_interface/IndicTransToolkit/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a2420ec8860703fbb194fd8716db9140b540c4d
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/app.py
@@ -0,0 +1,118 @@
+import gradio as gr
+import torch
+from transformers import AutoModelForSeq2SeqLM, BitsAndBytesConfig, AutoTokenizer
+from IndicTransToolkit import IndicProcessor
+import speech_recognition as sr
+
+# Constants
+BATCH_SIZE = 4
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+quantization = None
+
+# ---- IndicTrans2 Model Initialization ----
+def initialize_model_and_tokenizer(ckpt_dir, quantization):
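+ """Initialize the tokenizer and model, optionally quantized to 4-bit or 8-bit via bitsandbytes."""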
+ if quantization == "4-bit":
+ qconfig = BitsAndBytesConfig(
+ load_in_4bit=True,
+ bnb_4bit_use_double_quant=True,
+ bnb_4bit_compute_dtype=torch.bfloat16,
+ )
+ elif quantization == "8-bit":
+ qconfig = BitsAndBytesConfig(
+ load_in_8bit=True,
+ bnb_8bit_use_double_quant=True,
+ bnb_8bit_compute_dtype=torch.bfloat16,
+ )
+ else:
+ qconfig = None
+
+ tokenizer = AutoTokenizer.from_pretrained(ckpt_dir, trust_remote_code=True)
+ model = AutoModelForSeq2SeqLM.from_pretrained(
+ ckpt_dir,
+ trust_remote_code=True,
+ low_cpu_mem_usage=True,
+ quantization_config=qconfig,
+ )
+
+ if qconfig is None:
+ model = model.to(DEVICE)
+ if DEVICE == "cuda":
+ model.half()
+
+ model.eval()
+ return tokenizer, model
+
+def batch_translate(input_sentences, src_lang, tgt_lang, model, tokenizer, ip):
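+ """Translate input_sentences from src_lang to tgt_lang in chunks of BATCH_SIZE."""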
+ translations = []
+ for i in range(0, len(input_sentences), BATCH_SIZE):
+ batch = input_sentences[i : i + BATCH_SIZE]
+ batch = ip.preprocess_batch(batch, src_lang=src_lang, tgt_lang=tgt_lang)
+ inputs = tokenizer(
+ batch,
+ truncation=True,
+ padding="longest",
+ return_tensors="pt",
+ return_attention_mask=True,
+ ).to(DEVICE)
+
+ with torch.no_grad():
+ generated_tokens = model.generate(
+ **inputs,
+ use_cache=True,
+ min_length=0,
+ max_length=256,
+ num_beams=5,
+ num_return_sequences=1,
+ )
+
+ with tokenizer.as_target_tokenizer():
+ generated_tokens = tokenizer.batch_decode(
+ generated_tokens.detach().cpu().tolist(),
+ skip_special_tokens=True,
+ clean_up_tokenization_spaces=True,
+ )
+
+ translations += ip.postprocess_batch(generated_tokens, lang=tgt_lang)
+ del inputs
+ torch.cuda.empty_cache()
+
+ return translations
+
+# Initialize IndicTrans2
+indic_en_ckpt_dir = "ai4bharat/indictrans2-indic-en-1B"
+indic_en_tokenizer, indic_en_model = initialize_model_and_tokenizer(indic_en_ckpt_dir, quantization)
+ip = IndicProcessor(inference=True)
+
+# ---- Gradio Function ----
+def transcribe_and_translate(audio):
+ recognizer = sr.Recognizer()
+ with sr.AudioFile(audio) as source:
+ audio_data = recognizer.record(source)
+ try:
+ # Malayalam transcription using Google API
+ malayalam_text = recognizer.recognize_google(audio_data, language="ml-IN")
+ except sr.UnknownValueError:
+ return "Could not understand audio", ""
+ except sr.RequestError as e:
+ return f"Google API Error: {e}", ""
+
+ # Translate the Malayalam transcription to English
+ ml_sents = [malayalam_text]
+ src_lang, tgt_lang = "mal_Mlym", "eng_Latn"
+ translations = batch_translate(ml_sents, src_lang, tgt_lang, indic_en_model, indic_en_tokenizer, ip)
+
+ return malayalam_text, translations[0]
+
+# ---- Gradio Interface ----
+iface = gr.Interface(
+ fn=transcribe_and_translate,
+ inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
+ outputs=[
+ gr.Textbox(label="Malayalam Transcription"),
+ gr.Textbox(label="English Translation")
+ ],
+ title="Malayalam Speech Recognition & Translation",
+ description="Speak in Malayalam → Transcribe using Google Speech Recognition → Translate to English using IndicTrans2."
+)
+
+iface.launch(debug=True)
\ No newline at end of file
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.cpython-310-x86_64-linux-gnu.so b/IndicTrans2/huggingface_interface/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.cpython-310-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..01765caee820c9b115af62dd43c74a61694f2856
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.cpython-310-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d57d4239b3638a272e4b70292f10494ee4a0fee201a9d74c62fc35a3d263a45
+size 260304
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so b/IndicTrans2/huggingface_interface/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..b387863ea7c20b83a05339b2f3182116c779646f
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1a101ecb27adaf367f00c90b3f8e96e7fbda3bf0560d48c368fec3750a040a4
+size 229200
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.o b/IndicTrans2/huggingface_interface/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.o
new file mode 100644
index 0000000000000000000000000000000000000000..27265c8301ef533f6d71fae9155f794ab6a143c3
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.o
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9e82df38b208dc0a9b468ff669c9da159c7deaabcb389fcfacd43e038504fec
+size 347184
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/processor.o b/IndicTrans2/huggingface_interface/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/processor.o
new file mode 100644
index 0000000000000000000000000000000000000000..1458e809a01af013191e526c0f7f45142fcb5bf6
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/processor.o
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d27c2cc00c97a89f97f7c28bc9175c5c403a0e2a372a0b39f1c5fe8609adda09
+size 303696
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/main.py b/IndicTrans2/huggingface_interface/IndicTransToolkit/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6acc0a84e8a63f6e4b80785803a6a383b5e5712
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/main.py
@@ -0,0 +1,113 @@
+import torch
+from transformers import AutoModelForSeq2SeqLM, BitsAndBytesConfig, AutoTokenizer
+from IndicTransToolkit import IndicProcessor
+
+# Constants
+BATCH_SIZE = 4
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+quantization = None
+
+def initialize_model_and_tokenizer(ckpt_dir, quantization):
+ """Initialize the model and tokenizer with optional quantization."""
+ if quantization == "4-bit":
+ qconfig = BitsAndBytesConfig(
+ load_in_4bit=True,
+ bnb_4bit_use_double_quant=True,
+ bnb_4bit_compute_dtype=torch.bfloat16,
+ )
+ elif quantization == "8-bit":
+ qconfig = BitsAndBytesConfig(
+ load_in_8bit=True,
+ bnb_8bit_use_double_quant=True,
+ bnb_8bit_compute_dtype=torch.bfloat16,
+ )
+ else:
+ qconfig = None
+
+ tokenizer = AutoTokenizer.from_pretrained(ckpt_dir, trust_remote_code=True)
+ model = AutoModelForSeq2SeqLM.from_pretrained(
+ ckpt_dir,
+ trust_remote_code=True,
+ low_cpu_mem_usage=True,
+ quantization_config=qconfig,
+ )
+
+ if qconfig is None:
+ model = model.to(DEVICE)
+ if DEVICE == "cuda":
+ model.half()
+
+ model.eval()
+ return tokenizer, model
+
+
+def batch_translate(input_sentences, src_lang, tgt_lang, model, tokenizer, ip):
+ """Batch translate sentences from src_lang to tgt_lang."""
+ translations = []
+
+ for i in range(0, len(input_sentences), BATCH_SIZE):
+ batch = input_sentences[i : i + BATCH_SIZE]
+
+ # Preprocess the batch and extract entity mappings
+ batch = ip.preprocess_batch(batch, src_lang=src_lang, tgt_lang=tgt_lang)
+
+ # Tokenize the batch and generate input encodings
+ inputs = tokenizer(
+ batch,
+ truncation=True,
+ padding="longest",
+ return_tensors="pt",
+ return_attention_mask=True,
+ ).to(DEVICE)
+
+ # Generate translations using the model
+ with torch.no_grad():
+ generated_tokens = model.generate(
+ **inputs,
+ use_cache=True,
+ min_length=0,
+ max_length=256,
+ num_beams=5,
+ num_return_sequences=1,
+ )
+
+ # Decode the generated tokens into text
+ with tokenizer.as_target_tokenizer():
+ generated_tokens = tokenizer.batch_decode(
+ generated_tokens.detach().cpu().tolist(),
+ skip_special_tokens=True,
+ clean_up_tokenization_spaces=True,
+ )
+
+ # Postprocess the translations, including entity replacement
+ translations += ip.postprocess_batch(generated_tokens, lang=tgt_lang)
+
+ del inputs
+ torch.cuda.empty_cache()
+
+ return translations
+
+
+# Initialize the model and processor
+indic_en_ckpt_dir = "ai4bharat/indictrans2-indic-en-1B"
+indic_en_tokenizer, indic_en_model = initialize_model_and_tokenizer(indic_en_ckpt_dir, quantization)
+ip = IndicProcessor(inference=True)
+
+# Sample sentences (Malayalam source text)
+ml_sents = [
+ """ഹലോ ഫ്രണ്ട്സ് കോളേജ് സ്കൂളിൻറെ മറ്റൊരു അധ്യായത്തിലേക്ക് ഏവർക്കും സ്വാഗതം ഇന്ന് ഞാൻ വന്നിരിക്കുന്നത് ചെറിയ കുട്ടികൾക്കായുള്ള ഒരു മലയാളം പ്രസംഗവും ആയിട്ടാണ് പ്രസംഗ വിഷയം ഇന്ത്യ എൻറെ രാജ്യം ആയിരക്കണക്കിന് വർഷങ്ങളുടെ പാരമ്പര്യം പേറുന്ന മഹത്തായ രാജ്യമാണ് ഇന്ത്യ 1947 ൽ ബ്രിട്ടീഷുകാരിൽ നിന്നും സ്വാതന്ത്ര്യം നേടിയ നമ്മുടെ ഭാരതം അനേകം നാട്ടുരാജ്യങ്ങൾ ചേർന്ന് ഏറ്റവും വലിയ ജനാധിപത്യ രാജ്യമായി ആശയുടെ അടിസ്ഥാനത്തിൽ നല്ല ഭരണത്തിന് സഹായകമാകും വിധം സംസ്ഥാനങ്ങൾ രൂപം കൊണ്ടും എന്ന് 28 സംസ്ഥാനങ്ങൾ ആണ് ഇന്ത്യയിൽ ഉള്ളത് നാനാത്വത്തിലെ ഏകത്വം എന്ന ചിന്ത വിവിധ ഭാഷകളും ജാതികളും മതങ്ങളും ആചാരങ്ങളും ജീവിതരീതികളും ഉള്ള ഒരു വലിയ ജനതയെ ഒറ്റക്കെട്ടായി നിർത്തുന്നു അതാണ് ഭാരതത്തിൻറെ വിജയം നേടിയ ലോകമേ തറവാട് എന്നതാണ് ഭാരത സംസ്കാരം അതുകൊണ്ട് തന്നെ ഇന്ത്യക്കാരെ മാത്രമല്ല ലോകം മുഴുവനും ഉള്ള എല്ലാവരെയും ഭാരതം സന്തോഷത്തോടെ ഉൾക്കൊള്ളുകയും സ്നേഹിക്കുകയും ചെയ്യുന്ന പ്രസിഡണ്ടും പ്രധാനമന്ത്രിയും മന്ത്രിമാരും ചേർന്ന് നമ്മുടെ രാജ്യം ഭരിക്കുന്നു മുഖ്യമന്ത്രിയും മന്ത്രിമാരും ചേർന്ന് സംസ്ഥാനങ്ങളെയും പരിപാലിക്കുന്നു എൻറെ ഇന്ത്യ അഭിമാനമാണ് സംസ്കാരങ്ങൾ ചേർന്ന് മനോഹരിയായി പുഞ്ചിരിക്കുന്ന എൻറെ അമ്മ ഭാരതമെന്നു കേട്ടാൽ തിളക്കണം ചോര നമുക്ക് ഞരമ്പുകളിൽ"""
+]
+
+# Translation (Malayalam to English)
+src_lang, tgt_lang = "mal_Mlym", "eng_Latn"
+en_translations = batch_translate(ml_sents, src_lang, tgt_lang, indic_en_model, indic_en_tokenizer, ip)
+
+# Print translations
+print(f"\n{src_lang} - {tgt_lang}")
+for input_sentence, translation in zip(ml_sents, en_translations):
+ print(f"{src_lang}: {input_sentence}")
+ print(f"{tgt_lang}: {translation}")
+
+# Free GPU memory
+del indic_en_tokenizer, indic_en_model
+torch.cuda.empty_cache()
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/pyproject.toml b/IndicTrans2/huggingface_interface/IndicTransToolkit/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..3c285191a0288acb4b47e0744ac4bb01c28f9bcc
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/pyproject.toml
@@ -0,0 +1,25 @@
+[build-system]
+requires = [
+ "setuptools>=68.2.2",
+ "wheel",
+ "Cython",
+]
+build-backend = "setuptools.build_meta"
+
+[tool.black]
+# Black configuration for code formatting
+line-length = 88
+target-version = ['py38']
+exclude = '''
+/(
+ \.git
+ | \.hg
+ | \.mypy_cache
+ | \.tox
+ | \.venv
+ | _build
+ | buck-out
+ | build
+ | dist
+)/
+'''
\ No newline at end of file
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/requirements.txt b/IndicTrans2/huggingface_interface/IndicTransToolkit/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e22472b64d63e9f46b5d800696f64556f13692d2
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/requirements.txt
@@ -0,0 +1,8 @@
+setuptools>=68.2.2
+torch
+cython
+sacremoses
+sentencepiece
+transformers
+sacrebleu
+indic-nlp-library-IT2 @ git+https://github.com/VarunGumma/indic_nlp_library.git
diff --git a/IndicTrans2/huggingface_interface/IndicTransToolkit/setup.py b/IndicTrans2/huggingface_interface/IndicTransToolkit/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b9bef168181fd18e7b34b0bda2a43d8c3e14982
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/IndicTransToolkit/setup.py
@@ -0,0 +1,61 @@
+import os
+import pathlib
+from sys import version_info, exit
+from setuptools import setup, find_packages
+from Cython.Build import cythonize
+from pkg_resources import parse_requirements
+
+def write_version_py():
+ version_txt_path = os.path.join("IndicTransToolkit", "version.txt")
+ with open(version_txt_path, "r", encoding="utf-8") as f:
+ version = f.read().strip()
+
+ version_py_path = os.path.join("IndicTransToolkit", "version.py")
+ with open(version_py_path, "w", encoding="utf-8") as f:
+ f.write(f'__version__ = "{version}"\n')
+ return version
+
+# Enforce Python >= 3.8
+if version_info < (3, 8):
+ exit("Sorry, Python >= 3.8 is required for IndicTransToolkit.")
+
+# Read long description from README
+with open("README.md", "r", errors="ignore", encoding="utf-8") as fh:
+ long_description = fh.read().strip()
+
+# Write version.py from version.txt
+version = write_version_py()
+
+# Parse requirements.txt
+req_file = pathlib.Path("requirements.txt")
+requirements = [str(req) for req in parse_requirements(req_file.open())]
+
+# Cython files to compile (adjust if your .pyx name differs)
+cython_extensions = cythonize(
+ [
+ "IndicTransToolkit/processor.pyx",
+ ],
+ compiler_directives={"language_level": "3", "boundscheck": False},
+)
+
+setup(
+ name="IndicTransToolkit",
+ version=version,
+ author="Varun Gumma",
+ author_email="varun230999@gmail.com",
+ description="A simple, consistent, and extendable module for IndicTrans2 tokenizer compatible with HuggingFace models",
+ long_description=long_description,
+ long_description_content_type="text/markdown",
+ url="https://github.com/VarunGumma/IndicTransToolkit",
+ packages=find_packages(), # Auto-detect packages
+ license="MIT",
+ classifiers=[
+ "Programming Language :: Python :: 3",
+ "License :: OSI Approved :: MIT License",
+ "Operating System :: OS Independent",
+ ],
+ python_requires=">=3.8",
+ install_requires=requirements,
+ ext_modules=cython_extensions,
+ zip_safe=False,
+)
diff --git a/IndicTrans2/huggingface_interface/README.md b/IndicTrans2/huggingface_interface/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..03c5cc6e15251015d0f23f943740e40cf0204385
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/README.md
@@ -0,0 +1,119 @@
+# IndicTrans2 HF Compatible Models
+
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AI4Bharat/IndicTrans2/blob/main/huggingface_interface/colab_inference.ipynb)
+
+In this section, we provide details on how to use our [IndicTrans2](https://github.com/AI4Bharat/IndicTrans2) models, which were originally trained with [fairseq](https://github.com/facebookresearch/fairseq), with [HuggingFace transformers](https://huggingface.co/docs/transformers/index) for inference. Our scripts for HuggingFace compatible models are adapted from the [M2M100 repository](https://github.com/huggingface/transformers/tree/main/src/transformers/models/m2m_100).
+
+> Note: We have migrated the IndicTrans2 tokenizer for HF compatible IndicTrans2 models to [IndicTransToolkit](https://github.com/VarunGumma/IndicTransToolkit), where it will be maintained separately from now on. It is installed automatically when you run the `install.sh` script in `huggingface_interface`.
+
+### Setup
+
+To get started, follow these steps to set up the environment:
+
+```
+# Clone the github repository and navigate to the project directory.
+git clone https://github.com/AI4Bharat/IndicTrans2
+cd IndicTrans2/huggingface_interface
+
+# Install all the dependencies and requirements associated with the project for running HF compatible models.
+source install.sh
+```
+
+> Note: The `install.sh` script in this directory is specifically for running HF compatible models for inference.
+
+### Converting
+
+To convert the fairseq checkpoint to a PyTorch checkpoint compatible with HuggingFace Transformers, use the following command:
+
+```bash
+python3 convert_indictrans_checkpoint_to_pytorch.py --fairseq_path <fairseq_path> --pytorch_dump_folder_path <pytorch_dump_folder_path>
+```
+
+- `<fairseq_path>`: path to the fairseq `checkpoint_best.pt` that needs to be converted to HF compatible models
+- `<pytorch_dump_folder_path>`: path to the output directory where the HF compatible models will be saved
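+
+For example, with the default paths baked into the script's argparse arguments (adjust these to your checkpoint layout):
+
+```bash
+python3 convert_indictrans_checkpoint_to_pytorch.py \
+    --fairseq_path indic-en/model/checkpoint_best.pt \
+    --pytorch_dump_folder_path indic-en/hf_model
+```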
+
+### Models
+
+| Model | 🤗 HuggingFace Checkpoints |
+| -------------------------------- | ----------------------------------------------------------------------------------------------------------------- |
+| En-Indic | [ai4bharat/indictrans2-en-indic-1B](https://huggingface.co/ai4bharat/indictrans2-en-indic-1B) |
+| Indic-En | [ai4bharat/indictrans2-indic-en-1B](https://huggingface.co/ai4bharat/indictrans2-indic-en-1B) |
+| Distilled En-Indic | [ai4bharat/indictrans2-en-indic-dist-200M](https://huggingface.co/ai4bharat/indictrans2-en-indic-dist-200M) |
+| Distilled Indic-En | [ai4bharat/indictrans2-indic-en-dist-200M](https://huggingface.co/ai4bharat/indictrans2-indic-en-dist-200M) |
+| Indic-Indic (Stitched) | [ai4bharat/indictrans2-indic-indic-1B](https://huggingface.co/ai4bharat/indictrans2-indic-indic-1B) |
+| Distilled Indic-Indic (Stitched) | [ai4bharat/indictrans2-indic-indic-dist-320M](https://huggingface.co/ai4bharat/indictrans2-indic-indic-dist-320M) |
+
+### Inference
+
+With the conversion complete, you can now perform inference using HuggingFace Transformers.
+
+You can start with the provided `example.py` script and customize it for your specific translation use case:
+
+```bash
+python3 example.py
+```
+
+Feel free to modify the `example.py` script to suit your translation needs.
+
+### Fine-tuning with LoRA
+
+Before fine-tuning IndicTrans2 models, you will need to restructure your training data into the following format.
+
+```
+en-indic-exp
+├── train
+│ ├── eng_Latn-asm_Beng
+│ │ ├── train.eng_Latn
+│ │ └── train.asm_Beng
+│ ├── eng_Latn-ben_Beng
+│ │ └── ...
+│ └── {src_lang}-{tgt_lang}
+│ ├── train.{src_lang}
+│ └── train.{tgt_lang}
+└── dev
+ ├── eng_Latn-asm_Beng
+ │ ├── dev.eng_Latn
+ │ └── dev.asm_Beng
+ ├── eng_Latn-ben_Beng
+ │ └── ...
+ └── {src_lang}-{tgt_lang}
+ ├── dev.{src_lang}
+ └── dev.{tgt_lang}
+```
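+
+This layout can be generated with a few lines of Python. A minimal sketch (the language pair and sentence pairs are illustrative placeholders):
+
+```python
+import os
+
+src_lang, tgt_lang = "eng_Latn", "asm_Beng"
+splits = {
+    "train": [("Hello.", "নমস্কাৰ।")],  # replace with your full training corpus
+    "dev": [("Thank you.", "ধন্যবাদ।")],  # and your validation set
+}
+
+for split, sentence_pairs in splits.items():
+    pair_dir = os.path.join("en-indic-exp", split, f"{src_lang}-{tgt_lang}")
+    os.makedirs(pair_dir, exist_ok=True)
+    # one sentence per line; source and target files must stay line-aligned
+    with open(os.path.join(pair_dir, f"{split}.{src_lang}"), "w", encoding="utf-8") as f_src, \
+         open(os.path.join(pair_dir, f"{split}.{tgt_lang}"), "w", encoding="utf-8") as f_tgt:
+        for src, tgt in sentence_pairs:
+            f_src.write(src + "\n")
+            f_tgt.write(tgt + "\n")
+```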
+
+Once your data is ready in the above format, use the following command.
+
+```bash
+bash train_lora.sh
+```
+
+We recommend referring to `train_lora.sh` for the default fine-tuning arguments. Please note that the specified hyperparameters may not be optimal and might require tuning for your use case.
+
+### Inference with LoRA
+
+You can load the LoRA adapters with the base model for inference by modifying the model initialization in the `example.py` script.
+
+```python
+from transformers import AutoModelForSeq2SeqLM
+from peft import PeftModel
+
+base_ckpt_dir = "ai4bharat/indictrans2-en-indic-1B"  # you will need to change this as per your use case
+lora_ckpt_dir = "<path_to_your_lora_adapters>"  # placeholder: directory where your fine-tuned LoRA adapters were saved
+base_model = AutoModelForSeq2SeqLM.from_pretrained(base_ckpt_dir, trust_remote_code=True)
+lora_model = PeftModel.from_pretrained(base_model, lora_ckpt_dir)
+```
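+
+Optionally, you can fold the adapters into the base weights so that inference runs without the PEFT wrapper (`merge_and_unload` is a standard `peft` API; verify its behavior for your installed version):
+
+```python
+# merges the LoRA weights into the base model and returns the plain transformers model
+lora_model = lora_model.merge_and_unload()
+```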
+
+> Note: Please feel free to open issues on the GitHub repo in case of any queries/issues.
+
+### Citation
+
+```bibtex
+@article{gala2023indictrans,
+title={IndicTrans2: Towards High-Quality and Accessible Machine Translation Models for all 22 Scheduled Indian Languages},
+author={Jay Gala and Pranjal A Chitale and A K Raghavan and Varun Gumma and Sumanth Doddapaneni and Aswanth Kumar M and Janki Atul Nawale and Anupama Sujatha and Ratish Puduppully and Vivek Raghavan and Pratyush Kumar and Mitesh M Khapra and Raj Dabre and Anoop Kunchukuttan},
+journal={Transactions on Machine Learning Research},
+issn={2835-8856},
+year={2023},
+url={https://openreview.net/forum?id=vfT4YuzAYA},
+note={}
+}
+```
diff --git a/IndicTrans2/huggingface_interface/colab_inference.ipynb b/IndicTrans2/huggingface_interface/colab_inference.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..b4786e3c76fb905d960a2123a4748212f55b2948
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/colab_inference.ipynb
@@ -0,0 +1,458 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "8Aa-nRCzPVdF"
+ },
+ "source": [
+ "# IndicTrans2 HF Inference\n",
+ "\n",
+ "We provide an example notebook demonstrating how to use our IndicTrans2 models, which were originally trained with fairseq, with HuggingFace transformers for inference.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Cfsv02IeP2It"
+ },
+ "source": [
+ "## Setup\n",
+ "\n",
+ "Please run the cells below to install the necessary dependencies.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "qKcYlUZYGLrt"
+ },
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "!git clone https://github.com/AI4Bharat/IndicTrans2.git"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "U3vs7FkIGSxK"
+ },
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "%cd /content/IndicTrans2/huggingface_interface"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "ddkRAXQ2Git0"
+ },
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "!python3 -m pip install nltk sacremoses pandas regex mock \"transformers>=4.33.2\" mosestokenizer\n",
+ "!python3 -c \"import nltk; nltk.download('punkt')\"\n",
+ "!python3 -m pip install bitsandbytes scipy accelerate datasets\n",
+ "!python3 -m pip install sentencepiece\n",
+ "\n",
+ "!git clone https://github.com/VarunGumma/IndicTransToolkit.git\n",
+ "%cd IndicTransToolkit\n",
+ "!python3 -m pip install --editable ./\n",
+ "%cd .."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "hjN7ub1tO33H"
+ },
+ "source": [
+ "**IMPORTANT: Restart your runtime first and then run the cells below.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "_SLBIw6rQB-0"
+ },
+ "source": [
+ "## Inference\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "fYczM2U6G1Zv"
+ },
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from transformers import AutoModelForSeq2SeqLM, BitsAndBytesConfig, AutoTokenizer\n",
+ "from IndicTransToolkit import IndicProcessor\n",
+ "\n",
+ "BATCH_SIZE = 4\n",
+ "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+ "quantization = None"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "xj1WCNjuHG-d"
+ },
+ "outputs": [],
+ "source": [
+ "def initialize_model_and_tokenizer(ckpt_dir, quantization):\n",
+ " if quantization == \"4-bit\":\n",
+ " qconfig = BitsAndBytesConfig(\n",
+ " load_in_4bit=True,\n",
+ " bnb_4bit_use_double_quant=True,\n",
+ " bnb_4bit_compute_dtype=torch.bfloat16,\n",
+ " )\n",
+ " elif quantization == \"8-bit\":\n",
+ " qconfig = BitsAndBytesConfig(\n",
+ " load_in_8bit=True,\n",
+ " bnb_8bit_use_double_quant=True,\n",
+ " bnb_8bit_compute_dtype=torch.bfloat16,\n",
+ " )\n",
+ " else:\n",
+ " qconfig = None\n",
+ "\n",
+ " tokenizer = AutoTokenizer.from_pretrained(ckpt_dir, trust_remote_code=True)\n",
+ " model = AutoModelForSeq2SeqLM.from_pretrained(\n",
+ " ckpt_dir,\n",
+ " trust_remote_code=True,\n",
+ " low_cpu_mem_usage=True,\n",
+ " quantization_config=qconfig,\n",
+ " )\n",
+ "\n",
+ " if qconfig is None:\n",
+ " model = model.to(DEVICE)\n",
+ " if DEVICE == \"cuda\":\n",
+ " model.half()\n",
+ "\n",
+ " model.eval()\n",
+ "\n",
+ " return tokenizer, model\n",
+ "\n",
+ "\n",
+ "def batch_translate(input_sentences, src_lang, tgt_lang, model, tokenizer, ip):\n",
+ " translations = []\n",
+ " for i in range(0, len(input_sentences), BATCH_SIZE):\n",
+ " batch = input_sentences[i : i + BATCH_SIZE]\n",
+ "\n",
+ " # Preprocess the batch and extract entity mappings\n",
+ " batch = ip.preprocess_batch(batch, src_lang=src_lang, tgt_lang=tgt_lang)\n",
+ "\n",
+ " # Tokenize the batch and generate input encodings\n",
+ " inputs = tokenizer(\n",
+ " batch,\n",
+ " truncation=True,\n",
+ " padding=\"longest\",\n",
+ " return_tensors=\"pt\",\n",
+ " return_attention_mask=True,\n",
+ " ).to(DEVICE)\n",
+ "\n",
+ " # Generate translations using the model\n",
+ " with torch.no_grad():\n",
+ " generated_tokens = model.generate(\n",
+ " **inputs,\n",
+ " use_cache=True,\n",
+ " min_length=0,\n",
+ " max_length=256,\n",
+ " num_beams=5,\n",
+ " num_return_sequences=1,\n",
+ " )\n",
+ "\n",
+ " # Decode the generated tokens into text\n",
+ "\n",
+ " with tokenizer.as_target_tokenizer():\n",
+ " generated_tokens = tokenizer.batch_decode(\n",
+ " generated_tokens.detach().cpu().tolist(),\n",
+ " skip_special_tokens=True,\n",
+ " clean_up_tokenization_spaces=True,\n",
+ " )\n",
+ "\n",
+ " # Postprocess the translations, including entity replacement\n",
+ " translations += ip.postprocess_batch(generated_tokens, lang=tgt_lang)\n",
+ "\n",
+ " del inputs\n",
+ " torch.cuda.empty_cache()\n",
+ "\n",
+ " return translations"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "erNCuZTEMt49"
+ },
+ "source": [
+ "### English to Indic Example\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "6OG3Bw-sHnf3",
+ "outputId": "a204f50e-9456-4fb1-900a-e60680b97b99"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "eng_Latn - hin_Deva\n",
+ "eng_Latn: When I was young, I used to go to the park every day.\n",
+ "hin_Deva: जब मैं छोटा था, मैं हर दिन पार्क जाता था।\n",
+ "eng_Latn: He has many old books, which he inherited from his ancestors.\n",
+ "hin_Deva: उनके पास कई पुरानी किताबें हैं, जो उन्हें अपने पूर्वजों से विरासत में मिली हैं।\n",
+ "eng_Latn: I can't figure out how to solve my problem.\n",
+ "hin_Deva: मुझे समझ नहीं आ रहा है कि मैं अपनी समस्या का समाधान कैसे करूं।\n",
+ "eng_Latn: She is very hardworking and intelligent, which is why she got all the good marks.\n",
+ "hin_Deva: वह बहुत मेहनती और बुद्धिमान है, यही कारण है कि उसे सभी अच्छे अंक मिले।\n",
+ "eng_Latn: We watched a new movie last week, which was very inspiring.\n",
+ "hin_Deva: हमने पिछले हफ्ते एक नई फिल्म देखी, जो बहुत प्रेरणादायक थी।\n",
+ "eng_Latn: If you had met me at that time, we would have gone out to eat.\n",
+ "hin_Deva: अगर आप उस समय मुझसे मिलते तो हम बाहर खाना खाने जाते।\n",
+ "eng_Latn: She went to the market with her sister to buy a new sari.\n",
+ "hin_Deva: वह अपनी बहन के साथ नई साड़ी खरीदने के लिए बाजार गई थी।\n",
+ "eng_Latn: Raj told me that he is going to his grandmother's house next month.\n",
+ "hin_Deva: राज ने मुझे बताया कि वह अगले महीने अपनी दादी के घर जा रहा है।\n",
+ "eng_Latn: All the kids were having fun at the party and were eating lots of sweets.\n",
+ "hin_Deva: पार्टी में सभी बच्चे खूब मस्ती कर रहे थे और खूब मिठाइयां खा रहे थे।\n",
+ "eng_Latn: My friend has invited me to his birthday party, and I will give him a gift.\n",
+ "hin_Deva: मेरे दोस्त ने मुझे अपने जन्मदिन की पार्टी में आमंत्रित किया है, और मैं उसे एक उपहार दूंगा।\n"
+ ]
+ }
+ ],
+ "source": [
+ "en_indic_ckpt_dir = \"ai4bharat/indictrans2-en-indic-1B\" # ai4bharat/indictrans2-en-indic-dist-200M\n",
+ "en_indic_tokenizer, en_indic_model = initialize_model_and_tokenizer(en_indic_ckpt_dir, quantization)\n",
+ "\n",
+ "ip = IndicProcessor(inference=True)\n",
+ "\n",
+ "en_sents = [\n",
+ " \"When I was young, I used to go to the park every day.\",\n",
+ " \"He has many old books, which he inherited from his ancestors.\",\n",
+ " \"I can't figure out how to solve my problem.\",\n",
+ " \"She is very hardworking and intelligent, which is why she got all the good marks.\",\n",
+ " \"We watched a new movie last week, which was very inspiring.\",\n",
+ " \"If you had met me at that time, we would have gone out to eat.\",\n",
+ " \"She went to the market with her sister to buy a new sari.\",\n",
+ " \"Raj told me that he is going to his grandmother's house next month.\",\n",
+ " \"All the kids were having fun at the party and were eating lots of sweets.\",\n",
+ " \"My friend has invited me to his birthday party, and I will give him a gift.\",\n",
+ "]\n",
+ "\n",
+ "src_lang, tgt_lang = \"eng_Latn\", \"hin_Deva\"\n",
+ "hi_translations = batch_translate(en_sents, src_lang, tgt_lang, en_indic_model, en_indic_tokenizer, ip)\n",
+ "\n",
+ "print(f\"\\n{src_lang} - {tgt_lang}\")\n",
+ "for input_sentence, translation in zip(en_sents, hi_translations):\n",
+ " print(f\"{src_lang}: {input_sentence}\")\n",
+ " print(f\"{tgt_lang}: {translation}\")\n",
+ "\n",
+ "# flush the models to free the GPU memory\n",
+ "del en_indic_tokenizer, en_indic_model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "OM_1pbPtMpV9"
+ },
+ "source": [
+ "### Indic to English Example"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "PLCEWJKvGG9I",
+ "outputId": "ab9d8726-67c7-490b-ecb3-208df1c0f741"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "hin_Deva - eng_Latn\n",
+ "hin_Deva: जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।\n",
+ "eng_Latn: When I was young, I used to go to the park every day.\n",
+ "hin_Deva: उसके पास बहुत सारी पुरानी किताबें हैं, जिन्हें उसने अपने दादा-परदादा से विरासत में पाया।\n",
+ "eng_Latn: She has a lot of old books, which she inherited from her grandparents.\n",
+ "hin_Deva: मुझे समझ में नहीं आ रहा कि मैं अपनी समस्या का समाधान कैसे ढूंढूं।\n",
+ "eng_Latn: I don't know how to find a solution to my problem.\n",
+ "hin_Deva: वह बहुत मेहनती और समझदार है, इसलिए उसे सभी अच्छे मार्क्स मिले।\n",
+ "eng_Latn: He is very hardworking and understanding, so he got all the good marks.\n",
+ "hin_Deva: हमने पिछले सप्ताह एक नई फिल्म देखी जो कि बहुत प्रेरणादायक थी।\n",
+ "eng_Latn: We saw a new movie last week that was very inspiring.\n",
+ "hin_Deva: अगर तुम मुझे उस समय पास मिलते, तो हम बाहर खाना खाने चलते।\n",
+ "eng_Latn: If you'd given me a pass at that time, we'd have gone out to eat.\n",
+ "hin_Deva: वह अपनी दीदी के साथ बाजार गयी थी ताकि वह नई साड़ी खरीद सके।\n",
+ "eng_Latn: She had gone to the market with her sister so that she could buy a new sari.\n",
+ "hin_Deva: राज ने मुझसे कहा कि वह अगले महीने अपनी नानी के घर जा रहा है।\n",
+ "eng_Latn: Raj told me that he was going to his grandmother's house next month.\n",
+ "hin_Deva: सभी बच्चे पार्टी में मज़ा कर रहे थे और खूब सारी मिठाइयाँ खा रहे थे।\n",
+ "eng_Latn: All the children were having fun at the party and eating a lot of sweets.\n",
+ "hin_Deva: मेरे मित्र ने मुझे उसके जन्मदिन की पार्टी में बुलाया है, और मैं उसे एक तोहफा दूंगा।\n",
+ "eng_Latn: My friend has invited me to her birthday party, and I'll give her a present.\n"
+ ]
+ }
+ ],
+ "source": [
+ "indic_en_ckpt_dir = \"ai4bharat/indictrans2-indic-en-1B\" # ai4bharat/indictrans2-indic-en-dist-200M\n",
+ "indic_en_tokenizer, indic_en_model = initialize_model_and_tokenizer(indic_en_ckpt_dir, quantization)\n",
+ "\n",
+ "ip = IndicProcessor(inference=True)\n",
+ "\n",
+ "hi_sents = [\n",
+ " \"जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।\",\n",
+ " \"उसके पास बहुत सारी पुरानी किताबें हैं, जिन्हें उसने अपने दादा-परदादा से विरासत में पाया।\",\n",
+ " \"मुझे समझ में नहीं आ रहा कि मैं अपनी समस्या का समाधान कैसे ढूंढूं।\",\n",
+ " \"वह बहुत मेहनती और समझदार है, इसलिए उसे सभी अच्छे मार्क्स मिले।\",\n",
+ " \"हमने पिछले सप्ताह एक नई फिल्म देखी जो कि बहुत प्रेरणादायक थी।\",\n",
+ " \"अगर तुम मुझे उस समय पास मिलते, तो हम बाहर खाना खाने चलते।\",\n",
+ " \"वह अपनी दीदी के साथ बाजार गयी थी ताकि वह नई साड़ी खरीद सके।\",\n",
+ " \"राज ने मुझसे कहा कि वह अगले महीने अपनी नानी के घर जा रहा है।\",\n",
+ " \"सभी बच्चे पार्टी में मज़ा कर रहे थे और खूब सारी मिठाइयाँ खा रहे थे।\",\n",
+ " \"मेरे मित्र ने मुझे उसके जन्मदिन की पार्टी में बुलाया है, और मैं उसे एक तोहफा दूंगा।\",\n",
+ "]\n",
+ "src_lang, tgt_lang = \"hin_Deva\", \"eng_Latn\"\n",
+ "en_translations = batch_translate(hi_sents, src_lang, tgt_lang, indic_en_model, indic_en_tokenizer, ip)\n",
+ "\n",
+ "\n",
+ "print(f\"\\n{src_lang} - {tgt_lang}\")\n",
+ "for input_sentence, translation in zip(hi_sents, en_translations):\n",
+ " print(f\"{src_lang}: {input_sentence}\")\n",
+ " print(f\"{tgt_lang}: {translation}\")\n",
+ "\n",
+ "# flush the models to free the GPU memory\n",
+ "del indic_en_tokenizer, indic_en_model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7VCAkyKBGtnV"
+ },
+ "source": [
+ "### Indic to Indic Example\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "_7TxTTCoKjti",
+ "outputId": "df1a750b-0f32-478d-cfc9-e445f669f3ee"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "hin_Deva - mar_Deva\n",
+ "hin_Deva: जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।\n",
+ "mar_Deva: मी लहान होतो तेव्हा मी दररोज उद्यानाला जायचे.\n",
+ "hin_Deva: उसके पास बहुत सारी पुरानी किताबें हैं, जिन्हें उसने अपने दादा-परदादा से विरासत में पाया।\n",
+ "mar_Deva: तिच्याकडे बरीच जुनी पुस्तके आहेत, जी तिला तिच्या आजोबांकडून वारशाने मिळाली आहेत.\n",
+ "hin_Deva: मुझे समझ में नहीं आ रहा कि मैं अपनी समस्या का समाधान कैसे ढूंढूं।\n",
+ "mar_Deva: माझ्या समस्येवर तोडगा कसा काढायचा हे मला समजत नाही.\n",
+ "hin_Deva: वह बहुत मेहनती और समझदार है, इसलिए उसे सभी अच्छे मार्क्स मिले।\n",
+ "mar_Deva: तो खूप मेहनती आणि बुद्धिमान आहे, त्यामुळे त्याला सर्व चांगले गुण मिळाले.\n",
+ "hin_Deva: हमने पिछले सप्ताह एक नई फिल्म देखी जो कि बहुत प्रेरणादायक थी।\n",
+ "mar_Deva: आम्ही गेल्या आठवड्यात एक नवीन चित्रपट पाहिला जो खूप प्रेरणादायी होता.\n",
+ "hin_Deva: अगर तुम मुझे उस समय पास मिलते, तो हम बाहर खाना खाने चलते।\n",
+ "mar_Deva: जर तुम्हाला त्या वेळी मला पास मिळाला तर आम्ही बाहेर जेवायला जाऊ.\n",
+ "hin_Deva: वह अपनी दीदी के साथ बाजार गयी थी ताकि वह नई साड़ी खरीद सके।\n",
+ "mar_Deva: ती तिच्या बहिणीसोबत बाजारात गेली होती जेणेकरून ती नवीन साडी खरेदी करू शकेल.\n",
+ "hin_Deva: राज ने मुझसे कहा कि वह अगले महीने अपनी नानी के घर जा रहा है।\n",
+ "mar_Deva: राजने मला सांगितले की तो पुढच्या महिन्यात त्याच्या आजीच्या घरी जात आहे.\n",
+ "hin_Deva: सभी बच्चे पार्टी में मज़ा कर रहे थे और खूब सारी मिठाइयाँ खा रहे थे।\n",
+ "mar_Deva: सर्व मुले पार्टीचा आनंद घेत होती आणि भरपूर मिठाई खात होती.\n",
+ "hin_Deva: मेरे मित्र ने मुझे उसके जन्मदिन की पार्टी में बुलाया है, और मैं उसे एक तोहफा दूंगा।\n",
+ "mar_Deva: माझ्या मित्राने मला त्याच्या वाढदिवसाच्या मेजवानीसाठी आमंत्रित केले आहे आणि मी त्याला भेटवस्तू देईन.\n"
+ ]
+ }
+ ],
+ "source": [
+ "indic_indic_ckpt_dir = \"ai4bharat/indictrans2-indic-indic-1B\" # ai4bharat/indictrans2-indic-indic-dist-320M\n",
+ "indic_indic_tokenizer, indic_indic_model = initialize_model_and_tokenizer(indic_indic_ckpt_dir, quantization)\n",
+ "\n",
+ "ip = IndicProcessor(inference=True)\n",
+ "\n",
+ "hi_sents = [\n",
+ " \"जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।\",\n",
+ " \"उसके पास बहुत सारी पुरानी किताबें हैं, जिन्हें उसने अपने दादा-परदादा से विरासत में पाया।\",\n",
+ " \"मुझे समझ में नहीं आ रहा कि मैं अपनी समस्या का समाधान कैसे ढूंढूं।\",\n",
+ " \"वह बहुत मेहनती और समझदार है, इसलिए उसे सभी अच्छे मार्क्स मिले।\",\n",
+ " \"हमने पिछले सप्ताह एक नई फिल्म देखी जो कि बहुत प्रेरणादायक थी।\",\n",
+ " \"अगर तुम मुझे उस समय पास मिलते, तो हम बाहर खाना खाने चलते।\",\n",
+ " \"वह अपनी दीदी के साथ बाजार गयी थी ताकि वह नई साड़ी खरीद सके।\",\n",
+ " \"राज ने मुझसे कहा कि वह अगले महीने अपनी नानी के घर जा रहा है।\",\n",
+ " \"सभी बच्चे पार्टी में मज़ा कर रहे थे और खूब सारी मिठाइयाँ खा रहे थे।\",\n",
+ " \"मेरे मित्र ने मुझे उसके जन्मदिन की पार्टी में बुलाया है, और मैं उसे एक तोहफा दूंगा।\",\n",
+ "]\n",
+ "src_lang, tgt_lang = \"hin_Deva\", \"mar_Deva\"\n",
+ "mr_translations = batch_translate(hi_sents, src_lang, tgt_lang, indic_indic_model, indic_indic_tokenizer, ip)\n",
+ "\n",
+ "print(f\"\\n{src_lang} - {tgt_lang}\")\n",
+ "for input_sentence, translation in zip(hi_sents, mr_translations):\n",
+ " print(f\"{src_lang}: {input_sentence}\")\n",
+ " print(f\"{tgt_lang}: {translation}\")\n",
+ "\n",
+ "# flush the models to free the GPU memory\n",
+ "del indic_indic_tokenizer, indic_indic_model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "uyxXpt--Ma6n"
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": [],
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/IndicTrans2/huggingface_interface/configuration_indictrans.py b/IndicTrans2/huggingface_interface/configuration_indictrans.py
new file mode 100644
index 0000000000000000000000000000000000000000..4232825e77b075b9ade6d7ade5315bd04eb54520
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/configuration_indictrans.py
@@ -0,0 +1,309 @@
+# coding=utf-8
+# Copyright 2023 The IndicTrans2 Authors and AI4Bharat team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" PyTorch IndicTrans config."""
+
+
+from collections import OrderedDict
+from typing import Any, Mapping, Optional
+
+from transformers import PreTrainedTokenizer
+from transformers.configuration_utils import PretrainedConfig
+from transformers.onnx import OnnxConfig, OnnxSeq2SeqConfigWithPast
+from transformers.onnx.utils import compute_effective_axis_dimension
+from transformers.utils import TensorType, is_torch_available
+
+
+# Copied from transformers.models.m2m_100.configuration_m2m_100.M2M100Config->IndicTrans
+class IndicTransConfig(PretrainedConfig):
+ r"""
+ This is the configuration class to store the configuration of a [`IT2Model`]. It is used to instantiate an
+ IT2 model according to the specified arguments, defining the model architecture. Instantiating a configuration
+ with the defaults will yield a configuration similar to that of the IT2 architecture.
+
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+ documentation from [`PretrainedConfig`] for more information.
+
+
+ Args:
+ vocab_size (`int`, *optional*, defaults to 50265):
+ Vocabulary size of the IT2 model. Defines the number of different tokens that can be represented by the
+ `input_ids` passed when calling [`IT2Model`].
+ d_model (`int`, *optional*, defaults to 1024):
+ Dimensionality of the layers and the pooler layer.
+ encoder_layers (`int`, *optional*, defaults to 12):
+ Number of encoder layers.
+ decoder_layers (`int`, *optional*, defaults to 12):
+ Number of decoder layers.
+ encoder_attention_heads (`int`, *optional*, defaults to 16):
+ Number of attention heads for each attention layer in the Transformer encoder.
+ decoder_attention_heads (`int`, *optional*, defaults to 16):
+ Number of attention heads for each attention layer in the Transformer decoder.
+ decoder_ffn_dim (`int`, *optional*, defaults to 4096):
+ Dimensionality of the "intermediate" (often named feed-forward) layer in decoder.
+ encoder_ffn_dim (`int`, *optional*, defaults to 4096):
+ Dimensionality of the "intermediate" (often named feed-forward) layer in encoder.
+ activation_function (`str` or `function`, *optional*, defaults to `"gelu"`):
+ The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
+ `"relu"`, `"silu"` and `"gelu_new"` are supported.
+ dropout (`float`, *optional*, defaults to 0.1):
+ The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+ attention_dropout (`float`, *optional*, defaults to 0.0):
+ The dropout ratio for the attention probabilities.
+ activation_dropout (`float`, *optional*, defaults to 0.0):
+ The dropout ratio for activations inside the fully connected layer.
+ classifier_dropout (`float`, *optional*, defaults to 0.0):
+ The dropout ratio for classifier.
+ max_position_embeddings (`int`, *optional*, defaults to 1024):
+ The maximum sequence length that this model might ever be used with. Typically set this to something large
+ just in case (e.g., 512 or 1024 or 2048).
+ init_std (`float`, *optional*, defaults to 0.02):
+ The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+ encoder_layerdrop (`float`, *optional*, defaults to 0.0):
+ The LayerDrop probability for the encoder. See the [LayerDrop paper](https://arxiv.org/abs/1909.11556)
+ for more details.
+ decoder_layerdrop (`float`, *optional*, defaults to 0.0):
+ The LayerDrop probability for the decoder. See the [LayerDrop paper](https://arxiv.org/abs/1909.11556)
+ for more details.
+ use_cache (`bool`, *optional*, defaults to `True`):
+ Whether or not the model should return the last key/values attentions (not used by all models).
+ """
+ model_type = "IndicTrans"
+ keys_to_ignore_at_inference = ["past_key_values"]
+ attribute_map = {
+ "num_attention_heads": "encoder_attention_heads",
+ "hidden_size": "d_model",
+ }
+
+ def __init__(
+ self,
+ encoder_vocab_size=None,
+ decoder_vocab_size=None,
+ encoder_embed_dim=512,
+ decoder_embed_dim=512,
+ max_source_positions=210,
+ max_target_positions=210,
+ encoder_layers=6,
+ encoder_ffn_dim=2048,
+ encoder_attention_heads=8,
+ decoder_layers=6,
+ decoder_ffn_dim=2048,
+ decoder_attention_heads=8,
+ encoder_layerdrop=0.00,
+ decoder_layerdrop=0.00,
+ use_cache=True,
+ is_encoder_decoder=True,
+ activation_function="relu",
+ encoder_normalize_before=False,
+ decoder_normalize_before=False,
+ layernorm_embedding=False,
+ share_decoder_input_output_embed=False,
+ dropout=0.1,
+ attention_dropout=0.0,
+ activation_dropout=0.0,
+ init_std=0.02,
+ scale_embedding=True,
+ decoder_start_token_id=2,
+ pad_token_id=1,
+ bos_token_id=0,
+ eos_token_id=2,
+ attn_implementation="eager",
+ **kwargs,
+ ):
+ self.encoder_vocab_size = encoder_vocab_size
+ self.decoder_vocab_size = decoder_vocab_size
+ self.encoder_normalize_before = encoder_normalize_before
+ self.decoder_normalize_before = decoder_normalize_before
+ self.layernorm_embedding = layernorm_embedding
+ self.max_source_positions = max_source_positions
+ self.max_target_positions = max_target_positions
+ self.encoder_embed_dim = encoder_embed_dim
+ self.decoder_embed_dim = decoder_embed_dim
+ self.encoder_ffn_dim = encoder_ffn_dim
+ self.encoder_layers = encoder_layers
+ self.encoder_attention_heads = encoder_attention_heads
+ self.decoder_ffn_dim = decoder_ffn_dim
+ self.decoder_layers = decoder_layers
+ self.decoder_attention_heads = decoder_attention_heads
+ self.dropout = dropout
+ self.attention_dropout = attention_dropout
+ self.activation_dropout = activation_dropout
+ self.activation_function = activation_function
+ self.init_std = init_std
+ self.encoder_layerdrop = encoder_layerdrop
+ self.decoder_layerdrop = decoder_layerdrop
+ self.use_cache = use_cache
+ self.num_hidden_layers = encoder_layers
+ self.scale_embedding = scale_embedding
+ self.share_decoder_input_output_embed = share_decoder_input_output_embed
+ self.attn_implementation = attn_implementation
+
+ super().__init__(
+ pad_token_id=pad_token_id,
+ bos_token_id=bos_token_id,
+ eos_token_id=eos_token_id,
+ is_encoder_decoder=is_encoder_decoder,
+ decoder_start_token_id=decoder_start_token_id,
+ **kwargs,
+ )
+
+
+class IndicTransOnnxConfig(OnnxSeq2SeqConfigWithPast):
+ @property
+ def inputs(self) -> Mapping[str, Mapping[int, str]]:
+ common_inputs = OrderedDict(
+ [
+ ("input_ids", {0: "batch", 1: "encoder_sequence"}),
+ ("attention_mask", {0: "batch", 1: "encoder_sequence"}),
+ ]
+ )
+
+ if self.use_past:
+ common_inputs["decoder_input_ids"] = {0: "batch"}
+ common_inputs["decoder_attention_mask"] = {
+ 0: "batch",
+ 1: "past_decoder_sequence + sequence",
+ }
+ else:
+ common_inputs["decoder_input_ids"] = {0: "batch", 1: "decoder_sequence"}
+ common_inputs["decoder_attention_mask"] = {
+ 0: "batch",
+ 1: "decoder_sequence",
+ }
+
+ if self.use_past:
+ self.fill_with_past_key_values_(common_inputs, direction="inputs")
+ return common_inputs
+
+ # Copied from BartOnnxConfig._generate_dummy_inputs_for_sequence_classification_and_question_answering
+ # A better name would be _generate_dummy_inputs_for_encoder_and_decoder because sequence classification and question
+ # answering are not supported for IT2, but this name is preserved to be able to check that the copy matches what
+ # was done for BART so that it can be updated if need be.
+ def _generate_dummy_inputs_for_sequence_classification_and_question_answering(
+ self,
+ tokenizer: PreTrainedTokenizer,
+ batch_size: int = -1,
+ seq_length: int = -1,
+ is_pair: bool = False,
+ framework: Optional[TensorType] = None,
+ ) -> Mapping[str, Any]:
+ # Copied from OnnxConfig.generate_dummy_inputs
+ # Did not use super(OnnxConfigWithPast, self).generate_dummy_inputs for code clarity.
+ # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
+ batch_size = compute_effective_axis_dimension(
+ batch_size,
+ fixed_dimension=OnnxConfig.default_fixed_batch,
+ num_token_to_add=0,
+ )
+
+ # If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX
+ token_to_add = tokenizer.num_special_tokens_to_add(is_pair)
+ seq_length = compute_effective_axis_dimension(
+ seq_length,
+ fixed_dimension=OnnxConfig.default_fixed_sequence,
+ num_token_to_add=token_to_add,
+ )
+
+ # Generate dummy inputs according to compute batch and sequence
+ dummy_input = [" ".join([tokenizer.unk_token]) * seq_length] * batch_size
+ common_inputs = dict(tokenizer(dummy_input, return_tensors=framework))
+ return common_inputs
+
+ # Copied from transformers.models.bart.configuration_bart.BartOnnxConfig._generate_dummy_inputs_for_default_and_seq2seq_lm
+ def _generate_dummy_inputs_for_default_and_seq2seq_lm(
+ self,
+ tokenizer: PreTrainedTokenizer,
+ batch_size: int = -1,
+ seq_length: int = -1,
+ is_pair: bool = False,
+ framework: Optional[TensorType] = None,
+ ) -> Mapping[str, Any]:
+ encoder_inputs = self._generate_dummy_inputs_for_sequence_classification_and_question_answering(
+ tokenizer, batch_size, seq_length, is_pair, framework
+ )
+
+ # Generate decoder inputs
+ decoder_seq_length = seq_length if not self.use_past else 1
+ decoder_inputs = self._generate_dummy_inputs_for_sequence_classification_and_question_answering(
+ tokenizer, batch_size, decoder_seq_length, is_pair, framework
+ )
+ decoder_inputs = {
+ f"decoder_{name}": tensor for name, tensor in decoder_inputs.items()
+ }
+ common_inputs = dict(**encoder_inputs, **decoder_inputs)
+
+ if self.use_past:
+ if not is_torch_available():
+ raise ValueError(
+ "Cannot generate dummy past_keys inputs without PyTorch installed."
+ )
+ else:
+ import torch
+ batch, encoder_seq_length = common_inputs["input_ids"].shape
+ decoder_seq_length = common_inputs["decoder_input_ids"].shape[1]
+ (
+ num_encoder_attention_heads,
+ num_decoder_attention_heads,
+ ) = self.num_attention_heads
+ encoder_shape = (
+ batch,
+ num_encoder_attention_heads,
+ encoder_seq_length,
+ self._config.hidden_size // num_encoder_attention_heads,
+ )
+ decoder_past_length = decoder_seq_length + 3
+ decoder_shape = (
+ batch,
+ num_decoder_attention_heads,
+ decoder_past_length,
+ self._config.hidden_size // num_decoder_attention_heads,
+ )
+
+ common_inputs["decoder_attention_mask"] = torch.cat(
+ [
+ common_inputs["decoder_attention_mask"],
+ torch.ones(batch, decoder_past_length),
+ ],
+ dim=1,
+ )
+
+ common_inputs["past_key_values"] = []
+ # If the number of encoder and decoder layers are present in the model configuration, both are considered
+ num_encoder_layers, num_decoder_layers = self.num_layers
+ min_num_layers = min(num_encoder_layers, num_decoder_layers)
+ max_num_layers = (
+ max(num_encoder_layers, num_decoder_layers) - min_num_layers
+ )
+ remaining_side_name = (
+ "encoder" if num_encoder_layers > num_decoder_layers else "decoder"
+ )
+
+ for _ in range(min_num_layers):
+ common_inputs["past_key_values"].append(
+ (
+ torch.zeros(decoder_shape),
+ torch.zeros(decoder_shape),
+ torch.zeros(encoder_shape),
+ torch.zeros(encoder_shape),
+ )
+ )
+ # TODO: test this.
+ shape = encoder_shape if remaining_side_name == "encoder" else decoder_shape
+ for _ in range(min_num_layers, max_num_layers):
+ common_inputs["past_key_values"].append(
+ (torch.zeros(shape), torch.zeros(shape))
+ )
+ return common_inputs
+
+ generate_dummy_inputs = _generate_dummy_inputs_for_default_and_seq2seq_lm
diff --git a/IndicTrans2/huggingface_interface/convert_indictrans_checkpoint_to_pytorch.py b/IndicTrans2/huggingface_interface/convert_indictrans_checkpoint_to_pytorch.py
new file mode 100644
index 0000000000000000000000000000000000000000..b28839c7e1f5aed0298a6e1a1e0865e190275b91
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/convert_indictrans_checkpoint_to_pytorch.py
@@ -0,0 +1,107 @@
+# Copyright 2021 The Fairseq Authors and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+
+import torch
+import torch.nn as nn
+
+from configuration_indictrans import IndicTransConfig
+from modeling_indictrans import IndicTransForConditionalGeneration
+
+
+def remove_ignore_keys_(state_dict):
+ ignore_keys = [
+ "encoder.version",
+ "decoder.version",
+ "model.encoder.version",
+ "model.decoder.version",
+ "_float_tensor",
+ "encoder.embed_positions._float_tensor",
+ "decoder.embed_positions._float_tensor",
+ ]
+ for k in ignore_keys:
+ state_dict.pop(k, None)
+
+
+def make_linear_from_emb(emb):
+ vocab_size, emb_size = emb.shape
+ # the lm_head maps hidden states (emb_size) to vocabulary logits (vocab_size)
+ lin_layer = nn.Linear(emb_size, vocab_size, bias=False)
+ lin_layer.weight.data = emb.data
+ return lin_layer
+
+
+def convert_fairseq_IT2_checkpoint_from_disk(checkpoint_path):
+ model = torch.load(checkpoint_path, map_location="cpu")
+ args = model["args"] or model["cfg"]["model"]
+ state_dict = model["model"]
+ remove_ignore_keys_(state_dict)
+ encoder_vocab_size = state_dict["encoder.embed_tokens.weight"].shape[0]
+ decoder_vocab_size = state_dict["decoder.embed_tokens.weight"].shape[0]
+
+ config = IndicTransConfig(
+ encoder_vocab_size=encoder_vocab_size,
+ decoder_vocab_size=decoder_vocab_size,
+ max_source_positions=args.max_source_positions,
+ max_target_positions=args.max_target_positions,
+ encoder_layers=args.encoder_layers,
+ decoder_layers=args.decoder_layers,
+ layernorm_embedding=args.layernorm_embedding,
+ encoder_normalize_before=args.encoder_normalize_before,
+ decoder_normalize_before=args.decoder_normalize_before,
+ encoder_attention_heads=args.encoder_attention_heads,
+ decoder_attention_heads=args.decoder_attention_heads,
+ encoder_ffn_dim=args.encoder_ffn_embed_dim,
+ decoder_ffn_dim=args.decoder_ffn_embed_dim,
+ encoder_embed_dim=args.encoder_embed_dim,
+ decoder_embed_dim=args.decoder_embed_dim,
+ encoder_layerdrop=args.encoder_layerdrop,
+ decoder_layerdrop=args.decoder_layerdrop,
+ dropout=args.dropout,
+ attention_dropout=args.attention_dropout,
+ activation_dropout=args.activation_dropout,
+ activation_function=args.activation_fn,
+ share_decoder_input_output_embed=args.share_decoder_input_output_embed,
+ scale_embedding=not args.no_scale_embedding,
+ )
+
+ model = IndicTransForConditionalGeneration(config)
+ model.model.load_state_dict(state_dict, strict=False)
+ if not args.share_decoder_input_output_embed:
+ model.lm_head = make_linear_from_emb(
+ state_dict["decoder.output_projection.weight"]
+ )
+ print(model)
+ return model
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ # Required parameters
+ parser.add_argument(
+ "--fairseq_path",
+ default="indic-en/model/checkpoint_best.pt",
+ type=str,
+ help="path to a model.pt on local filesystem.",
+ )
+ parser.add_argument(
+ "--pytorch_dump_folder_path",
+ default="indic-en/hf_model",
+ type=str,
+ help="Path to the output PyTorch model.",
+ )
+
+ args = parser.parse_args()
+ model = convert_fairseq_IT2_checkpoint_from_disk(args.fairseq_path)
+ model.save_pretrained(args.pytorch_dump_folder_path)
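+
+    # Example invocation (paths shown are the argparse defaults above):
+    #   python convert_indictrans_checkpoint_to_pytorch.py \
+    #       --fairseq_path indic-en/model/checkpoint_best.pt \
+    #       --pytorch_dump_folder_path indic-en/hf_model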
diff --git a/IndicTrans2/huggingface_interface/example.py b/IndicTrans2/huggingface_interface/example.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f9fd6ac60767584db13ae9b24c6d13631eaa30d
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/example.py
@@ -0,0 +1,275 @@
+import sys
+import torch
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, BitsAndBytesConfig
+from transformers.utils import is_flash_attn_2_available, is_flash_attn_greater_or_equal_2_10
+from IndicTransToolkit import IndicProcessor
+from mosestokenizer import MosesSentenceSplitter
+from nltk import sent_tokenize
+from indicnlp.tokenize.sentence_tokenize import sentence_split, DELIM_PAT_NO_DANDA
+
+
+en_indic_ckpt_dir = "ai4bharat/indictrans2-en-indic-1B"  # or ai4bharat/indictrans2-en-indic-dist-200M
+indic_en_ckpt_dir = "ai4bharat/indictrans2-indic-en-1B"  # or ai4bharat/indictrans2-indic-en-dist-200M
+indic_indic_ckpt_dir = "ai4bharat/indictrans2-indic-indic-dist-320M"  # or ai4bharat/indictrans2-indic-indic-1B
+BATCH_SIZE = 4
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+# argv[1] selects quantization ("4-bit"/"8-bit"), argv[2] the attention implementation
+quantization = sys.argv[1] if len(sys.argv) > 1 else ""
+attn_implementation = sys.argv[2] if len(sys.argv) > 2 else "eager"
+
+
+# FLORES language code mapping to 2 letter ISO language code for compatibility
+# with Indic NLP Library (https://github.com/anoopkunchukuttan/indic_nlp_library)
+flores_codes = {
+ "asm_Beng": "as",
+ "awa_Deva": "hi",
+ "ben_Beng": "bn",
+ "bho_Deva": "hi",
+ "brx_Deva": "hi",
+ "doi_Deva": "hi",
+ "eng_Latn": "en",
+ "gom_Deva": "kK",
+ "guj_Gujr": "gu",
+ "hin_Deva": "hi",
+ "hne_Deva": "hi",
+ "kan_Knda": "kn",
+ "kas_Arab": "ur",
+ "kas_Deva": "hi",
+ "kha_Latn": "en",
+ "lus_Latn": "en",
+ "mag_Deva": "hi",
+ "mai_Deva": "hi",
+ "mal_Mlym": "ml",
+ "mar_Deva": "mr",
+ "mni_Beng": "bn",
+ "mni_Mtei": "hi",
+ "npi_Deva": "ne",
+ "ory_Orya": "or",
+ "pan_Guru": "pa",
+ "san_Deva": "hi",
+ "sat_Olck": "or",
+ "snd_Arab": "ur",
+ "snd_Deva": "hi",
+ "tam_Taml": "ta",
+ "tel_Telu": "te",
+ "urd_Arab": "ur",
+}
+
+
+def split_sentences(input_text, lang):
+ if lang == "eng_Latn":
+ with MosesSentenceSplitter(flores_codes[lang]) as splitter:
+ sents_moses = splitter([input_text])
+ sents_nltk = sent_tokenize(input_text)
+ if len(sents_nltk) < len(sents_moses):
+ input_sentences = sents_nltk
+ else:
+ input_sentences = sents_moses
+ input_sentences = [sent.replace("\xad", "") for sent in input_sentences]
+ else:
+ input_sentences = sentence_split(
+ input_text, lang=flores_codes[lang], delim_pat=DELIM_PAT_NO_DANDA
+ )
+ return input_sentences
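+
+# For illustration: split_sentences("This is one sentence. Here is another.", "eng_Latn")
+# is expected to return ["This is one sentence.", "Here is another."] (whichever of the
+# NLTK and Moses splits is shorter); non-English inputs go through the Indic NLP splitter.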
+
+
+def initialize_model_and_tokenizer(ckpt_dir, quantization, attn_implementation):
+ if quantization == "4-bit":
+ qconfig = BitsAndBytesConfig(
+ load_in_4bit=True,
+ bnb_4bit_use_double_quant=True,
+ bnb_4bit_compute_dtype=torch.bfloat16,
+ )
+    elif quantization == "8-bit":
+        # `bnb_8bit_*` options do not exist in BitsAndBytesConfig; plain
+        # 8-bit loading is all that is needed here
+        qconfig = BitsAndBytesConfig(load_in_8bit=True)
+ else:
+ qconfig = None
+
+ if attn_implementation == "flash_attention_2":
+ if is_flash_attn_2_available() and is_flash_attn_greater_or_equal_2_10():
+ attn_implementation = "flash_attention_2"
+ else:
+ attn_implementation = "eager"
+
+ tokenizer = AutoTokenizer.from_pretrained(ckpt_dir, trust_remote_code=True)
+ model = AutoModelForSeq2SeqLM.from_pretrained(
+ ckpt_dir,
+ trust_remote_code=True,
+ attn_implementation=attn_implementation,
+ low_cpu_mem_usage=True,
+ quantization_config=qconfig,
+ )
+
+    if qconfig is None:
+ model = model.to(DEVICE)
+ model.half()
+
+ model.eval()
+
+ return tokenizer, model
+
+
+def batch_translate(input_sentences, src_lang, tgt_lang, model, tokenizer, ip):
+ translations = []
+ for i in range(0, len(input_sentences), BATCH_SIZE):
+ batch = input_sentences[i : i + BATCH_SIZE]
+
+ # Preprocess the batch and extract entity mappings
+ batch = ip.preprocess_batch(batch, src_lang=src_lang, tgt_lang=tgt_lang)
+
+ # Tokenize the batch and generate input encodings
+ inputs = tokenizer(
+ batch,
+ truncation=True,
+ padding="longest",
+ return_tensors="pt",
+ return_attention_mask=True,
+ ).to(DEVICE)
+
+ # Generate translations using the model
+ with torch.no_grad():
+ generated_tokens = model.generate(
+ **inputs,
+ use_cache=True,
+ min_length=0,
+ max_length=256,
+ num_beams=5,
+ num_return_sequences=1,
+ )
+
+ # Decode the generated tokens into text
+ with tokenizer.as_target_tokenizer():
+ generated_tokens = tokenizer.batch_decode(
+ generated_tokens.detach().cpu().tolist(),
+ skip_special_tokens=True,
+ clean_up_tokenization_spaces=True,
+ )
+
+ # Postprocess the translations, including entity replacement
+ translations += ip.postprocess_batch(generated_tokens, lang=tgt_lang)
+
+ del inputs
+ torch.cuda.empty_cache()
+
+ return translations
+
+
+def translate_paragraph(input_text, src_lang, tgt_lang, model, tokenizer, ip):
+ input_sentences = split_sentences(input_text, src_lang)
+ translated_text = batch_translate(input_sentences, src_lang, tgt_lang, model, tokenizer, ip)
+ return " ".join(translated_text)
+
+
+ip = IndicProcessor(inference=True)
+
+en_indic_tokenizer, en_indic_model = initialize_model_and_tokenizer(
+ en_indic_ckpt_dir, quantization, attn_implementation
+)
+
+indic_en_tokenizer, indic_en_model = initialize_model_and_tokenizer(
+ indic_en_ckpt_dir, quantization, attn_implementation
+)
+
+indic_indic_tokenizer, indic_indic_model = initialize_model_and_tokenizer(
+ indic_indic_ckpt_dir, quantization, attn_implementation
+)
+
+# ---------------------------------------------------------------------------
+# Hindi to English
+# ---------------------------------------------------------------------------
+hi_sents = [
+ "जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।",
+ "उसके पास बहुत सारी पुरानी किताबें हैं, जिन्हें उसने अपने दादा-परदादा से विरासत में पाया।",
+ "मुझे समझ में नहीं आ रहा कि मैं अपनी समस्या का समाधान कैसे ढूंढूं।",
+ "वह बहुत मेहनती और समझदार है, इसलिए उसे सभी अच्छे मार्क्स मिले।",
+ "हमने पिछले सप्ताह एक नई फिल्म देखी जो कि बहुत प्रेरणादायक थी।",
+ "अगर तुम मुझे उस समय पास मिलते, तो हम बाहर खाना खाने चलते।",
+ "वह अपनी दीदी के साथ बाजार गयी थी ताकि वह नई साड़ी खरीद सके।",
+ "राज ने मुझसे कहा कि वह अगले महीने अपनी नानी के घर जा रहा है।",
+ "सभी बच्चे पार्टी में मज़ा कर रहे थे और खूब सारी मिठाइयाँ खा रहे थे।",
+ "मेरे मित्र ने मुझे उसके जन्मदिन की पार्टी में बुलाया है, और मैं उसे एक तोहफा दूंगा।",
+]
+src_lang, tgt_lang = "hin_Deva", "eng_Latn"
+en_translations = batch_translate(
+ hi_sents, src_lang, tgt_lang, indic_en_model, indic_en_tokenizer, ip
+)
+
+print(f"\n{src_lang} - {tgt_lang}")
+for input_sentence, translation in zip(hi_sents, en_translations):
+ print(f"{src_lang}: {input_sentence}")
+ print(f"{tgt_lang}: {translation}")
+
+
+# ---------------------------------------------------------------------------
+# English to Hindi
+# ---------------------------------------------------------------------------
+en_sents = [
+ "When I was young, I used to go to the park every day.",
+ "He has many old books, which he inherited from his ancestors.",
+ "I can't figure out how to solve my problem.",
+ "She is very hardworking and intelligent, which is why she got all the good marks.",
+ "We watched a new movie last week, which was very inspiring.",
+ "If you had met me at that time, we would have gone out to eat.",
+ "She went to the market with her sister to buy a new sari.",
+ "Raj told me that he is going to his grandmother's house next month.",
+ "All the kids were having fun at the party and were eating lots of sweets.",
+ "My friend has invited me to his birthday party, and I will give him a gift.",
+]
+src_lang, tgt_lang = "eng_Latn", "hin_Deva"
+hi_translations = batch_translate(
+ en_sents, src_lang, tgt_lang, en_indic_model, en_indic_tokenizer, ip
+)
+
+print(f"\n{src_lang} - {tgt_lang}")
+for input_sentence, translation in zip(en_sents, hi_translations):
+ print(f"{src_lang}: {input_sentence}")
+ print(f"{tgt_lang}: {translation}")
+
+
+# ---------------------------------------------------------------------------
+# Hindi to Marathi
+# ---------------------------------------------------------------------------
+hi_sents = [
+ "जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।",
+ "उसके पास बहुत सारी पुरानी किताबें हैं, जिन्हें उसने अपने दादा-परदादा से विरासत में पाया।",
+ "मुझे समझ में नहीं आ रहा कि मैं अपनी समस्या का समाधान कैसे ढूंढूं।",
+ "वह बहुत मेहनती और समझदार है, इसलिए उसे सभी अच्छे मार्क्स मिले।",
+ "हमने पिछले सप्ताह एक नई फिल्म देखी जो कि बहुत प्रेरणादायक थी।",
+ "अगर तुम मुझे उस समय पास मिलते, तो हम बाहर खाना खाने चलते।",
+ "वह अपनी दीदी के साथ बाजार गयी थी ताकि वह नई साड़ी खरीद सके।",
+ "राज ने मुझसे कहा कि वह अगले महीने अपनी नानी के घर जा रहा है।",
+ "सभी बच्चे पार्टी में मज़ा कर रहे थे और खूब सारी मिठाइयाँ खा रहे थे।",
+ "मेरे मित्र ने मुझे उसके जन्मदिन की पार्टी में बुलाया है, और मैं उसे एक तोहफा दूंगा।",
+]
+src_lang, tgt_lang = "hin_Deva", "mar_Deva"
+mr_translations = batch_translate(
+ hi_sents, src_lang, tgt_lang, indic_indic_model, indic_indic_tokenizer, ip
+)
+
+print(f"\n{src_lang} - {tgt_lang}")
+for input_sentence, translation in zip(hi_sents, mr_translations):
+ print(f"{src_lang}: {input_sentence}")
+ print(f"{tgt_lang}: {translation}")
+
+
+# ---------------------------------------------------------------------------
+# Paragraph translation
+# ---------------------------------------------------------------------------
+src_lang, tgt_lang = "hin_Deva", "eng_Latn"
+hi_text = "यहाँ एक पाराग्राफ है जो हिंदी में लिखा गया है। हिंदी एक सुंदर भाषा है और भारत की राष्ट्रीय भाषा है। इसका विकास विभिन्न कालों में हुआ है और यह विशेषतः भारतीय उपमहाद्वीप में बोली जाती है। हिंदी भाषा का साहित्य, संस्कृति और इतिहास भी बहुत गर्वनीय है।"
+en_translated_text = translate_paragraph(
+ hi_text, src_lang, tgt_lang, indic_en_model, indic_en_tokenizer, ip
+)
+print(f"{src_lang}: {hi_text}")
+print(f"{tgt_lang}: {en_translated_text}")
diff --git a/IndicTrans2/huggingface_interface/install.sh b/IndicTrans2/huggingface_interface/install.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e74216fa7f70f65f707e832cbc9a45c9abae8f89
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/install.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+root_dir=$(pwd)
+echo "Setting up the environment in the $root_dir"
+
+# --------------------------------------------------------------
+# create and activate the virtual environment
+# --------------------------------------------------------------
+echo "Creating a virtual environment with python3"
+conda create -n itv2_hf python=3.9 -y
+conda activate itv2_hf
+
+echo "Installing all the dependencies"
+conda install pip
+python3 -m pip install --upgrade pip
+
+
+# --------------------------------------------------------------
+# PyTorch Installation
+# --------------------------------------------------------------
+python3 -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu118
+
+
+# --------------------------------------------------------------
+# Install additional utility packages
+# --------------------------------------------------------------
+python3 -m pip install nltk sacremoses pandas regex mock "transformers>=4.33.2" mosestokenizer
+python3 -c "import nltk; nltk.download('punkt')"
+python3 -m pip install bitsandbytes scipy accelerate datasets "flash-attn>=2.1"
+
+
+# --------------------------------------------------------------
+# Sentencepiece for tokenization
+# --------------------------------------------------------------
+# the pip wheel is sufficient for the Python bindings; build the cpp binaries from
+# the source repo only if the command line utility is needed
+# source repo: https://github.com/google/sentencepiece
+python3 -m pip install sentencepiece
+
+
+# -----------------------------------------------------------------
+# Install IndicTrans2 tokenizer and its dependencies
+# -----------------------------------------------------------------
+git clone https://github.com/VarunGumma/IndicTransToolkit
+cd IndicTransToolkit
+python3 -m pip install --editable ./
+cd "$root_dir"
+
+
+echo "Setup completed!"
diff --git a/IndicTrans2/huggingface_interface/modeling_indictrans.py b/IndicTrans2/huggingface_interface/modeling_indictrans.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8f1f2457bceaf9f31aef0a856557fe71d912158
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/modeling_indictrans.py
@@ -0,0 +1,1801 @@
+# coding=utf-8
+# Copyright 2023 The IndicTrans2 Authors and AI4Bharat team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" PyTorch IndicTrans model."""
+
+
+import math
+from typing import List, Optional, Tuple, Union
+
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+
+from transformers.activations import ACT2FN
+
+from transformers.modeling_attn_mask_utils import (
+ _prepare_4d_attention_mask,
+ _prepare_4d_attention_mask_for_sdpa,
+ _prepare_4d_causal_attention_mask,
+ _prepare_4d_causal_attention_mask_for_sdpa,
+)
+
+from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
+from transformers.modeling_outputs import (
+ BaseModelOutput,
+ BaseModelOutputWithPastAndCrossAttentions,
+ Seq2SeqLMOutput,
+ Seq2SeqModelOutput
+)
+
+from transformers.utils import (
+ logging,
+ is_flash_attn_2_available,
+ is_flash_attn_greater_or_equal_2_10,
+)
+
+from transformers.modeling_utils import PreTrainedModel
+
+from .configuration_indictrans import IndicTransConfig
+
+
+logger = logging.get_logger(__name__)
+
+INDICTRANS_PRETRAINED_MODEL_ARCHIVE_LIST = [""]
+
+try:
+ if is_flash_attn_2_available():
+ from flash_attn import flash_attn_func, flash_attn_varlen_func
+ from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
+except Exception:
+    # flash-attn is optional; the eager/sdpa paths still work without it
+    pass
+
+
+# Copied from transformers.models.llama.modeling_llama._get_unpad_data
+def _get_unpad_data(attention_mask):
+ seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
+ indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
+ max_seqlen_in_batch = seqlens_in_batch.max().item()
+ cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
+ return (
+ indices,
+ cu_seqlens,
+ max_seqlen_in_batch,
+ )
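+
+# For illustration: with attention_mask = [[1, 1, 0], [1, 1, 1]] this helper
+# returns indices = [0, 1, 3, 4, 5] (positions of non-padding tokens in the
+# flattened mask), cu_seqlens = [0, 2, 5] (cumulative sequence lengths), and
+# max_seqlen_in_batch = 3.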
+
+
+# Copied from transformers.models.bart.modeling_bart.shift_tokens_right
+def shift_tokens_right(
+ input_ids: torch.Tensor, pad_token_id: int, decoder_start_token_id: int
+):
+ """
+ Shift input ids one token to the right.
+ """
+ shifted_input_ids = input_ids.new_zeros(input_ids.shape)
+ shifted_input_ids[:, 1:] = input_ids[:, :-1].clone()
+ shifted_input_ids[:, 0] = decoder_start_token_id
+
+ if pad_token_id is None:
+ raise ValueError("self.model.config.pad_token_id has to be defined.")
+ # replace possible -100 values in labels by `pad_token_id`
+ shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)
+
+ return shifted_input_ids
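+
+# For illustration: with input_ids = [[5, 6, 8]], pad_token_id = 1 and
+# decoder_start_token_id = 2, the shifted ids are [[2, 5, 6]]; any -100 label
+# positions in the shifted tensor are replaced by the pad token.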
+
+
+def create_position_ids_from_input_ids(
+ input_ids, padding_idx, past_key_values_length=0
+):
+ """
+ Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
+ are ignored. This is modified from fairseq's `utils.make_positions`.
+ """
+ # The series of casts and type-conversions here are carefully balanced to both work with ONNX export and XLA.
+ mask = input_ids.ne(padding_idx).int()
+ incremental_indices = (
+ torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length
+ ) * mask
+ return incremental_indices.long() + padding_idx
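+
+# For illustration: with input_ids = [[5, 6, 1, 1]] and padding_idx = 1, the
+# mask is [1, 1, 0, 0], the cumulative positions are [1, 2, 0, 0], and the
+# returned position ids are [[2, 3, 1, 1]] -- padding keeps position padding_idx.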
+
+
+# Copied from transformers.models.m2m_100.modeling_m2m_100.M2M100SinusoidalPositionalEmbedding with M2M100->IndicTrans
+class IndicTransSinusoidalPositionalEmbedding(nn.Module):
+ """This module produces sinusoidal positional embeddings of any length."""
+
+ def __init__(
+ self, num_positions: int, embedding_dim: int, padding_idx: Optional[int] = None
+ ):
+ super().__init__()
+ self.offset = 2
+ self.embedding_dim = embedding_dim
+ self.padding_idx = padding_idx
+ self.make_weights(num_positions + self.offset, embedding_dim, padding_idx)
+
+ def make_weights(
+ self, num_embeddings: int, embedding_dim: int, padding_idx: Optional[int] = None
+ ):
+ emb_weights = self.get_embedding(num_embeddings, embedding_dim, padding_idx)
+ if hasattr(self, "weights"):
+ # in forward put the weights on the correct dtype and device of the param
+ emb_weights = emb_weights.to(
+ dtype=self.weights.dtype, device=self.weights.device
+ )
+
+ self.register_buffer("weights", emb_weights, persistent=False)
+
+ @staticmethod
+ def get_embedding(
+ num_embeddings: int, embedding_dim: int, padding_idx: Optional[int] = None
+ ):
+ """
+ Build sinusoidal embeddings.
+
+ This matches the implementation in tensor2tensor, but differs slightly from the description in Section 3.5 of
+ "Attention Is All You Need".
+ """
+ half_dim = embedding_dim // 2
+ emb = math.log(10000) / (half_dim - 1)
+ emb = torch.exp(torch.arange(half_dim, dtype=torch.float) * -emb)
+ emb = torch.arange(num_embeddings, dtype=torch.float).unsqueeze(
+ 1
+ ) * emb.unsqueeze(0)
+ emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1).view(
+ num_embeddings, -1
+ )
+ if embedding_dim % 2 == 1:
+ # zero pad
+ emb = torch.cat([emb, torch.zeros(num_embeddings, 1)], dim=1)
+ if padding_idx is not None:
+ emb[padding_idx, :] = 0
+
+ return emb.to(torch.get_default_dtype())
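+
+    # For illustration: get_embedding(4, 6) yields a (4, 6) tensor whose first
+    # 3 columns hold the sin terms and whose last 3 columns hold the cos terms
+    # for positions 0..3; the row at padding_idx (if given) is zeroed.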
+
+ @torch.no_grad()
+ def forward(
+ self,
+ input_ids: torch.Tensor = None,
+ inputs_embeds: torch.Tensor = None,
+ past_key_values_length: int = 0,
+ ):
+ if input_ids is not None:
+ bsz, seq_len = input_ids.size()
+ # Create the position ids from the input token ids. Any padded tokens remain padded.
+ position_ids = create_position_ids_from_input_ids(
+ input_ids, self.padding_idx, past_key_values_length
+ ).to(input_ids.device)
+ else:
+ bsz, seq_len = inputs_embeds.size()[:-1]
+ position_ids = self.create_position_ids_from_inputs_embeds(
+ inputs_embeds, past_key_values_length
+ )
+
+ # expand embeddings if needed
+ max_pos = self.padding_idx + 1 + seq_len + past_key_values_length
+ if max_pos > self.weights.size(0):
+ self.make_weights(
+ max_pos + self.offset, self.embedding_dim, self.padding_idx
+ )
+
+ return (
+ self.weights.index_select(0, position_ids.view(-1))
+ .view(bsz, seq_len, self.weights.shape[-1])
+ .detach()
+ )
+
+ def create_position_ids_from_inputs_embeds(
+ self, inputs_embeds, past_key_values_length
+ ):
+ """
+ We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.
+
+ Args:
+ inputs_embeds: torch.Tensor
+
+ Returns: torch.Tensor
+ """
+ input_shape = inputs_embeds.size()[:-1]
+ sequence_length = input_shape[1]
+
+ position_ids = torch.arange(
+ self.padding_idx + 1,
+ sequence_length + self.padding_idx + 1,
+ dtype=torch.long,
+ device=inputs_embeds.device,
+ )
+ return (
+ position_ids.unsqueeze(0).expand(input_shape).contiguous()
+ + past_key_values_length
+ )
+
+
+# Copied from transformers.models.bart.modeling_bart.BartAttention with Bart->IndicTrans
+class IndicTransAttention(nn.Module):
+ """Multi-headed attention from 'Attention Is All You Need' paper"""
+
+ def __init__(
+ self,
+ embed_dim: int,
+ num_heads: int,
+ dropout: float = 0.0,
+ is_decoder: bool = False,
+ bias: bool = True,
+ is_causal: bool = False,
+ config: Optional[IndicTransConfig] = None,
+ ):
+ super().__init__()
+ self.embed_dim = embed_dim
+ self.num_heads = num_heads
+ self.dropout = dropout
+ self.head_dim = embed_dim // num_heads
+ self.config = config
+
+ if (self.head_dim * num_heads) != self.embed_dim:
+ raise ValueError(
+ f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
+ f" and `num_heads`: {num_heads})."
+ )
+ self.scaling = self.head_dim**-0.5
+ self.is_decoder = is_decoder
+ self.is_causal = is_causal
+
+ self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
+ self.v_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
+ self.q_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
+ self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
+
+ def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
+ return (
+ tensor.view(bsz, seq_len, self.num_heads, self.head_dim)
+ .transpose(1, 2)
+ .contiguous()
+ )
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ key_value_states: Optional[torch.Tensor] = None,
+ past_key_value: Optional[Tuple[torch.Tensor]] = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ layer_head_mask: Optional[torch.Tensor] = None,
+ output_attentions: bool = False,
+ ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+ """Input shape: Batch x Time x Channel"""
+
+ # if key_value_states are provided this layer is used as a cross-attention layer
+ # for the decoder
+ is_cross_attention = key_value_states is not None
+
+ bsz, tgt_len, _ = hidden_states.size()
+
+ # get query proj
+ query_states = self.q_proj(hidden_states) * self.scaling
+ # get key, value proj
+ # `past_key_value[0].shape[2] == key_value_states.shape[1]`
+ # is checking that the `sequence_length` of the `past_key_value` is the same as
+ # the provided `key_value_states` to support prefix tuning
+ if (
+ is_cross_attention
+ and past_key_value is not None
+ and past_key_value[0].shape[2] == key_value_states.shape[1]
+ ):
+ # reuse k,v, cross_attentions
+ key_states = past_key_value[0]
+ value_states = past_key_value[1]
+ elif is_cross_attention:
+ # cross_attentions
+ key_states = self._shape(self.k_proj(key_value_states), -1, bsz)
+ value_states = self._shape(self.v_proj(key_value_states), -1, bsz)
+ elif past_key_value is not None:
+ # reuse k, v, self_attention
+ key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
+ value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
+ key_states = torch.cat([past_key_value[0], key_states], dim=2)
+ value_states = torch.cat([past_key_value[1], value_states], dim=2)
+ else:
+ # self_attention
+ key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
+ value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
+
+ if self.is_decoder:
+ # if cross_attention save Tuple(torch.Tensor, torch.Tensor) of all cross attention key/value_states.
+ # Further calls to cross_attention layer can then reuse all cross-attention
+ # key/value_states (first "if" case)
+ # if uni-directional self-attention (decoder) save Tuple(torch.Tensor, torch.Tensor) of
+ # all previous decoder key/value_states. Further calls to uni-directional self-attention
+ # can concat previous decoder key/value_states to current projected key/value_states (third "elif" case)
+ # if encoder bi-directional self-attention `past_key_value` is always `None`
+ past_key_value = (key_states, value_states)
+
+ proj_shape = (bsz * self.num_heads, -1, self.head_dim)
+ query_states = self._shape(query_states, tgt_len, bsz).view(*proj_shape)
+ key_states = key_states.reshape(*proj_shape)
+ value_states = value_states.reshape(*proj_shape)
+
+ src_len = key_states.size(1)
+ attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))
+
+ if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
+ raise ValueError(
+ f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is"
+ f" {attn_weights.size()}"
+ )
+
+ if attention_mask is not None:
+ if attention_mask.size() != (bsz, 1, tgt_len, src_len):
+ raise ValueError(
+ f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {attention_mask.size()}"
+ )
+ attn_weights = (
+ attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+ + attention_mask
+ )
+ attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
+
+ attn_weights = F.softmax(attn_weights, dim=-1)
+
+ if layer_head_mask is not None:
+ if layer_head_mask.size() != (self.num_heads,):
+ raise ValueError(
+ f"Head mask for a single layer should be of size {(self.num_heads,)}, but is"
+ f" {layer_head_mask.size()}"
+ )
+ attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(
+ bsz, self.num_heads, tgt_len, src_len
+ )
+ attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
+
+ if output_attentions:
+ # this operation is a bit awkward, but it's required to
+ # make sure that attn_weights keeps its gradient.
+ # In order to do so, attn_weights have to be reshaped
+ # twice and have to be reused in the following
+ attn_weights_reshaped = attn_weights.view(
+ bsz, self.num_heads, tgt_len, src_len
+ )
+ attn_weights = attn_weights_reshaped.view(
+ bsz * self.num_heads, tgt_len, src_len
+ )
+ else:
+ attn_weights_reshaped = None
+
+ attn_probs = F.dropout(attn_weights, p=self.dropout, training=self.training)
+
+ attn_output = torch.bmm(attn_probs, value_states)
+
+ if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):
+ raise ValueError(
+ f"`attn_output` should be of size {(bsz * self.num_heads, tgt_len, self.head_dim)}, but is"
+ f" {attn_output.size()}"
+ )
+
+ attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim)
+ attn_output = attn_output.transpose(1, 2)
+
+ # Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be
+ # partitioned across GPUs when using tensor-parallelism.
+ attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim)
+
+ attn_output = self.out_proj(attn_output)
+
+ return attn_output, attn_weights_reshaped, past_key_value
+
+
+class IndicTransFlashAttention2(IndicTransAttention):
+ """
+    IndicTrans flash attention module. This module inherits from `IndicTransAttention`, as the weights of the module stay
+    untouched. The only required change is in the forward pass, where it needs to correctly call the public API of
+    flash attention and deal with padding tokens in case the input contains any.
+ """
+
+ # Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2.__init__
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1.
+ # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignement, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0.
+ # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left).
+ self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10()
+
+ def _reshape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
+ return tensor.view(bsz, seq_len, self.num_heads, self.head_dim)
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ key_value_states: Optional[torch.Tensor] = None,
+ past_key_value: Optional[Tuple[torch.Tensor]] = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ layer_head_mask: Optional[torch.Tensor] = None,
+ output_attentions: bool = False,
+ ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+ # IndicTransFlashAttention2 attention does not support output_attentions
+ if output_attentions:
+ raise ValueError("IndicTransFlashAttention2 attention does not support output_attentions")
+
+ # if key_value_states are provided this layer is used as a cross-attention layer
+ # for the decoder
+ is_cross_attention = key_value_states is not None
+
+ bsz, q_len, _ = hidden_states.size()
+
+ # get query proj
+ query_states = self._reshape(self.q_proj(hidden_states), -1, bsz)
+ # get key, value proj
+ # `past_key_value[0].shape[2] == key_value_states.shape[1]`
+ # is checking that the `sequence_length` of the `past_key_value` is the same as
+ # the provided `key_value_states` to support prefix tuning
+ if (
+ is_cross_attention
+ and past_key_value is not None
+ and past_key_value[0].shape[2] == key_value_states.shape[1]
+ ):
+ # reuse k,v, cross_attentions
+ key_states = past_key_value[0].transpose(1, 2)
+ value_states = past_key_value[1].transpose(1, 2)
+ elif is_cross_attention:
+ # cross_attentions
+ key_states = self._reshape(self.k_proj(key_value_states), -1, bsz)
+ value_states = self._reshape(self.v_proj(key_value_states), -1, bsz)
+ elif past_key_value is not None:
+ # reuse k, v, self_attention
+ key_states = self._reshape(self.k_proj(hidden_states), -1, bsz)
+ value_states = self._reshape(self.v_proj(hidden_states), -1, bsz)
+ key_states = torch.cat([past_key_value[0].transpose(1, 2), key_states], dim=1)
+ value_states = torch.cat([past_key_value[1].transpose(1, 2), value_states], dim=1)
+ else:
+ # self_attention
+ key_states = self._reshape(self.k_proj(hidden_states), -1, bsz)
+ value_states = self._reshape(self.v_proj(hidden_states), -1, bsz)
+
+ if self.is_decoder:
+ # if cross_attention save Tuple(torch.Tensor, torch.Tensor) of all cross attention key/value_states.
+ # Further calls to cross_attention layer can then reuse all cross-attention
+ # key/value_states (first "if" case)
+ # if uni-directional self-attention (decoder) save Tuple(torch.Tensor, torch.Tensor) of
+ # all previous decoder key/value_states. Further calls to uni-directional self-attention
+ # can concat previous decoder key/value_states to current projected key/value_states (third "elif" case)
+ # if encoder bi-directional self-attention `past_key_value` is always `None`
+ past_key_value = (key_states.transpose(1, 2), value_states.transpose(1, 2))
+
+ kv_seq_len = key_states.shape[-2]
+ if past_key_value is not None:
+ kv_seq_len += past_key_value[0].shape[-2]
+
+        # In PEFT, we usually cast the layer norms to float32 for training stability,
+        # so the input hidden states may get silently cast to float32. Hence, we need
+        # to cast them back to the correct dtype just to be sure everything works as expected.
+        # This might slow down training & inference, so it is recommended not to cast the
+        # LayerNorms to fp32. (LlamaRMSNorm handles it correctly)
+
+ input_dtype = query_states.dtype
+ if input_dtype == torch.float32:
+ if torch.is_autocast_enabled():
+ target_dtype = torch.get_autocast_gpu_dtype()
+ # Handle the case where the model is quantized
+ elif hasattr(self.config, "_pre_quantization_dtype"):
+ target_dtype = self.config._pre_quantization_dtype
+ else:
+ target_dtype = self.q_proj.weight.dtype
+
+            logger.warning_once(
+                f"The input hidden states seem to have been silently cast to float32; this might be because"
+                f" you have upcast embedding or layer norm layers to float32. We will cast the input back to"
+                f" {target_dtype}."
+            )
+
+ query_states = query_states.to(target_dtype)
+ key_states = key_states.to(target_dtype)
+ value_states = value_states.to(target_dtype)
+
+ attn_output = self._flash_attention_forward(
+ query_states, key_states, value_states, attention_mask, q_len, dropout=self.dropout
+ )
+
+ attn_output = attn_output.reshape(bsz, q_len, -1)
+ attn_output = self.out_proj(attn_output)
+
+ if not output_attentions:
+ attn_weights = None
+
+ return attn_output, attn_weights, past_key_value
+
+ # Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2._flash_attention_forward
+ def _flash_attention_forward(
+ self, query_states, key_states, value_states, attention_mask, query_length, dropout=0.0, softmax_scale=None
+ ):
+ """
+        Calls the forward method of Flash Attention - if the input hidden states contain at least one padding token,
+        it first unpads the input, then computes the attention scores, and finally pads the final attention scores.
+
+ Args:
+ query_states (`torch.Tensor`):
+ Input query states to be passed to Flash Attention API
+ key_states (`torch.Tensor`):
+ Input key states to be passed to Flash Attention API
+ value_states (`torch.Tensor`):
+ Input value states to be passed to Flash Attention API
+ attention_mask (`torch.Tensor`):
+ The padding mask - corresponds to a tensor of size `(batch_size, seq_len)` where 0 stands for the
+ position of padding tokens and 1 for the position of non-padding tokens.
+ dropout (`float`):
+ Attention dropout
+ softmax_scale (`float`, *optional*):
+ The scaling of QK^T before applying softmax. Default to 1 / sqrt(head_dim)
+ """
+ if not self._flash_attn_uses_top_left_mask:
+ causal = self.is_causal
+ else:
+ # TODO: Remove the `query_length != 1` check once Flash Attention for RoCm is bumped to 2.1. For details, please see the comment in LlamaFlashAttention2 __init__.
+ causal = self.is_causal and query_length != 1
+
+ # Contains at least one padding token in the sequence
+ if attention_mask is not None:
+ batch_size = query_states.shape[0]
+ query_states, key_states, value_states, indices_q, cu_seq_lens, max_seq_lens = self._upad_input(
+ query_states, key_states, value_states, attention_mask, query_length
+ )
+
+ cu_seqlens_q, cu_seqlens_k = cu_seq_lens
+ max_seqlen_in_batch_q, max_seqlen_in_batch_k = max_seq_lens
+
+ attn_output_unpad = flash_attn_varlen_func(
+ query_states,
+ key_states,
+ value_states,
+ cu_seqlens_q=cu_seqlens_q,
+ cu_seqlens_k=cu_seqlens_k,
+ max_seqlen_q=max_seqlen_in_batch_q,
+ max_seqlen_k=max_seqlen_in_batch_k,
+ dropout_p=dropout,
+ softmax_scale=softmax_scale,
+ causal=causal,
+ )
+
+ attn_output = pad_input(attn_output_unpad, indices_q, batch_size, query_length)
+ else:
+ attn_output = flash_attn_func(
+ query_states, key_states, value_states, dropout, softmax_scale=softmax_scale, causal=causal
+ )
+
+ return attn_output
+
+ # Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2._upad_input
+ def _upad_input(self, query_layer, key_layer, value_layer, attention_mask, query_length):
+ indices_k, cu_seqlens_k, max_seqlen_in_batch_k = _get_unpad_data(attention_mask)
+ batch_size, kv_seq_len, num_key_value_heads, head_dim = key_layer.shape
+
+ key_layer = index_first_axis(
+ key_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim), indices_k
+ )
+ value_layer = index_first_axis(
+ value_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim), indices_k
+ )
+ if query_length == kv_seq_len:
+ query_layer = index_first_axis(
+ query_layer.reshape(batch_size * kv_seq_len, self.num_heads, head_dim), indices_k
+ )
+ cu_seqlens_q = cu_seqlens_k
+ max_seqlen_in_batch_q = max_seqlen_in_batch_k
+ indices_q = indices_k
+ elif query_length == 1:
+ max_seqlen_in_batch_q = 1
+ cu_seqlens_q = torch.arange(
+ batch_size + 1, dtype=torch.int32, device=query_layer.device
+ ) # There is a memcpy here, that is very bad.
+ indices_q = cu_seqlens_q[:-1]
+ query_layer = query_layer.squeeze(1)
+ else:
+ # The -q_len: slice assumes left padding.
+ attention_mask = attention_mask[:, -query_length:]
+ query_layer, indices_q, cu_seqlens_q, max_seqlen_in_batch_q = unpad_input(query_layer, attention_mask)
+
+ return (
+ query_layer,
+ key_layer,
+ value_layer,
+ indices_q,
+ (cu_seqlens_q, cu_seqlens_k),
+ (max_seqlen_in_batch_q, max_seqlen_in_batch_k),
+ )
+
+
+class IndicTransSdpaAttention(IndicTransAttention):
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ key_value_states: Optional[torch.Tensor] = None,
+ past_key_value: Optional[Tuple[torch.Tensor]] = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ layer_head_mask: Optional[torch.Tensor] = None,
+ output_attentions: bool = False,
+ ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+ """Input shape: Batch x Time x Channel"""
+ if output_attentions or layer_head_mask is not None:
+ # TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once this is implemented.
+ logger.warning_once(
+ "IndicTransModel is using IndicTransSdpaAttention, but `torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True` or `layer_head_mask` not None. Falling back to the manual attention"
+ ' implementation, but specifying the manual implementation will be required from Transformers version v5.0.0 onwards. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.'
+ )
+ return super().forward(
+ hidden_states,
+ key_value_states=key_value_states,
+ past_key_value=past_key_value,
+ attention_mask=attention_mask,
+ layer_head_mask=layer_head_mask,
+ output_attentions=output_attentions,
+ )
+
+ # if key_value_states are provided this layer is used as a cross-attention layer
+ # for the decoder
+ is_cross_attention = key_value_states is not None
+
+ bsz, tgt_len, _ = hidden_states.size()
+
+ # get query proj
+ query_states = self.q_proj(hidden_states)
+ # get key, value proj
+ # `past_key_value[0].shape[2] == key_value_states.shape[1]`
+ # is checking that the `sequence_length` of the `past_key_value` is the same as
+ # the provided `key_value_states` to support prefix tuning
+ if (
+ is_cross_attention
+ and past_key_value is not None
+ and past_key_value[0].shape[2] == key_value_states.shape[1]
+ ):
+ # reuse k,v, cross_attentions
+ key_states = past_key_value[0]
+ value_states = past_key_value[1]
+ elif is_cross_attention:
+ # cross_attentions
+ key_states = self._shape(self.k_proj(key_value_states), -1, bsz)
+ value_states = self._shape(self.v_proj(key_value_states), -1, bsz)
+ elif past_key_value is not None:
+ # reuse k, v, self_attention
+ key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
+ value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
+ key_states = torch.cat([past_key_value[0], key_states], dim=2)
+ value_states = torch.cat([past_key_value[1], value_states], dim=2)
+ else:
+ # self_attention
+ key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
+ value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
+
+ if self.is_decoder:
+ # if cross_attention save Tuple(torch.Tensor, torch.Tensor) of all cross attention key/value_states.
+ # Further calls to cross_attention layer can then reuse all cross-attention
+ # key/value_states (first "if" case)
+ # if uni-directional self-attention (decoder) save Tuple(torch.Tensor, torch.Tensor) of
+ # all previous decoder key/value_states. Further calls to uni-directional self-attention
+ # can concat previous decoder key/value_states to current projected key/value_states (third "elif" case)
+ # if encoder bi-directional self-attention `past_key_value` is always `None`
+ past_key_value = (key_states, value_states)
+
+ query_states = self._shape(query_states, tgt_len, bsz)
+
+ # NOTE: SDPA with memory-efficient backend is currently (torch==2.1.2) bugged when using non-contiguous inputs and a custom attn_mask,
+ # but we are fine here as `_shape` do call `.contiguous()`. Reference: https://github.com/pytorch/pytorch/issues/112577
+ attn_output = F.scaled_dot_product_attention(
+ query_states,
+ key_states,
+ value_states,
+ attn_mask=attention_mask,
+ dropout_p=self.dropout if self.training else 0.0,
+ # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1.
+ is_causal=self.is_causal and attention_mask is None and tgt_len > 1,
+ )
+
+ if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim):
+ raise ValueError(
+ f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is"
+ f" {attn_output.size()}"
+ )
+
+ attn_output = attn_output.transpose(1, 2)
+
+ # Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be
+ # partitioned across GPUs when using tensor-parallelism.
+ attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim)
+
+ attn_output = self.out_proj(attn_output)
+
+ return attn_output, None, past_key_value
+
+
+INDICTRANS_ATTENTION_CLASSES = {
+ "eager": IndicTransAttention,
+ "sdpa": IndicTransSdpaAttention,
+ "flash_attention_2": IndicTransFlashAttention2,
+}
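+
+# `config._attn_implementation` (which recent `transformers` versions set from the
+# `attn_implementation` argument of `from_pretrained`) selects the class; e.g.
+# loading with attn_implementation="sdpa" makes the layers below instantiate
+# IndicTransSdpaAttention.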
+
+# Copied from transformers.models.mbart.modeling_mbart.MBartEncoderLayer with MBart->IndicTrans
+class IndicTransEncoderLayer(nn.Module):
+ def __init__(self, config: IndicTransConfig):
+ super().__init__()
+ self.embed_dim = config.encoder_embed_dim
+ self.self_attn = INDICTRANS_ATTENTION_CLASSES[config._attn_implementation](
+ embed_dim=self.embed_dim,
+ num_heads=config.encoder_attention_heads,
+ dropout=config.attention_dropout,
+ config=config,
+ )
+ self.self_attn_layer_norm = nn.LayerNorm(self.embed_dim)
+ self.dropout = config.dropout
+ self.activation_fn = ACT2FN[config.activation_function]
+ self.activation_dropout = config.activation_dropout
+ self.fc1 = nn.Linear(self.embed_dim, config.encoder_ffn_dim)
+ self.fc2 = nn.Linear(config.encoder_ffn_dim, self.embed_dim)
+ self.final_layer_norm = nn.LayerNorm(self.embed_dim)
+ self.normalize_before = config.encoder_normalize_before
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ attention_mask: torch.Tensor,
+ layer_head_mask: torch.Tensor,
+ output_attentions: bool = False,
+ ) -> torch.Tensor:
+ """
+ Args:
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
+ attention_mask (`torch.FloatTensor`): attention mask of size
+ `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
+ layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
+ `(encoder_attention_heads,)`.
+ output_attentions (`bool`, *optional*):
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under
+ returned tensors for more detail.
+ """
+ residual = hidden_states
+ if self.normalize_before:
+ hidden_states = self.self_attn_layer_norm(hidden_states)
+ hidden_states, attn_weights, _ = self.self_attn(
+ hidden_states=hidden_states,
+ attention_mask=attention_mask,
+ layer_head_mask=layer_head_mask,
+ output_attentions=output_attentions,
+ )
+ hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+ hidden_states = residual + hidden_states
+ if not self.normalize_before:
+ hidden_states = self.self_attn_layer_norm(hidden_states)
+
+ residual = hidden_states
+ if self.normalize_before:
+ hidden_states = self.final_layer_norm(hidden_states)
+ hidden_states = self.activation_fn(self.fc1(hidden_states))
+ hidden_states = F.dropout(
+ hidden_states, p=self.activation_dropout, training=self.training
+ )
+ hidden_states = self.fc2(hidden_states)
+ hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+ hidden_states = residual + hidden_states
+ if not self.normalize_before:
+ hidden_states = self.final_layer_norm(hidden_states)
+
+ if hidden_states.dtype == torch.float16 and (
+ torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any()
+ ):
+ clamp_value = torch.finfo(hidden_states.dtype).max - 1000
+ hidden_states = torch.clamp(
+ hidden_states, min=-clamp_value, max=clamp_value
+ )
+
+ outputs = (hidden_states,)
+
+ if output_attentions:
+ outputs += (attn_weights,)
+
+ return outputs
+
+
+# Copied from transformers.models.mbart.modeling_mbart.MBartDecoderLayer with MBart->IndicTrans
+class IndicTransDecoderLayer(nn.Module):
+ def __init__(self, config: IndicTransConfig):
+ super().__init__()
+ self.embed_dim = config.decoder_embed_dim
+
+ self.self_attn = INDICTRANS_ATTENTION_CLASSES[config._attn_implementation](
+ embed_dim=self.embed_dim,
+ num_heads=config.decoder_attention_heads,
+ dropout=config.attention_dropout,
+ is_decoder=True,
+ is_causal=True,
+ config=config,
+ )
+ self.dropout = config.dropout
+ self.activation_fn = ACT2FN[config.activation_function]
+ self.activation_dropout = config.activation_dropout
+
+ self.self_attn_layer_norm = nn.LayerNorm(self.embed_dim)
+ self.encoder_attn = INDICTRANS_ATTENTION_CLASSES[config._attn_implementation](
+ self.embed_dim,
+ config.decoder_attention_heads,
+ dropout=config.attention_dropout,
+ is_decoder=True,
+ config=config,
+ )
+ self.encoder_attn_layer_norm = nn.LayerNorm(self.embed_dim)
+ self.fc1 = nn.Linear(self.embed_dim, config.decoder_ffn_dim)
+ self.fc2 = nn.Linear(config.decoder_ffn_dim, self.embed_dim)
+ self.final_layer_norm = nn.LayerNorm(self.embed_dim)
+ self.normalize_before = config.decoder_normalize_before
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ attention_mask: Optional[torch.Tensor] = None,
+ encoder_hidden_states: Optional[torch.Tensor] = None,
+ encoder_attention_mask: Optional[torch.Tensor] = None,
+ layer_head_mask: Optional[torch.Tensor] = None,
+ cross_attn_layer_head_mask: Optional[torch.Tensor] = None,
+ past_key_value: Optional[Tuple[torch.Tensor]] = None,
+ output_attentions: Optional[bool] = False,
+ use_cache: Optional[bool] = True,
+ ) -> torch.Tensor:
+ """
+ Args:
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
+ attention_mask (`torch.FloatTensor`): attention mask of size
+ `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
+ encoder_hidden_states (`torch.FloatTensor`):
+ cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
+ encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
+ `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
+ layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
+ `(encoder_attention_heads,)`.
+ cross_attn_layer_head_mask (`torch.FloatTensor`): mask for cross-attention heads in a given layer of
+ size `(decoder_attention_heads,)`.
+ past_key_value (`Tuple(torch.FloatTensor)`): cached past key and value projection states
+ output_attentions (`bool`, *optional*):
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under
+ returned tensors for more detail.
+ """
+ residual = hidden_states
+ if self.normalize_before:
+ hidden_states = self.self_attn_layer_norm(hidden_states)
+
+ # Self Attention
+ # decoder uni-directional self-attention cached key/values tuple is at positions 1,2
+ self_attn_past_key_value = (
+ past_key_value[:2] if past_key_value is not None else None
+ )
+ # add present self-attn cache to positions 1,2 of present_key_value tuple
+ hidden_states, self_attn_weights, present_key_value = self.self_attn(
+ hidden_states=hidden_states,
+ past_key_value=self_attn_past_key_value,
+ attention_mask=attention_mask,
+ layer_head_mask=layer_head_mask,
+ output_attentions=output_attentions,
+ )
+ hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+ hidden_states = residual + hidden_states
+ if not self.normalize_before:
+ hidden_states = self.self_attn_layer_norm(hidden_states)
+
+ # Cross-Attention Block
+ cross_attn_present_key_value = None
+ cross_attn_weights = None
+ if encoder_hidden_states is not None:
+ residual = hidden_states
+ if self.normalize_before:
+ hidden_states = self.encoder_attn_layer_norm(hidden_states)
+
+ # cross_attn cached key/values tuple is at positions 3,4 of present_key_value tuple
+ cross_attn_past_key_value = (
+ past_key_value[-2:] if past_key_value is not None else None
+ )
+ (
+ hidden_states,
+ cross_attn_weights,
+ cross_attn_present_key_value,
+ ) = self.encoder_attn(
+ hidden_states=hidden_states,
+ key_value_states=encoder_hidden_states,
+ attention_mask=encoder_attention_mask,
+ layer_head_mask=cross_attn_layer_head_mask,
+ past_key_value=cross_attn_past_key_value,
+ output_attentions=output_attentions,
+ )
+ hidden_states = F.dropout(
+ hidden_states, p=self.dropout, training=self.training
+ )
+ hidden_states = residual + hidden_states
+ if not self.normalize_before:
+ hidden_states = self.encoder_attn_layer_norm(hidden_states)
+
+ # add cross-attn to positions 3,4 of present_key_value tuple
+ present_key_value = present_key_value + cross_attn_present_key_value
+
+ # Fully Connected
+ residual = hidden_states
+ if self.normalize_before:
+ hidden_states = self.final_layer_norm(hidden_states)
+ hidden_states = self.activation_fn(self.fc1(hidden_states))
+ hidden_states = F.dropout(
+ hidden_states, p=self.activation_dropout, training=self.training
+ )
+ hidden_states = self.fc2(hidden_states)
+ hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+ hidden_states = residual + hidden_states
+ if not self.normalize_before:
+ hidden_states = self.final_layer_norm(hidden_states)
+
+ outputs = (hidden_states,)
+
+ if output_attentions:
+ outputs += (self_attn_weights, cross_attn_weights)
+
+ if use_cache:
+ outputs += (present_key_value,)
+
+ return outputs
+
+
+# Copied from transformers.models.m2m_100.modeling_m2m_100.M2M100PreTrainedModel with M2M100->IndicTrans
+class IndicTransPreTrainedModel(PreTrainedModel):
+ config_class = IndicTransConfig
+ base_model_prefix = "model"
+ supports_gradient_checkpointing = True
+ _no_split_modules = ["IndicTransAttention"]
+
+ def _init_weights(self, module):
+ std = self.config.init_std
+ if isinstance(module, nn.Linear):
+ module.weight.data.normal_(mean=0.0, std=std)
+ if module.bias is not None:
+ module.bias.data.zero_()
+ elif isinstance(module, nn.Embedding):
+ module.weight.data.normal_(mean=0.0, std=std)
+ if module.padding_idx is not None:
+ module.weight.data[module.padding_idx].zero_()
+
+ def _set_gradient_checkpointing(self, module, value=False):
+ if isinstance(module, (IndicTransDecoder, IndicTransEncoder)):
+ module.gradient_checkpointing = value
+
+
+# Copied from transformers.models.m2m_100.modeling_m2m_100.M2M100Encoder with M2M100->IndicTrans
+class IndicTransEncoder(IndicTransPreTrainedModel):
+ """
+ Transformer encoder consisting of *config.encoder_layers* self attention layers. Each layer is a
+ [`IndicTransEncoderLayer`].
+
+ Args:
+ config: IndicTransConfig
+        embed_tokens (nn.Embedding): input embedding
+ """
+
+ def __init__(
+ self, config: IndicTransConfig, embed_tokens: Optional[nn.Embedding] = None
+ ):
+ super().__init__(config)
+
+ self.dropout = config.dropout
+ self.layerdrop = config.encoder_layerdrop
+
+ embed_dim = config.encoder_embed_dim
+ self.padding_idx = config.pad_token_id
+ self.max_source_positions = config.max_source_positions
+ self.embed_scale = math.sqrt(embed_dim) if config.scale_embedding else 1.0
+
+ self.embed_tokens = nn.Embedding(
+ config.encoder_vocab_size, embed_dim, self.padding_idx
+ )
+
+ if embed_tokens is not None:
+ self.embed_tokens.weight = embed_tokens.weight
+
+ self.embed_positions = IndicTransSinusoidalPositionalEmbedding(
+ config.max_source_positions,
+ embed_dim,
+ self.padding_idx,
+ )
+ self.layers = nn.ModuleList(
+ [IndicTransEncoderLayer(config) for _ in range(config.encoder_layers)]
+ )
+ self.layer_norm = (
+ nn.LayerNorm(embed_dim) if config.encoder_normalize_before else None
+ )
+ self.layernorm_embedding = (
+ nn.LayerNorm(embed_dim) if config.layernorm_embedding else None
+ )
+
+ self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2"
+ self._use_sdpa = config._attn_implementation == "sdpa"
+
+ self.gradient_checkpointing = False
+ # Initialize weights and apply final processing
+ self.post_init()
+
+ def forward(
+ self,
+ input_ids: Optional[torch.Tensor] = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ head_mask: Optional[torch.Tensor] = None,
+ inputs_embeds: Optional[torch.Tensor] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ):
+ r"""
+ Args:
+ input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
+ Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you
+ provide it.
+
+ Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+ [`PreTrainedTokenizer.__call__`] for details.
+
+ [What are input IDs?](../glossary#input-ids)
+ attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+
+ - 1 for tokens that are **not masked**,
+ - 0 for tokens that are **masked**.
+
+ [What are attention masks?](../glossary#attention-mask)
+ head_mask (`torch.Tensor` of shape `(encoder_layers, encoder_attention_heads)`, *optional*):
+ Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`:
+
+ - 1 indicates the head is **not masked**,
+ - 0 indicates the head is **masked**.
+
+ inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
+ Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
+ This is useful if you want more control over how to convert `input_ids` indices into associated vectors
+ than the model's internal embedding lookup matrix.
+ output_attentions (`bool`, *optional*):
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under
+ returned tensors for more detail.
+ output_hidden_states (`bool`, *optional*):
+ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
+ for more detail.
+ return_dict (`bool`, *optional*):
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+ """
+ output_attentions = (
+ output_attentions
+ if output_attentions is not None
+ else self.config.output_attentions
+ )
+ output_hidden_states = (
+ output_hidden_states
+ if output_hidden_states is not None
+ else self.config.output_hidden_states
+ )
+ return_dict = (
+ return_dict if return_dict is not None else self.config.use_return_dict
+ )
+
+ # retrieve input_ids and inputs_embeds
+ if input_ids is not None and inputs_embeds is not None:
+ raise ValueError(
+ "You cannot specify both input_ids and inputs_embeds at the same time"
+ )
+ elif input_ids is not None:
+ self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
+ input_shape = input_ids.size()
+ input_ids = input_ids.view(-1, input_shape[-1])
+ elif inputs_embeds is not None:
+ input_shape = inputs_embeds.size()[:-1]
+ else:
+ raise ValueError("You have to specify either input_ids or inputs_embeds")
+
+ if inputs_embeds is None:
+ inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
+
+ embed_pos = self.embed_positions(input_ids, inputs_embeds)
+ embed_pos = embed_pos.to(inputs_embeds.device)
+
+ hidden_states = inputs_embeds + embed_pos
+ if self.layernorm_embedding is not None:
+ hidden_states = self.layernorm_embedding(hidden_states)
+ hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+
+ if attention_mask is not None:
+ if self._use_flash_attention_2:
+ attention_mask = attention_mask if 0 in attention_mask else None
+ elif self._use_sdpa and head_mask is None and not output_attentions:
+                # output_attentions=True & head_mask cannot be supported when using SDPA; fall back to
+                # the manual implementation that requires a 4D attention mask in all cases.
+ # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
+ attention_mask = _prepare_4d_attention_mask_for_sdpa(attention_mask, inputs_embeds.dtype)
+ else:
+ # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
+ attention_mask = _prepare_4d_attention_mask(attention_mask, inputs_embeds.dtype)
+
+ encoder_states = () if output_hidden_states else None
+ all_attentions = () if output_attentions else None
+
+ # check if head_mask has a correct number of layers specified if desired
+ if head_mask is not None:
+ if head_mask.size()[0] != len(self.layers):
+ raise ValueError(
+ f"The head_mask should be specified for {len(self.layers)} layers, but it is for"
+ f" {head_mask.size()[0]}."
+ )
+ deepspeed_zero3_is_enabled = is_deepspeed_zero3_enabled()
+
+ for idx, encoder_layer in enumerate(self.layers):
+ if output_hidden_states:
+ encoder_states = encoder_states + (hidden_states,)
+
+ # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
+ dropout_probability = torch.rand([])
+
+            skip_the_layer = self.training and bool(dropout_probability < self.layerdrop)
+ if not skip_the_layer or deepspeed_zero3_is_enabled:
+ # under deepspeed zero3 all gpus must run in sync
+
+ if self.gradient_checkpointing and self.training:
+ # create gradient checkpointing function
+ def create_custom_forward(module):
+ def custom_forward(*inputs):
+ return module(*inputs, output_attentions)
+
+ return custom_forward
+
+ layer_outputs = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(encoder_layer),
+ hidden_states,
+ attention_mask,
+ (head_mask[idx] if head_mask is not None else None),
+ )
+ else:
+ layer_outputs = encoder_layer(
+ hidden_states,
+ attention_mask,
+ layer_head_mask=(
+ head_mask[idx] if head_mask is not None else None
+ ),
+ output_attentions=output_attentions,
+ )
+
+ hidden_states = layer_outputs[0]
+
+ if skip_the_layer:
+ layer_outputs = (None, None)
+
+ if output_attentions:
+ all_attentions = all_attentions + (layer_outputs[1],)
+
+ if self.layer_norm is not None:
+ hidden_states = self.layer_norm(hidden_states)
+
+ if output_hidden_states:
+ encoder_states = encoder_states + (hidden_states,)
+
+ if not return_dict:
+ return tuple(
+ v
+ for v in [hidden_states, encoder_states, all_attentions]
+ if v is not None
+ )
+ return BaseModelOutput(
+ last_hidden_state=hidden_states,
+ hidden_states=encoder_states,
+ attentions=all_attentions,
+ )
+
+
+# Copied from transformers.models.m2m_100.modeling_m2m_100.M2M100Decoder->IndicTrans
+class IndicTransDecoder(IndicTransPreTrainedModel):
+ """
+    Transformer decoder consisting of *config.decoder_layers* layers. Each layer is an [`IndicTransDecoderLayer`].
+
+ Args:
+ config: IndicTransConfig
+ embed_tokens (nn.Embedding): output embedding
+ """
+
+ def __init__(
+ self, config: IndicTransConfig, embed_tokens: Optional[nn.Embedding] = None
+ ):
+ super().__init__(config)
+ self.dropout = config.dropout
+ self.layerdrop = config.decoder_layerdrop
+
+        embed_dim = config.decoder_embed_dim
+ self.padding_idx = config.pad_token_id
+ self.max_target_positions = config.max_target_positions
+ self.embed_scale = math.sqrt(embed_dim) if config.scale_embedding else 1.0
+
+ self.embed_tokens = nn.Embedding(
+ config.decoder_vocab_size, embed_dim, self.padding_idx
+ )
+
+ if embed_tokens is not None:
+ self.embed_tokens.weight = embed_tokens.weight
+
+ self.embed_positions = IndicTransSinusoidalPositionalEmbedding(
+ config.max_target_positions,
+ embed_dim,
+ self.padding_idx,
+ )
+ self.layers = nn.ModuleList(
+ [IndicTransDecoderLayer(config) for _ in range(config.decoder_layers)]
+ )
+ self.layer_norm = (
+ nn.LayerNorm(embed_dim) if config.decoder_normalize_before else None
+ )
+ self.layernorm_embedding = (
+ nn.LayerNorm(embed_dim) if config.layernorm_embedding else None
+ )
+
+ self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2"
+ self._use_sdpa = config._attn_implementation == "sdpa"
+
+ self.gradient_checkpointing = False
+ # Initialize weights and apply final processing
+ self.post_init()
+
+ def forward(
+ self,
+ input_ids: Optional[torch.Tensor] = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ encoder_hidden_states: Optional[torch.Tensor] = None,
+ encoder_attention_mask: Optional[torch.Tensor] = None,
+ head_mask: Optional[torch.Tensor] = None,
+ cross_attn_head_mask: Optional[torch.Tensor] = None,
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
+ inputs_embeds: Optional[torch.Tensor] = None,
+ use_cache: Optional[bool] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ):
+ r"""
+ Args:
+ input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
+ Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you
+ provide it.
+
+ Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+ [`PreTrainedTokenizer.__call__`] for details.
+
+ [What are input IDs?](../glossary#input-ids)
+ attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+
+ - 1 for tokens that are **not masked**,
+ - 0 for tokens that are **masked**.
+
+ [What are attention masks?](../glossary#attention-mask)
+ encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, encoder_sequence_length, hidden_size)`, *optional*):
+ Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention
+ of the decoder.
+ encoder_attention_mask (`torch.LongTensor` of shape `(batch_size, encoder_sequence_length)`, *optional*):
+ Mask to avoid performing cross-attention on padding tokens indices of encoder input_ids. Mask values
+ selected in `[0, 1]`:
+
+ - 1 for tokens that are **not masked**,
+ - 0 for tokens that are **masked**.
+
+ [What are attention masks?](../glossary#attention-mask)
+ head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*):
+ Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`:
+
+ - 1 indicates the head is **not masked**,
+ - 0 indicates the head is **masked**.
+
+ cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*):
+ Mask to nullify selected heads of the cross-attention modules in the decoder to avoid performing
+ cross-attention on hidden heads. Mask values selected in `[0, 1]`:
+
+ - 1 indicates the head is **not masked**,
+ - 0 indicates the head is **masked**.
+
+ past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
+ Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of
+ shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of
+ shape `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.
+
+ Contains pre-computed hidden-states (key and values in the self-attention blocks and in the
+ cross-attention blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
+
+ If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
+ that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
+                all `decoder_input_ids` of shape `(batch_size, sequence_length)`.
+            inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
+                Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
+                This is useful if you want more control over how to convert `input_ids` indices into associated
+                vectors than the model's internal embedding lookup matrix.
+ output_attentions (`bool`, *optional*):
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under
+ returned tensors for more detail.
+ output_hidden_states (`bool`, *optional*):
+ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
+ for more detail.
+ return_dict (`bool`, *optional*):
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+ """
+ output_attentions = (
+ output_attentions
+ if output_attentions is not None
+ else self.config.output_attentions
+ )
+ output_hidden_states = (
+ output_hidden_states
+ if output_hidden_states is not None
+ else self.config.output_hidden_states
+ )
+ use_cache = use_cache if use_cache is not None else self.config.use_cache
+ return_dict = (
+ return_dict if return_dict is not None else self.config.use_return_dict
+ )
+
+ # retrieve input_ids and inputs_embeds
+ if input_ids is not None and inputs_embeds is not None:
+ raise ValueError(
+ "You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time"
+ )
+ elif input_ids is not None:
+ input_shape = input_ids.size()
+ input_ids = input_ids.view(-1, input_shape[-1])
+ elif inputs_embeds is not None:
+ input_shape = inputs_embeds.size()[:-1]
+ else:
+ raise ValueError(
+ "You have to specify either decoder_input_ids or decoder_inputs_embeds"
+ )
+
+ # past_key_values_length
+ past_key_values_length = (
+ past_key_values[0][0].shape[2] if past_key_values is not None else 0
+ )
+
+ if inputs_embeds is None:
+ inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
+
+ if self._use_flash_attention_2:
+ # 2d mask is passed through the layers
+ attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
+ elif self._use_sdpa and not output_attentions and cross_attn_head_mask is None:
+            # output_attentions=True & cross_attn_head_mask cannot be supported when using SDPA; we fall back to
+            # the manual implementation that requires a 4D causal mask in all cases.
+ attention_mask = _prepare_4d_causal_attention_mask_for_sdpa(
+ attention_mask,
+ input_shape,
+ inputs_embeds,
+ past_key_values_length,
+ )
+ else:
+ # 4d mask is passed through the layers
+ attention_mask = _prepare_4d_causal_attention_mask(
+ attention_mask, input_shape, inputs_embeds, past_key_values_length
+ )
+
+ # expand encoder attention mask
+ if encoder_hidden_states is not None and encoder_attention_mask is not None:
+ if self._use_flash_attention_2:
+ encoder_attention_mask = encoder_attention_mask if 0 in encoder_attention_mask else None
+ elif self._use_sdpa and cross_attn_head_mask is None and not output_attentions:
+                # output_attentions=True & cross_attn_head_mask cannot be supported when using SDPA; we fall back to
+                # the manual implementation that requires a 4D attention mask in all cases.
+ # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
+ encoder_attention_mask = _prepare_4d_attention_mask_for_sdpa(
+ encoder_attention_mask,
+ inputs_embeds.dtype,
+ tgt_len=input_shape[-1],
+ )
+ else:
+ # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
+ encoder_attention_mask = _prepare_4d_attention_mask(
+ encoder_attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]
+ )
+
+ # embed positions
+ positions = self.embed_positions(
+ input_ids, inputs_embeds, past_key_values_length
+ )
+ positions = positions.to(inputs_embeds.device)
+
+ hidden_states = inputs_embeds + positions
+ if self.layernorm_embedding is not None:
+ hidden_states = self.layernorm_embedding(hidden_states)
+
+ hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+
+ if self.gradient_checkpointing and self.training:
+ if use_cache:
+ logger.warning_once(
+ "`use_cache=True` is incompatible with gradient checkpointing. Setting"
+ " `use_cache=False`..."
+ )
+ use_cache = False
+
+ # decoder layers
+ all_hidden_states = () if output_hidden_states else None
+ all_self_attns = () if output_attentions else None
+ all_cross_attentions = () if output_attentions else None
+ next_decoder_cache = () if use_cache else None
+
+ # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired
+ for attn_mask, mask_name in zip(
+ [head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]
+ ):
+ if attn_mask is not None:
+ if attn_mask.size()[0] != len(self.layers):
+ raise ValueError(
+ f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for"
+ f" {head_mask.size()[0]}."
+ )
+ deepspeed_zero3_is_enabled = is_deepspeed_zero3_enabled()
+
+ for idx, decoder_layer in enumerate(self.layers):
+ if output_hidden_states:
+ all_hidden_states += (hidden_states,)
+
+ # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
+ dropout_probability = torch.rand([])
+
+            skip_the_layer = self.training and bool(dropout_probability < self.layerdrop)
+ if not skip_the_layer or deepspeed_zero3_is_enabled:
+ # under deepspeed zero3 all gpus must run in sync
+
+ past_key_value = (
+ past_key_values[idx] if past_key_values is not None else None
+ )
+
+ if self.gradient_checkpointing and self.training:
+
+ def create_custom_forward(module):
+ def custom_forward(*inputs):
+ # None for past_key_value
+ return module(*inputs, output_attentions, use_cache)
+
+ return custom_forward
+
+ layer_outputs = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(decoder_layer),
+ hidden_states,
+ attention_mask,
+ encoder_hidden_states,
+ encoder_attention_mask,
+ head_mask[idx] if head_mask is not None else None,
+ cross_attn_head_mask[idx]
+ if cross_attn_head_mask is not None
+ else None,
+ None,
+ )
+ else:
+ layer_outputs = decoder_layer(
+ hidden_states,
+ attention_mask=attention_mask,
+ encoder_hidden_states=encoder_hidden_states,
+ encoder_attention_mask=encoder_attention_mask,
+ layer_head_mask=(
+ head_mask[idx] if head_mask is not None else None
+ ),
+ cross_attn_layer_head_mask=(
+ cross_attn_head_mask[idx]
+ if cross_attn_head_mask is not None
+ else None
+ ),
+ past_key_value=past_key_value,
+ output_attentions=output_attentions,
+ use_cache=use_cache,
+ )
+
+ hidden_states = layer_outputs[0]
+
+ if skip_the_layer:
+ continue
+
+ if use_cache:
+ next_decoder_cache += (layer_outputs[3 if output_attentions else 1],)
+
+ if output_attentions:
+ all_self_attns += (layer_outputs[1],)
+ all_cross_attentions += (layer_outputs[2],)
+
+ if self.layer_norm is not None:
+ hidden_states = self.layer_norm(hidden_states)
+
+ # add hidden states from the last decoder layer
+ if output_hidden_states:
+ all_hidden_states += (hidden_states,)
+
+ next_cache = next_decoder_cache if use_cache else None
+ if not return_dict:
+ return tuple(
+ v
+ for v in [
+ hidden_states,
+ next_cache,
+ all_hidden_states,
+ all_self_attns,
+ all_cross_attentions,
+ ]
+ if v is not None
+ )
+ return BaseModelOutputWithPastAndCrossAttentions(
+ last_hidden_state=hidden_states,
+ past_key_values=next_cache,
+ hidden_states=all_hidden_states,
+ attentions=all_self_attns,
+ cross_attentions=all_cross_attentions,
+ )
+
+
+# Copied from transformers.models.m2m_100.modeling_m2m_100.M2M100Model->IndicTrans
+class IndicTransModel(IndicTransPreTrainedModel):
+ _tied_weights_keys = None
+
+ def __init__(self, config: IndicTransConfig):
+ super().__init__(config)
+
+ self.encoder = IndicTransEncoder(config)
+ self.decoder = IndicTransDecoder(config)
+
+ # Initialize weights and apply final processing
+ self.post_init()
+
+ def get_encoder(self):
+ return self.encoder
+
+ def get_decoder(self):
+ return self.decoder
+
+ def forward(
+ self,
+ input_ids: Optional[torch.LongTensor] = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ decoder_input_ids: Optional[torch.LongTensor] = None,
+ decoder_attention_mask: Optional[torch.LongTensor] = None,
+ head_mask: Optional[torch.Tensor] = None,
+ decoder_head_mask: Optional[torch.Tensor] = None,
+ cross_attn_head_mask: Optional[torch.Tensor] = None,
+ encoder_outputs: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+ past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+ inputs_embeds: Optional[torch.FloatTensor] = None,
+ decoder_inputs_embeds: Optional[torch.FloatTensor] = None,
+ use_cache: Optional[bool] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ) -> Union[Tuple[torch.Tensor], Seq2SeqModelOutput]:
+ output_attentions = (
+ output_attentions
+ if output_attentions is not None
+ else self.config.output_attentions
+ )
+ output_hidden_states = (
+ output_hidden_states
+ if output_hidden_states is not None
+ else self.config.output_hidden_states
+ )
+ use_cache = use_cache if use_cache is not None else self.config.use_cache
+ return_dict = (
+ return_dict if return_dict is not None else self.config.use_return_dict
+ )
+
+ if encoder_outputs is None:
+ encoder_outputs = self.encoder(
+ input_ids=input_ids,
+ attention_mask=attention_mask,
+ head_mask=head_mask,
+ inputs_embeds=inputs_embeds,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ )
+ # If the user passed a tuple for encoder_outputs, we wrap it in a BaseModelOutput when return_dict=True
+ elif return_dict and not isinstance(encoder_outputs, BaseModelOutput):
+ encoder_outputs = BaseModelOutput(
+ last_hidden_state=encoder_outputs[0],
+ hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None,
+ attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None,
+ )
+
+        # decoder outputs consist of (dec_features, past_key_value, dec_hidden, dec_attn)
+ decoder_outputs = self.decoder(
+ input_ids=decoder_input_ids,
+ attention_mask=decoder_attention_mask,
+ encoder_hidden_states=encoder_outputs[0],
+ encoder_attention_mask=attention_mask,
+ head_mask=decoder_head_mask,
+ cross_attn_head_mask=cross_attn_head_mask,
+ past_key_values=past_key_values,
+ inputs_embeds=decoder_inputs_embeds,
+ use_cache=use_cache,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ )
+
+ if not return_dict:
+ return decoder_outputs + encoder_outputs
+
+ return Seq2SeqModelOutput(
+ last_hidden_state=decoder_outputs.last_hidden_state,
+ past_key_values=decoder_outputs.past_key_values,
+ decoder_hidden_states=decoder_outputs.hidden_states,
+ decoder_attentions=decoder_outputs.attentions,
+ cross_attentions=decoder_outputs.cross_attentions,
+ encoder_last_hidden_state=encoder_outputs.last_hidden_state,
+ encoder_hidden_states=encoder_outputs.hidden_states,
+ encoder_attentions=encoder_outputs.attentions,
+ )
+
+
+# Copied from transformers.models.m2m_100.modeling_m2m_100.M2M100ForConditionalGeneration->IndicTrans
+class IndicTransForConditionalGeneration(IndicTransPreTrainedModel):
+ base_model_prefix = "model"
+ _tied_weights_keys = None
+ _label_smoothing = 0.0
+
+ def __init__(self, config: IndicTransConfig):
+ super().__init__(config)
+ self.model = IndicTransModel(config)
+ self.lm_head = nn.Linear(
+ config.decoder_embed_dim, config.decoder_vocab_size, bias=False
+ )
+
+ if config.share_decoder_input_output_embed:
+ self.lm_head.weight = self.model.decoder.embed_tokens.weight
+
+ self.post_init()
+
+ def tie_weights(self):
+ pass
+
+ def get_encoder(self):
+ return self.model.get_encoder()
+
+ def get_decoder(self):
+ return self.model.get_decoder()
+
+ def get_output_embeddings(self):
+ return self.lm_head
+
+ def set_output_embeddings(self, new_embeddings):
+ self.lm_head = new_embeddings
+
+ def set_label_smoothing(self, label_smoothing):
+ self._label_smoothing = label_smoothing
+
+ def forward(
+ self,
+ input_ids: Optional[torch.LongTensor] = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ decoder_input_ids: Optional[torch.LongTensor] = None,
+ decoder_attention_mask: Optional[torch.LongTensor] = None,
+ head_mask: Optional[torch.Tensor] = None,
+ decoder_head_mask: Optional[torch.Tensor] = None,
+ cross_attn_head_mask: Optional[torch.Tensor] = None,
+ encoder_outputs: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+ past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+ inputs_embeds: Optional[torch.FloatTensor] = None,
+ decoder_inputs_embeds: Optional[torch.FloatTensor] = None,
+ labels: Optional[torch.LongTensor] = None,
+ use_cache: Optional[bool] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ) -> Union[Tuple[torch.Tensor], Seq2SeqLMOutput]:
+ r"""
+ labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
+ config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
+ (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
+
+ Returns:
+ """
+ return_dict = (
+ return_dict if return_dict is not None else self.config.use_return_dict
+ )
+
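+        # when only labels are given, derive the decoder inputs by shifting the
+        # labels one position to the right (standard seq2seq teacher forcing)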
+ if labels is not None:
+ if decoder_input_ids is None:
+ decoder_input_ids = shift_tokens_right(
+ labels, self.config.pad_token_id, self.config.decoder_start_token_id
+ )
+
+ outputs = self.model(
+ input_ids,
+ attention_mask=attention_mask,
+ decoder_input_ids=decoder_input_ids,
+ encoder_outputs=encoder_outputs,
+ decoder_attention_mask=decoder_attention_mask,
+ head_mask=head_mask,
+ decoder_head_mask=decoder_head_mask,
+ cross_attn_head_mask=cross_attn_head_mask,
+ past_key_values=past_key_values,
+ inputs_embeds=inputs_embeds,
+ decoder_inputs_embeds=decoder_inputs_embeds,
+ use_cache=use_cache,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ )
+ lm_logits = self.lm_head(outputs[0])
+
+ masked_lm_loss = None
+ if labels is not None:
+ # move labels to the correct device to enable PP
+ labels = labels.to(lm_logits.device)
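+            # compute token-level cross entropy over the flattened logits;
+            # positions labeled -100 are ignored and label smoothing is applied if set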
+ masked_lm_loss = F.cross_entropy(
+ input=lm_logits.view(-1, self.config.decoder_vocab_size),
+ target=labels.view(-1),
+ ignore_index=-100,
+ label_smoothing=self._label_smoothing,
+ )
+
+ if not return_dict:
+ output = (lm_logits,) + outputs[1:]
+ return (
+ ((masked_lm_loss,) + output) if masked_lm_loss is not None else output
+ )
+
+ return Seq2SeqLMOutput(
+ loss=masked_lm_loss,
+ logits=lm_logits,
+ past_key_values=outputs.past_key_values,
+ decoder_hidden_states=outputs.decoder_hidden_states,
+ decoder_attentions=outputs.decoder_attentions,
+ cross_attentions=outputs.cross_attentions,
+ encoder_last_hidden_state=outputs.encoder_last_hidden_state,
+ encoder_hidden_states=outputs.encoder_hidden_states,
+ encoder_attentions=outputs.encoder_attentions,
+ )
+
+ def prepare_inputs_for_generation(
+ self,
+ decoder_input_ids,
+ past_key_values=None,
+ attention_mask=None,
+ head_mask=None,
+ decoder_head_mask=None,
+ cross_attn_head_mask=None,
+ use_cache=None,
+ encoder_outputs=None,
+ **kwargs,
+ ):
+ # cut decoder_input_ids if past is used
+ if past_key_values is not None:
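+            # with cached key/value states only the newest token needs to be fed
+            # through the decoder at each generation step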
+ decoder_input_ids = decoder_input_ids[:, -1:]
+
+ return {
+ "input_ids": None, # encoder_outputs is defined. input_ids not needed
+ "encoder_outputs": encoder_outputs,
+ "past_key_values": past_key_values,
+ "decoder_input_ids": decoder_input_ids,
+ "attention_mask": attention_mask,
+ "head_mask": head_mask,
+ "decoder_head_mask": decoder_head_mask,
+ "cross_attn_head_mask": cross_attn_head_mask,
+ "use_cache": use_cache, # change this to avoid caching (presumably for debugging)
+ }
+
+ @staticmethod
+ def _reorder_cache(past_key_values, beam_idx):
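+        # reorder the cached key/value states along the batch dimension so they
+        # follow the surviving beams after each beam-search step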
+ reordered_past = ()
+ for layer_past in past_key_values:
+ reordered_past += (
+ tuple(
+ past_state.index_select(0, beam_idx) for past_state in layer_past
+ ),
+ )
+ return reordered_past
diff --git a/IndicTrans2/huggingface_interface/train_lora.py b/IndicTrans2/huggingface_interface/train_lora.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb197a5287085711091758f08c896e1ddff3bd1d
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/train_lora.py
@@ -0,0 +1,355 @@
+import os
+import argparse
+import pandas as pd
+from datasets import Dataset
+from sacrebleu.metrics import BLEU, CHRF
+from peft import LoraConfig, get_peft_model
+from IndicTransToolkit import IndicProcessor, IndicDataCollator
+
+from transformers import (
+ Seq2SeqTrainer,
+ Seq2SeqTrainingArguments,
+ AutoModelForSeq2SeqLM,
+ AutoTokenizer,
+ EarlyStoppingCallback,
+)
+
+bleu_metric = BLEU()
+chrf_metric = CHRF()
+
+
+def get_arg_parse():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model",
+ type=str,
+ )
+ parser.add_argument(
+ "--src_lang_list",
+ type=str,
+ help="comma separated list of source languages",
+ )
+ parser.add_argument(
+ "--tgt_lang_list",
+ type=str,
+ help="comma separated list of target languages",
+ )
+ parser.add_argument("--data_dir", type=str)
+ parser.add_argument("--output_dir", type=str)
+ parser.add_argument("--save_steps", type=int, default=1000)
+ parser.add_argument("--eval_steps", type=int, default=1000)
+ parser.add_argument("--batch_size", type=int, default=32)
+ parser.add_argument("--num_train_epochs", type=int, default=100)
+ parser.add_argument("--max_steps", type=int, default=1000000)
+ parser.add_argument("--grad_accum_steps", type=int, default=4)
+ parser.add_argument("--warmup_steps", type=int, default=4000)
+ parser.add_argument("--warmup_ratio", type=int, default=0.0)
+ parser.add_argument("--max_grad_norm", type=float, default=1.0)
+ parser.add_argument("--learning_rate", type=float, default=5e-4)
+ parser.add_argument("--weight_decay", type=float, default=0.0)
+ parser.add_argument("--adam_beta1", type=float, default=0.9)
+ parser.add_argument("--adam_beta2", type=float, default=0.98)
+ parser.add_argument("--dropout", type=float, default=0.0)
+ parser.add_argument("--print_samples", action="store_true")
+ parser.add_argument(
+ "--optimizer",
+ type=str,
+ default="adamw_torch",
+ choices=[
+ "adam_hf",
+ "adamw_torch",
+ "adamw_torch_fused",
+ "adamw_apex_fused",
+ "adafactor",
+ ],
+ )
+ parser.add_argument(
+ "--lr_scheduler",
+ type=str,
+ default="inverse_sqrt",
+ choices=[
+ "inverse_sqrt",
+ "linear",
+ "polynomial",
+ "cosine",
+ "constant",
+ "constant_with_warmup",
+ ],
+ )
+ parser.add_argument("--label_smoothing", type=float, default=0.0)
+ parser.add_argument("--num_workers", type=int, default=8)
+ parser.add_argument("--metric_for_best_model", type=str, default="eval_loss")
+ parser.add_argument("--greater_is_better", action="store_true")
+ parser.add_argument("--lora_target_modules", type=str, default="q_proj,k_proj")
+ parser.add_argument("--lora_dropout", type=float, default=0.1)
+ parser.add_argument("--lora_r", type=int, default=16)
+ parser.add_argument("--lora_alpha", type=int, default=32)
+ parser.add_argument(
+ "--report_to",
+ type=str,
+ default="none",
+ choices=["wandb", "tensorboard", "azure_ml", "none"],
+ )
+ parser.add_argument("--patience", type=int, default=5),
+ parser.add_argument("--threshold", type=float, default=1e-3)
+ return parser
+
+
+def load_and_process_translation_dataset(
+ data_dir,
+ split="train",
+ tokenizer=None,
+ processor=None,
+ src_lang_list=None,
+ tgt_lang_list=None,
+ num_proc=8,
+ seed=42
+):
+ complete_dataset = {
+ "sentence_SRC": [],
+ "sentence_TGT": [],
+ }
+
+ for src_lang in src_lang_list:
+ for tgt_lang in tgt_lang_list:
+ if src_lang == tgt_lang:
+ continue
+ src_path = os.path.join(
+ data_dir, split, f"{src_lang}-{tgt_lang}", f"{split}.{src_lang}"
+ )
+ tgt_path = os.path.join(
+ data_dir, split, f"{src_lang}-{tgt_lang}", f"{split}.{tgt_lang}"
+ )
+ if not os.path.exists(src_path) or not os.path.exists(tgt_path):
+ raise FileNotFoundError(
+ f"Source ({split}.{src_lang}) or Target ({split}.{tgt_lang}) file not found in {data_dir}"
+ )
+ with open(src_path, encoding="utf-8") as src_file, open(
+ tgt_path, encoding="utf-8"
+ ) as tgt_file:
+ src_lines = src_file.readlines()
+ tgt_lines = tgt_file.readlines()
+
+ # Ensure both files have the same number of lines
+ assert len(src_lines) == len(
+ tgt_lines
+ ), f"Source and Target files have different number of lines for {split}.{src_lang} and {split}.{tgt_lang}"
+
+ complete_dataset["sentence_SRC"] += processor.preprocess_batch(
+ src_lines, src_lang=src_lang, tgt_lang=tgt_lang, is_target=False
+ )
+
+ complete_dataset["sentence_TGT"] += processor.preprocess_batch(
+ tgt_lines, src_lang=tgt_lang, tgt_lang=src_lang, is_target=True
+ )
+
+ complete_dataset = Dataset.from_dict(complete_dataset).shuffle(seed=seed)
+
+ return complete_dataset.map(
+ lambda example: preprocess_fn(
+ example,
+ tokenizer=tokenizer
+ ),
+ batched=True,
+ num_proc=num_proc,
+ )
+
+
+def compute_metrics_factory(
+ tokenizer, metric_dict=None, print_samples=False, n_samples=10
+):
+ def compute_metrics(eval_preds):
+ preds, labels = eval_preds
+
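+        # -100 is the ignore-index used for loss masking; replace it with the pad
+        # token id so the sequences can be decoded back to text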
+ labels[labels == -100] = tokenizer.pad_token_id
+ preds[preds == -100] = tokenizer.pad_token_id
+
+ with tokenizer.as_target_tokenizer():
+ preds = [
+ x.strip()
+ for x in tokenizer.batch_decode(
+ preds, skip_special_tokens=True, clean_up_tokenization_spaces=True
+ )
+ ]
+ labels = [
+ x.strip()
+ for x in tokenizer.batch_decode(
+ labels, skip_special_tokens=True, clean_up_tokenization_spaces=True
+ )
+ ]
+
+ assert len(preds) == len(
+ labels
+ ), "Predictions and Labels have different lengths"
+
+ df = pd.DataFrame({"Predictions": preds, "References": labels}).sample(
+ n=n_samples
+ )
+
+ if print_samples:
+ for pred, label in zip(df["Predictions"].values, df["References"].values):
+ print(f" | > Prediction: {pred}")
+ print(f" | > Reference: {label}\n")
+
+ return {
+ metric_name: metric.corpus_score(preds, [labels]).score
+ for (metric_name, metric) in metric_dict.items()
+ }
+
+ return compute_metrics
+
+
+def preprocess_fn(example, tokenizer, **kwargs):
+ model_inputs = tokenizer(
+ example["sentence_SRC"], truncation=True, padding=False, max_length=256
+ )
+
+ with tokenizer.as_target_tokenizer():
+ labels = tokenizer(
+ example["sentence_TGT"], truncation=True, padding=False, max_length=256
+ )
+
+ model_inputs["labels"] = labels["input_ids"]
+ return model_inputs
+
+
+def main(args):
+ print(f" | > Loading {args.model} and tokenizer ...")
+ model = AutoModelForSeq2SeqLM.from_pretrained(
+ args.model,
+ trust_remote_code=True,
+ attn_implementation="eager",
+ dropout=args.dropout
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True)
+ processor = IndicProcessor(inference=False) # pre-process before tokenization
+
+ data_collator = IndicDataCollator(
+ tokenizer=tokenizer,
+ model=model,
+ padding="longest", # saves padding tokens
+ pad_to_multiple_of=8, # better to have it as 8 when using fp16
+ label_pad_token_id=-100
+ )
+
+ if args.data_dir is not None:
+ train_dataset = load_and_process_translation_dataset(
+ args.data_dir,
+ split="train",
+ tokenizer=tokenizer,
+ processor=processor,
+ src_lang_list=args.src_lang_list.split(","),
+ tgt_lang_list=args.tgt_lang_list.split(","),
+ )
+ print(f" | > Loaded train dataset from {args.data_dir}. Size: {len(train_dataset)} ...")
+
+ eval_dataset = load_and_process_translation_dataset(
+ args.data_dir,
+ split="dev",
+ tokenizer=tokenizer,
+ processor=processor,
+ src_lang_list=args.src_lang_list.split(","),
+ tgt_lang_list=args.tgt_lang_list.split(","),
+ )
+ print(f" | > Loaded eval dataset from {args.data_dir}. Size: {len(eval_dataset)} ...")
+ else:
+ raise ValueError(" | > Data directory not provided")
+
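+    # LoRA freezes the base model and trains only low-rank adapter matrices
+    # injected into the projection modules listed in --lora_target_modules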
+ lora_config = LoraConfig(
+ r=args.lora_r,
+ bias="none",
+ inference_mode=False,
+ task_type="SEQ_2_SEQ_LM",
+ lora_alpha=args.lora_alpha,
+ lora_dropout=args.lora_dropout,
+ target_modules=args.lora_target_modules.split(","),
+ )
+
+ model.set_label_smoothing(args.label_smoothing)
+
+ model = get_peft_model(model, lora_config)
+ model.print_trainable_parameters()
+
+ print(f" | > Loading metrics factory with BLEU and chrF ...")
+ seq2seq_compute_metrics = compute_metrics_factory(
+ tokenizer=tokenizer,
+ print_samples=args.print_samples,
+ metric_dict={"BLEU": bleu_metric, "chrF": chrf_metric},
+ )
+
+ training_args = Seq2SeqTrainingArguments(
+ output_dir=args.output_dir,
+ do_train=True,
+ do_eval=True,
+ fp16=True, # use fp16 for faster training
+ logging_strategy="steps",
+ evaluation_strategy="steps",
+ save_strategy="steps",
+ logging_steps=100,
+ save_total_limit=1,
+ predict_with_generate=True,
+ load_best_model_at_end=True,
+ max_steps=args.max_steps, # max_steps overrides num_train_epochs
+ per_device_train_batch_size=args.batch_size,
+ per_device_eval_batch_size=args.batch_size,
+ gradient_accumulation_steps=args.grad_accum_steps,
+ eval_accumulation_steps=args.grad_accum_steps,
+ weight_decay=args.weight_decay,
+ adam_beta1=args.adam_beta1,
+ adam_beta2=args.adam_beta2,
+ max_grad_norm=args.max_grad_norm,
+ optim=args.optimizer,
+ lr_scheduler_type=args.lr_scheduler,
+ warmup_ratio=args.warmup_ratio,
+ warmup_steps=args.warmup_steps,
+ learning_rate=args.learning_rate,
+ num_train_epochs=args.num_train_epochs,
+ save_steps=args.save_steps,
+ eval_steps=args.eval_steps,
+ dataloader_num_workers=args.num_workers,
+ metric_for_best_model=args.metric_for_best_model,
+ greater_is_better=args.greater_is_better,
+ report_to=args.report_to,
+ generation_max_length=256,
+ generation_num_beams=5,
+ sortish_sampler=True,
+ group_by_length=True,
+ include_tokens_per_second=True,
+ include_num_input_tokens_seen=True,
+ dataloader_prefetch_factor=2,
+ )
+
+ # Create Trainer instance
+ trainer = Seq2SeqTrainer(
+ model=model,
+ args=training_args,
+ data_collator=data_collator,
+ train_dataset=train_dataset,
+ eval_dataset=eval_dataset,
+ compute_metrics=seq2seq_compute_metrics,
+ callbacks=[
+ EarlyStoppingCallback(
+ early_stopping_patience=args.patience,
+ early_stopping_threshold=args.threshold,
+ )
+ ],
+ )
+
+ print(f" | > Starting training ...")
+
+ try:
+ trainer.train()
+ except KeyboardInterrupt:
+ print(f" | > Training interrupted ...")
+
+ # this will only save the LoRA adapter weights
+ model.save_pretrained(args.output_dir)
+
+
+
+if __name__ == "__main__":
+ parser = get_arg_parse()
+ args = parser.parse_args()
+
+ main(args)
diff --git a/IndicTrans2/huggingface_interface/train_lora.sh b/IndicTrans2/huggingface_interface/train_lora.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c3a615cd0a46543452d980ad48969ccc3ea5beb0
--- /dev/null
+++ b/IndicTrans2/huggingface_interface/train_lora.sh
@@ -0,0 +1,35 @@
+export CUDA_VISIBLE_DEVICES=0
+
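+# usage: bash train_lora.sh [data_dir] [model_name] [output_dir] [src_lang_list] [tgt_lang_list]
+# all five arguments are optional and fall back to the defaults below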
+data_dir=${1:-"en-indic-exp"}
+model_name=${2:-"ai4bharat/indictrans2-en-indic-dist-200M"}
+output_dir=${3:-"output"}
+src_lang_list=${4:-"eng_Latn"}
+tgt_lang_list=${5:-"asm_Beng,ben_Beng,guj_Gujr,hin_Deva,kan_Knda,mal_Mlym,mar_Deva,npi_Deva,ory_Orya,pan_Guru,tam_Taml,tel_Telu,urd_Arab"}
+
+python3 train_lora.py \
+ --data_dir $data_dir \
+    --model $model_name \
+ --output_dir $output_dir \
+ --src_lang_list $src_lang_list \
+ --tgt_lang_list $tgt_lang_list \
+ --save_steps 1000 \
+ --max_steps 1000000 \
+ --batch_size 32 \
+ --grad_accum_steps 4 \
+ --warmup_steps 4000 \
+ --max_grad_norm 1.0 \
+ --learning_rate 2e-4 \
+ --adam_beta1 0.9 \
+ --adam_beta2 0.98 \
+ --optimizer adamw_torch \
+ --lr_scheduler inverse_sqrt \
+ --num_workers 16 \
+ --metric_for_best_model eval_BLEU \
+ --greater_is_better \
+ --patience 10 \
+ --weight_decay 0.01 \
+ --lora_target_modules "q_proj,k_proj" \
+ --lora_dropout 0.1 \
+ --lora_r 16 \
+ --lora_alpha 32 \
+ --print_samples
\ No newline at end of file
diff --git a/IndicTrans2/inference/__init__.py b/IndicTrans2/inference/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/IndicTrans2/inference/custom_interactive.py b/IndicTrans2/inference/custom_interactive.py
new file mode 100644
index 0000000000000000000000000000000000000000..0053e1a241685f94330256f8a87acc8a709535df
--- /dev/null
+++ b/IndicTrans2/inference/custom_interactive.py
@@ -0,0 +1,304 @@
+#!/usr/bin/env python3 -u
+# python wrapper for the fairseq-interactive command line tool
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Translate raw text with a trained model. Batches data on-the-fly.
+"""
+
+import os
+import ast
+from collections import namedtuple
+
+import torch
+from fairseq import checkpoint_utils, options, tasks, utils
+from fairseq.dataclass.utils import convert_namespace_to_omegaconf
+from fairseq.token_generation_constraints import pack_constraints, unpack_constraints
+from fairseq_cli.generate import get_symbols_to_strip_from_output
+
+import codecs
+
+PWD = os.path.dirname(__file__)
+Batch = namedtuple("Batch", "ids src_tokens src_lengths constraints")
+Translation = namedtuple("Translation", "src_str hypos pos_scores alignments")
+
+
+def make_batches(
+    lines, cfg, task, max_positions, encode_fn, constrained_decoding=False
+):
+ def encode_fn_target(x):
+ return encode_fn(x)
+
+    if constrained_decoding:
+        # Strip (tab-delimited) constraints, if present, from input lines,
+        # store them in batch_constraints
+ batch_constraints = [list() for _ in lines]
+ for i, line in enumerate(lines):
+ if "\t" in line:
+ lines[i], *batch_constraints[i] = line.split("\t")
+
+ # Convert each List[str] to List[Tensor]
+ for i, constraint_list in enumerate(batch_constraints):
+ batch_constraints[i] = [
+ task.target_dictionary.encode_line(
+ encode_fn_target(constraint),
+ append_eos=False,
+ add_if_not_exist=False,
+ )
+ for constraint in constraint_list
+ ]
+
+    if constrained_decoding:
+ constraints_tensor = pack_constraints(batch_constraints)
+ else:
+ constraints_tensor = None
+
+ tokens, lengths = task.get_interactive_tokens_and_lengths(lines, encode_fn)
+
+ itr = task.get_batch_iterator(
+ dataset=task.build_dataset_for_inference(
+ tokens, lengths, constraints=constraints_tensor
+ ),
+ max_tokens=cfg.dataset.max_tokens,
+ max_sentences=cfg.dataset.batch_size,
+ max_positions=max_positions,
+ ignore_invalid_inputs=cfg.dataset.skip_invalid_size_inputs_valid_test,
+ ).next_epoch_itr(shuffle=False)
+ for batch in itr:
+ ids = batch["id"]
+ src_tokens = batch["net_input"]["src_tokens"]
+ src_lengths = batch["net_input"]["src_lengths"]
+ constraints = batch.get("constraints", None)
+
+ yield Batch(
+ ids=ids,
+ src_tokens=src_tokens,
+ src_lengths=src_lengths,
+ constraints=constraints,
+ )
+
+
+class Translator:
+ """
+ Wrapper class to handle the interaction with fairseq model class for translation
+ """
+
+ def __init__(
+ self, data_dir, checkpoint_path, batch_size=25, constrained_decoding=False
+ ):
+
+ self.constrained_decoding = constrained_decoding
+ self.parser = options.get_generation_parser(interactive=True)
+ # buffer_size is currently not used but we just initialize it to batch
+ # size + 1 to avoid any assertion errors.
+ if self.constrained_decoding:
+ self.parser.set_defaults(
+ path=checkpoint_path,
+ num_workers=-1,
+ constraints="ordered",
+ batch_size=batch_size,
+ buffer_size=batch_size + 1,
+ )
+ else:
+ self.parser.set_defaults(
+ path=checkpoint_path,
+ remove_bpe="subword_nmt",
+ num_workers=-1,
+ batch_size=batch_size,
+ buffer_size=batch_size + 1,
+ )
+ args = options.parse_args_and_arch(self.parser, input_args=[data_dir])
+        # we are explicitly setting src_lang and tgt_lang here
+        # generally the data_dir we pass contains {split}-{src_lang}-{tgt_lang}.*.idx files from
+        # which fairseq infers the src and tgt langs (if these are not passed). In deployment we don't
+ # use any idx files and only store the SRC and TGT dictionaries.
+ args.source_lang = "SRC"
+ args.target_lang = "TGT"
+ # since we are truncating sentences to max_seq_len in engine, we can set it to False here
+ args.skip_invalid_size_inputs_valid_test = False
+
+        # we have custom architectures in this folder and we will let fairseq
+        # import them
+ args.user_dir = os.path.join(PWD, "model_configs")
+ self.cfg = convert_namespace_to_omegaconf(args)
+
+ utils.import_user_module(self.cfg.common)
+
+ if self.cfg.interactive.buffer_size < 1:
+ self.cfg.interactive.buffer_size = 1
+ if self.cfg.dataset.max_tokens is None and self.cfg.dataset.batch_size is None:
+ self.cfg.dataset.batch_size = 1
+
+ assert (
+ not self.cfg.generation.sampling
+ or self.cfg.generation.nbest == self.cfg.generation.beam
+ ), "--sampling requires --nbest to be equal to --beam"
+ assert (
+ not self.cfg.dataset.batch_size
+ or self.cfg.dataset.batch_size <= self.cfg.interactive.buffer_size
+ ), "--batch-size cannot be larger than --buffer-size"
+
+ # Fix seed for stochastic decoding
+ # if self.cfg.common.seed is not None and not self.cfg.generation.no_seed_provided:
+ # np.random.seed(self.cfg.common.seed)
+ # utils.set_torch_seed(self.cfg.common.seed)
+
+ # if not self.constrained_decoding:
+ # self.use_cuda = torch.cuda.is_available() and not self.cfg.common.cpu
+ # else:
+ # self.use_cuda = False
+
+ self.use_cuda = torch.cuda.is_available() and not self.cfg.common.cpu
+
+ # Setup task, e.g., translation
+ self.task = tasks.setup_task(self.cfg.task)
+
+ # Load ensemble
+ overrides = ast.literal_eval(self.cfg.common_eval.model_overrides)
+ self.models, self._model_args = checkpoint_utils.load_model_ensemble(
+ utils.split_paths(self.cfg.common_eval.path),
+ arg_overrides=overrides,
+ task=self.task,
+ suffix=self.cfg.checkpoint.checkpoint_suffix,
+ strict=(self.cfg.checkpoint.checkpoint_shard_count == 1),
+ num_shards=self.cfg.checkpoint.checkpoint_shard_count,
+ )
+
+ # Set dictionaries
+ self.src_dict = self.task.source_dictionary
+ self.tgt_dict = self.task.target_dictionary
+
+ # Optimize ensemble for generation
+ for model in self.models:
+ if model is None:
+ continue
+ if self.cfg.common.fp16:
+ model.half()
+ if (
+ self.use_cuda
+ and not self.cfg.distributed_training.pipeline_model_parallel
+ ):
+ model.cuda()
+ model.prepare_for_inference_(self.cfg)
+
+ # Initialize generator
+ self.generator = self.task.build_generator(self.models, self.cfg.generation)
+
+ self.tokenizer = None
+ self.bpe = None
+ # # Handle tokenization and BPE
+ # self.tokenizer = self.task.build_tokenizer(self.cfg.tokenizer)
+ # self.bpe = self.task.build_bpe(self.cfg.bpe)
+
+ # Load alignment dictionary for unknown word replacement
+ # (None if no unknown word replacement, empty if no path to align dictionary)
+ self.align_dict = utils.load_align_dict(self.cfg.generation.replace_unk)
+
+ self.max_positions = utils.resolve_max_positions(
+ self.task.max_positions(), *[model.max_positions() for model in self.models]
+ )
+
+ def encode_fn(self, x):
+ if self.tokenizer is not None:
+ x = self.tokenizer.encode(x)
+ if self.bpe is not None:
+ x = self.bpe.encode(x)
+ return x
+
+ def decode_fn(self, x):
+ if self.bpe is not None:
+ x = self.bpe.decode(x)
+ if self.tokenizer is not None:
+ x = self.tokenizer.decode(x)
+ return x
+
+ def translate(self, inputs, constraints=None):
+ if self.constrained_decoding and constraints is None:
+ raise ValueError("Constraints cant be None in constrained decoding mode")
+ if not self.constrained_decoding and constraints is not None:
+ raise ValueError("Cannot pass constraints during normal translation")
+ if constraints:
+ constrained_decoding = True
+ modified_inputs = []
+ for _input, constraint in zip(inputs, constraints):
+ modified_inputs.append(_input + f"\t{constraint}")
+ inputs = modified_inputs
+ else:
+ constrained_decoding = False
+
+ start_id = 0
+ results = []
+ final_translations = []
+ for batch in make_batches(
+ inputs,
+ self.cfg,
+ self.task,
+ self.max_positions,
+ self.encode_fn,
+ constrained_decoding,
+ ):
+ bsz = batch.src_tokens.size(0)
+ src_tokens = batch.src_tokens
+ src_lengths = batch.src_lengths
+ constraints = batch.constraints
+ if self.use_cuda:
+ src_tokens = src_tokens.cuda()
+ src_lengths = src_lengths.cuda()
+ if constraints is not None:
+ constraints = constraints.cuda()
+
+ sample = {
+ "net_input": {
+ "src_tokens": src_tokens,
+ "src_lengths": src_lengths,
+ },
+ }
+
+ translations = self.task.inference_step(
+ self.generator, self.models, sample, constraints=constraints
+ )
+
+ list_constraints = [[] for _ in range(bsz)]
+ if constrained_decoding:
+ list_constraints = [unpack_constraints(c) for c in constraints]
+ for i, (id, hypos) in enumerate(zip(batch.ids.tolist(), translations)):
+ src_tokens_i = utils.strip_pad(src_tokens[i], self.tgt_dict.pad())
+ constraints = list_constraints[i]
+ results.append(
+ (
+ start_id + id,
+ src_tokens_i,
+ hypos,
+ {
+ "constraints": constraints,
+ },
+ )
+ )
+
+ # sort output to match input order
+ for id_, src_tokens, hypos, _ in sorted(results, key=lambda x: x[0]):
+ src_str = ""
+ if self.src_dict is not None:
+ src_str = self.src_dict.string(
+ src_tokens, self.cfg.common_eval.post_process
+ )
+
+ # Process top predictions
+ for hypo in hypos[: min(len(hypos), self.cfg.generation.nbest)]:
+ hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
+ hypo_tokens=hypo["tokens"].int().cpu(),
+ src_str=src_str,
+ alignment=hypo["alignment"],
+ align_dict=self.align_dict,
+ tgt_dict=self.tgt_dict,
+ extra_symbols_to_ignore=get_symbols_to_strip_from_output(
+ self.generator
+ ),
+ )
+ detok_hypo_str = self.decode_fn(hypo_str)
+ final_translations.append(detok_hypo_str)
+ return final_translations
diff --git a/IndicTrans2/inference/download.py b/IndicTrans2/inference/download.py
new file mode 100644
index 0000000000000000000000000000000000000000..57ec9fe22624657b27bf8fbb6b0a9b05c7a8418b
--- /dev/null
+++ b/IndicTrans2/inference/download.py
@@ -0,0 +1,5 @@
+import urduhack
+urduhack.download()
+
+import nltk
+nltk.download('punkt')
diff --git a/IndicTrans2/inference/engine.py b/IndicTrans2/inference/engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..b58cb07ce41926d7ff63641100e041b01dbd4cad
--- /dev/null
+++ b/IndicTrans2/inference/engine.py
@@ -0,0 +1,472 @@
+import hashlib
+import os
+import uuid
+from typing import List, Tuple, Union, Dict
+
+import regex as re
+import sentencepiece as spm
+from indicnlp.normalize import indic_normalize
+from indicnlp.tokenize import indic_detokenize, indic_tokenize
+from indicnlp.tokenize.sentence_tokenize import DELIM_PAT_NO_DANDA, sentence_split
+from indicnlp.transliterate import unicode_transliterate
+from mosestokenizer import MosesSentenceSplitter
+from nltk.tokenize import sent_tokenize
+from sacremoses import MosesDetokenizer, MosesPunctNormalizer, MosesTokenizer
+from tqdm import tqdm
+
+from .flores_codes_map_indic import flores_codes, iso_to_flores
+from .normalize_punctuation import punc_norm
+from .normalize_regex_inference import EMAIL_PATTERN, normalize
+
+
+def split_sentences(paragraph: str, lang: str) -> List[str]:
+ """
+ Splits the input text paragraph into sentences. It uses `moses` for English and
+ `indic-nlp` for Indic languages.
+
+ Args:
+ paragraph (str): input text paragraph.
+ lang (str): flores language code.
+
+ Returns:
+ List[str] -> list of sentences.
+ """
+ if lang == "eng_Latn":
+ with MosesSentenceSplitter(flores_codes[lang]) as splitter:
+ sents_moses = splitter([paragraph])
+ sents_nltk = sent_tokenize(paragraph)
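+        # prefer whichever splitter produces fewer segments (less over-splitting)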
+ if len(sents_nltk) < len(sents_moses):
+ sents = sents_nltk
+ else:
+ sents = sents_moses
+ return [sent.replace("\xad", "") for sent in sents]
+ else:
+ return sentence_split(paragraph, lang=flores_codes[lang], delim_pat=DELIM_PAT_NO_DANDA)
+
+
+def add_token(sent: str, src_lang: str, tgt_lang: str, delimiter: str = " ") -> str:
+ """
+ Add special tokens indicating source and target language to the start of the input sentence.
+ The resulting string will have the format: "`{src_lang} {tgt_lang} {input_sentence}`".
+
+ Args:
+ sent (str): input sentence to be translated.
+ src_lang (str): flores lang code of the input sentence.
+ tgt_lang (str): flores lang code in which the input sentence will be translated.
+ delimiter (str): separator to add between language tags and input sentence (default: " ").
+
+ Returns:
+ str: input sentence with the special tokens added to the start.
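+
+    Example:
+        >>> add_token("Hello world", "eng_Latn", "hin_Deva")
+        'eng_Latn hin_Deva Hello world'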
+ """
+ return src_lang + delimiter + tgt_lang + delimiter + sent
+
+
+def apply_lang_tags(sents: List[str], src_lang: str, tgt_lang: str) -> List[str]:
+ """
+    Add special tokens indicating source and target language to the start of each input sentence.
+    Each resulting input sentence will have the format: "`{src_lang} {tgt_lang} {input_sentence}`".
+
+    Args:
+        sents (List[str]): input sentences to be translated.
+        src_lang (str): flores lang code of the input sentences.
+        tgt_lang (str): flores lang code in which the input sentences will be translated.
+
+ Returns:
+ List[str]: list of input sentences with the special tokens added to the start.
+ """
+ tagged_sents = []
+ for sent in sents:
+ tagged_sent = add_token(sent.strip(), src_lang, tgt_lang)
+ tagged_sents.append(tagged_sent)
+ return tagged_sents
+
+
+def truncate_long_sentences(
+ sents: List[str], placeholder_entity_map_sents: List[Dict]
+) -> Tuple[List[str], List[Dict]]:
+ """
+    Truncates sentences that exceed the maximum sequence length.
+    The maximum sequence length for the IndicTrans2 model is 256 tokens.
+
+    Args:
+        sents (List[str]): list of input sentences to truncate.
+        placeholder_entity_map_sents (List[Dict]): placeholder entity maps corresponding to the input sentences.
+
+ Returns:
+ Tuple[List[str], List[Dict]]: tuple containing the list of sentences with truncation applied and the updated placeholder entity maps.
+ """
+ MAX_SEQ_LEN = 256
+ new_sents = []
+ placeholders = []
+
+ for j, sent in enumerate(sents):
+ words = sent.split()
+ num_words = len(words)
+ if num_words > MAX_SEQ_LEN:
+            # split the long sentence into chunks of at most MAX_SEQ_LEN words
+            # (use a separate `chunks` list to avoid shadowing the outer `sents`
+            # and to avoid appending an empty trailing chunk)
+            chunks = []
+            i = 0
+            while i < len(words):
+                chunks.append(" ".join(words[i : i + MAX_SEQ_LEN]))
+                i += MAX_SEQ_LEN
+            placeholders.extend([placeholder_entity_map_sents[j]] * len(chunks))
+            new_sents.extend(chunks)
+ else:
+ placeholders.append(placeholder_entity_map_sents[j])
+ new_sents.append(sent)
+ return new_sents, placeholders
+
+
+class Model:
+ """
+ Model class to run the IndicTransv2 models using python interface.
+ """
+
+ def __init__(
+ self,
+ ckpt_dir: str,
+ device: str = "cuda",
+ input_lang_code_format: str = "flores",
+ model_type: str = "ctranslate2",
+ ):
+ """
+ Initialize the model class.
+
+ Args:
+ ckpt_dir (str): path of the model checkpoint directory.
+            device (str, optional): where to load the model (default: cuda).
+            input_lang_code_format (str, optional): language code convention of the inputs, "flores" or "iso" (default: flores).
+            model_type (str, optional): backend to use for translation, "ctranslate2" or "fairseq" (default: ctranslate2).
+ """
+ self.ckpt_dir = ckpt_dir
+ self.en_tok = MosesTokenizer(lang="en")
+ self.en_normalizer = MosesPunctNormalizer()
+ self.en_detok = MosesDetokenizer(lang="en")
+ self.xliterator = unicode_transliterate.UnicodeIndicTransliterator()
+
+ print("Initializing sentencepiece model for SRC and TGT")
+ self.sp_src = spm.SentencePieceProcessor(
+ model_file=os.path.join(ckpt_dir, "vocab", "model.SRC")
+ )
+ self.sp_tgt = spm.SentencePieceProcessor(
+ model_file=os.path.join(ckpt_dir, "vocab", "model.TGT")
+ )
+
+ self.input_lang_code_format = input_lang_code_format
+
+ print("Initializing model for translation")
+ # initialize the model
+ if model_type == "ctranslate2":
+ import ctranslate2
+
+ self.translator = ctranslate2.Translator(
+ self.ckpt_dir, device=device
+ ) # , compute_type="auto")
+ self.translate_lines = self.ctranslate2_translate_lines
+ elif model_type == "fairseq":
+ from .custom_interactive import Translator
+
+ self.translator = Translator(
+ data_dir=os.path.join(self.ckpt_dir, "final_bin"),
+ checkpoint_path=os.path.join(self.ckpt_dir, "model", "checkpoint_best.pt"),
+ batch_size=100,
+ )
+ self.translate_lines = self.fairseq_translate_lines
+ else:
+ raise NotImplementedError(f"Unknown model_type: {model_type}")
+
+ def ctranslate2_translate_lines(self, lines: List[str]) -> List[str]:
+ tokenized_sents = [x.strip().split(" ") for x in lines]
+ translations = self.translator.translate_batch(
+ tokenized_sents,
+ max_batch_size=9216,
+ batch_type="tokens",
+ max_input_length=160,
+ max_decoding_length=256,
+ beam_size=5,
+ )
+ translations = [" ".join(x.hypotheses[0]) for x in translations]
+ return translations
+
+ def fairseq_translate_lines(self, lines: List[str]) -> List[str]:
+ return self.translator.translate(lines)
+
+ def paragraphs_batch_translate__multilingual(self, batch_payloads: List[tuple]) -> List[str]:
+ """
+ Translates a batch of input paragraphs (including pre/post processing)
+ from any language to any language.
+
+ Args:
+ batch_payloads (List[tuple]): batch of long input-texts to be translated, each in format: (paragraph, src_lang, tgt_lang)
+
+ Returns:
+ List[str]: batch of paragraph-translations in the respective languages.
+ """
+ paragraph_id_to_sentence_range = []
+ global__sents = []
+ global__preprocessed_sents = []
+ global__preprocessed_sents_placeholder_entity_map = []
+
+ for i in range(len(batch_payloads)):
+ paragraph, src_lang, tgt_lang = batch_payloads[i]
+ if self.input_lang_code_format == "iso":
+ src_lang, tgt_lang = iso_to_flores[src_lang], iso_to_flores[tgt_lang]
+
+ batch = split_sentences(paragraph, src_lang)
+ global__sents.extend(batch)
+
+ preprocessed_sents, placeholder_entity_map_sents = self.preprocess_batch(
+ batch, src_lang, tgt_lang
+ )
+
+ global_sentence_start_index = len(global__preprocessed_sents)
+ global__preprocessed_sents.extend(preprocessed_sents)
+ global__preprocessed_sents_placeholder_entity_map.extend(placeholder_entity_map_sents)
+ paragraph_id_to_sentence_range.append(
+ (global_sentence_start_index, len(global__preprocessed_sents))
+ )
+
+ translations = self.translate_lines(global__preprocessed_sents)
+
+ translated_paragraphs = []
+ for paragraph_id, sentence_range in enumerate(paragraph_id_to_sentence_range):
+ tgt_lang = batch_payloads[paragraph_id][2]
+ if self.input_lang_code_format == "iso":
+ tgt_lang = iso_to_flores[tgt_lang]
+
+ postprocessed_sents = self.postprocess(
+ translations[sentence_range[0] : sentence_range[1]],
+ global__preprocessed_sents_placeholder_entity_map[
+ sentence_range[0] : sentence_range[1]
+ ],
+ tgt_lang,
+ )
+ translated_paragraph = " ".join(postprocessed_sents)
+ translated_paragraphs.append(translated_paragraph)
+
+ return translated_paragraphs
+
+ # translate a batch of sentences from src_lang to tgt_lang
+ def batch_translate(self, batch: List[str], src_lang: str, tgt_lang: str) -> List[str]:
+ """
+ Translates a batch of input sentences (including pre/post processing)
+ from source language to target language.
+
+ Args:
+ batch (List[str]): batch of input sentences to be translated.
+ src_lang (str): flores source language code.
+ tgt_lang (str): flores target language code.
+
+ Returns:
+ List[str]: batch of translated-sentences generated by the model.
+ """
+
+ assert isinstance(batch, list)
+
+ if self.input_lang_code_format == "iso":
+ src_lang, tgt_lang = iso_to_flores[src_lang], iso_to_flores[tgt_lang]
+
+ preprocessed_sents, placeholder_entity_map_sents = self.preprocess_batch(
+ batch, src_lang, tgt_lang
+ )
+ translations = self.translate_lines(preprocessed_sents)
+ return self.postprocess(translations, placeholder_entity_map_sents, tgt_lang)
+
+ # translate a paragraph from src_lang to tgt_lang
+ def translate_paragraph(self, paragraph: str, src_lang: str, tgt_lang: str) -> str:
+ """
+ Translates an input text paragraph (including pre/post processing)
+ from source language to target language.
+
+ Args:
+ paragraph (str): input text paragraph to be translated.
+ src_lang (str): flores source language code.
+ tgt_lang (str): flores target language code.
+
+ Returns:
+ str: paragraph translation generated by the model.
+ """
+
+ assert isinstance(paragraph, str)
+
+ if self.input_lang_code_format == "iso":
+ flores_src_lang = iso_to_flores[src_lang]
+ else:
+ flores_src_lang = src_lang
+
+ sents = split_sentences(paragraph, flores_src_lang)
+ postprocessed_sents = self.batch_translate(sents, src_lang, tgt_lang)
+ translated_paragraph = " ".join(postprocessed_sents)
+
+ return translated_paragraph
+
+    def preprocess_batch(self, batch: List[str], src_lang: str, tgt_lang: str) -> Tuple[List[str], List[Dict]]:
+ """
+        Preprocess a batch of sentences by normalizing and possibly transliterating them, then tokenize the
+        normalized text with the SentencePiece model and add source and target language tags.
+
+ Args:
+ batch (List[str]): input list of sentences to preprocess.
+ src_lang (str): flores language code of the input text sentences.
+ tgt_lang (str): flores language code of the output text sentences.
+
+ Returns:
+ Tuple[List[str], List[Dict]]: a tuple of list of preprocessed input text sentences and also a corresponding list of dictionary
+ mapping placeholders to their original values.
+ """
+ preprocessed_sents, placeholder_entity_map_sents = self.preprocess(batch, lang=src_lang)
+ tokenized_sents = self.apply_spm(preprocessed_sents)
+ tokenized_sents, placeholder_entity_map_sents = truncate_long_sentences(
+ tokenized_sents, placeholder_entity_map_sents
+ )
+ tagged_sents = apply_lang_tags(tokenized_sents, src_lang, tgt_lang)
+ return tagged_sents, placeholder_entity_map_sents
+
+ def apply_spm(self, sents: List[str]) -> List[str]:
+ """
+ Applies sentence piece encoding to the batch of input sentences.
+
+ Args:
+ sents (List[str]): batch of the input sentences.
+
+ Returns:
+ List[str]: batch of sentences encoded with the SentencePiece model.
+ """
+ return [" ".join(self.sp_src.encode(sent, out_type=str)) for sent in sents]
+
+ def preprocess_sent(
+ self,
+ sent: str,
+ normalizer: Union[MosesPunctNormalizer, indic_normalize.IndicNormalizerFactory],
+ lang: str,
+ ) -> Tuple[str, Dict]:
+ """
+ Preprocesses an input text sentence by normalizing, tokenizing, and possibly transliterating it.
+
+ Args:
+ sent (str): input text sentence to preprocess.
+ normalizer (Union[MosesPunctNormalizer, indic_normalize.IndicNormalizerFactory]): an object that performs normalization on the text.
+ lang (str): flores language code of the input text sentence.
+
+ Returns:
+ Tuple[str, Dict]: A tuple containing the preprocessed input text sentence and a corresponding dictionary
+ mapping placeholders to their original values.
+ """
+ iso_lang = flores_codes[lang]
+ sent = punc_norm(sent, iso_lang)
+ sent, placeholder_entity_map = normalize(sent)
+
+ transliterate = True
+ if lang.split("_")[1] in ["Arab", "Aran", "Olck", "Mtei", "Latn"]:
+ transliterate = False
+
+ if iso_lang == "en":
+ processed_sent = " ".join(
+ self.en_tok.tokenize(self.en_normalizer.normalize(sent.strip()), escape=False)
+ )
+ elif transliterate:
+ # transliterates from the source language to Devanagari,
+ # which is why we specify lang2_code as "hi".
+ processed_sent = self.xliterator.transliterate(
+ " ".join(
+ indic_tokenize.trivial_tokenize(normalizer.normalize(sent.strip()), iso_lang)
+ ),
+ iso_lang,
+ "hi",
+ ).replace(" ् ", "्")
+ else:
+ # no transliteration here; it is only needed for joint training across Indic scripts
+ processed_sent = " ".join(
+ indic_tokenize.trivial_tokenize(normalizer.normalize(sent.strip()), iso_lang)
+ )
+
+ return processed_sent, placeholder_entity_map
+
+ def preprocess(self, sents: List[str], lang: str) -> Tuple[List[str], List[Dict]]:
+ """
+ Preprocesses a batch of sentences by normalizing, tokenizing, and possibly transliterating them.
+
+ Args:
+ sents (List[str]): input list of sentences to preprocess.
+ lang (str): flores language code of the input text sentences.
+
+ Returns:
+ Tuple[List[str], List[Dict]]: a tuple containing the list of preprocessed input sentences and a
+ corresponding list of dictionaries mapping placeholders to their original values.
+ """
+ processed_sents, placeholder_entity_map_sents = [], []
+
+ if lang == "eng_Latn":
+ normalizer = None
+ else:
+ normfactory = indic_normalize.IndicNormalizerFactory()
+ normalizer = normfactory.get_normalizer(flores_codes[lang])
+
+ for sent in sents:
+ sent, placeholder_entity_map = self.preprocess_sent(sent, normalizer, lang)
+ processed_sents.append(sent)
+ placeholder_entity_map_sents.append(placeholder_entity_map)
+
+ return processed_sents, placeholder_entity_map_sents
+
+ def postprocess(
+ self,
+ sents: List[str],
+ placeholder_entity_map: List[Dict],
+ lang: str,
+ common_lang: str = "hin_Deva",
+ ) -> List[str]:
+ """
+ Postprocesses a batch of translated sentences (SPM decoding, placeholder restoration, detokenization).
+
+ Args:
+ sents (List[str]): batch of translated sentences to postprocess.
+ placeholder_entity_map (List[Dict]): list of dictionaries mapping placeholders to the original entity values.
+ lang (str): flores language code of the translated sentences.
+ common_lang (str, optional): flores language code of the transliterated language (default: hin_Deva).
+
+ Returns:
+ List[str]: postprocessed batch of input sentences.
+ """
+
+ lang_code, script_code = lang.split("_")
+ # SPM decode
+ for i in range(len(sents)):
+ # sent_tokens = sents[i].split(" ")
+ # sents[i] = self.sp_tgt.decode(sent_tokens)
+
+ sents[i] = sents[i].replace(" ", "").replace("▁", " ").strip()
+
+ # Fixes for Perso-Arabic scripts
+ # TODO: Move these normalizations inside indic-nlp-library
+ if script_code in {"Arab", "Aran"}:
+ # UrduHack adds space before punctuations. Since the model was trained without fixing this issue, let's fix it now
+ sents[i] = sents[i].replace(" ؟", "؟").replace(" ۔", "۔").replace(" ،", "،")
+ # Kashmiri bugfix for palatalization: https://github.com/AI4Bharat/IndicTrans2/issues/11
+ sents[i] = sents[i].replace("ٮ۪", "ؠ")
+
+ assert len(sents) == len(placeholder_entity_map)
+
+ for i in range(0, len(sents)):
+ for key in placeholder_entity_map[i].keys():
+ sents[i] = sents[i].replace(key, placeholder_entity_map[i][key])
+
+ # Detokenize and transliterate to native scripts if applicable
+ postprocessed_sents = []
+
+ if lang == "eng_Latn":
+ for sent in sents:
+ postprocessed_sents.append(self.en_detok.detokenize(sent.split(" ")))
+ else:
+ for sent in sents:
+ outstr = indic_detokenize.trivial_detokenize(
+ self.xliterator.transliterate(
+ sent, flores_codes[common_lang], flores_codes[lang]
+ ),
+ flores_codes[lang],
+ )
+
+ # Oriya bug: indic-nlp-library produces ଯ଼ instead of ୟ when converting from Devanagari to Odia
+ # TODO: find out what the issue is with the unicode transliterator for Oriya and fix it
+ if lang_code == "ory":
+ outstr = outstr.replace("ଯ଼", 'ୟ')
+
+ postprocessed_sents.append(outstr)
+
+ return postprocessed_sents
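+
+
+# Minimal usage sketch (assuming the translator class defined earlier in this
+# file, e.g. initialized from an en-indic checkpoint; constructor arguments
+# are hypothetical and omitted here):
+#
+# model = Model(...)
+# model.batch_translate(["Hello world"], "eng_Latn", "hin_Deva")
+# model.translate_paragraph("Hello world. How are you?", "eng_Latn", "hin_Deva")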
diff --git a/IndicTrans2/inference/flores_codes_map_indic.py b/IndicTrans2/inference/flores_codes_map_indic.py
new file mode 100644
index 0000000000000000000000000000000000000000..cec76c53720a82d181786399f05b87d5461e4a36
--- /dev/null
+++ b/IndicTrans2/inference/flores_codes_map_indic.py
@@ -0,0 +1,83 @@
+"""
+FLORES language code mapping to 2 letter ISO language code for compatibility
+with Indic NLP Library (https://github.com/anoopkunchukuttan/indic_nlp_library)
+"""
+flores_codes = {
+ "asm_Beng": "as",
+ "awa_Deva": "hi",
+ "ben_Beng": "bn",
+ "bho_Deva": "hi",
+ "brx_Deva": "hi",
+ "doi_Deva": "hi",
+ "eng_Latn": "en",
+ "gom_Deva": "kK",
+ "guj_Gujr": "gu",
+ "hin_Deva": "hi",
+ "hne_Deva": "hi",
+ "kan_Knda": "kn",
+ "kas_Arab": "ur",
+ "kas_Deva": "hi",
+ "kha_Latn": "en",
+ "lus_Latn": "en",
+ "mag_Deva": "hi",
+ "mai_Deva": "hi",
+ "mal_Mlym": "ml",
+ "mar_Deva": "mr",
+ "mni_Beng": "bn",
+ "mni_Mtei": "hi",
+ "npi_Deva": "ne",
+ "ory_Orya": "or",
+ "pan_Guru": "pa",
+ "san_Deva": "hi",
+ "sat_Olck": "or",
+ "snd_Arab": "ur",
+ "snd_Deva": "hi",
+ "tam_Taml": "ta",
+ "tel_Telu": "te",
+ "urd_Arab": "ur",
+}
+
+
+flores_to_iso = {
+ "asm_Beng": "as",
+ "awa_Deva": "awa",
+ "ben_Beng": "bn",
+ "bho_Deva": "bho",
+ "brx_Deva": "brx",
+ "doi_Deva": "doi",
+ "eng_Latn": "en",
+ "gom_Deva": "gom",
+ "guj_Gujr": "gu",
+ "hin_Deva": "hi",
+ "hne_Deva": "hne",
+ "kan_Knda": "kn",
+ "kas_Arab": "ksa",
+ "kas_Deva": "ksd",
+ "kha_Latn": "kha",
+ "lus_Latn": "lus",
+ "mag_Deva": "mag",
+ "mai_Deva": "mai",
+ "mal_Mlym": "ml",
+ "mar_Deva": "mr",
+ "mni_Beng": "mnib",
+ "mni_Mtei": "mnim",
+ "npi_Deva": "ne",
+ "ory_Orya": "or",
+ "pan_Guru": "pa",
+ "san_Deva": "sa",
+ "sat_Olck": "sat",
+ "snd_Arab": "sda",
+ "snd_Deva": "sdd",
+ "tam_Taml": "ta",
+ "tel_Telu": "te",
+ "urd_Arab": "ur",
+}
+
+iso_to_flores = {iso_code: flores_code for flores_code, iso_code in flores_to_iso.items()}
+# Patch for digraphic langs.
+iso_to_flores["ks"] = "kas_Arab"
+iso_to_flores["ks_Deva"] = "kas_Deva"
+iso_to_flores["mni"] = "mni_Mtei"
+iso_to_flores["mni_Beng"] = "mni_Beng"
+iso_to_flores["sd"] = "snd_Arab"
+iso_to_flores["sd_Deva"] = "snd_Deva"
diff --git a/IndicTrans2/inference/indic_num_map.py b/IndicTrans2/inference/indic_num_map.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd339a7d90eb41eab0605f15f7ea9339641cb465
--- /dev/null
+++ b/IndicTrans2/inference/indic_num_map.py
@@ -0,0 +1,117 @@
+"""
+A dictionary mapping intended to normalize the numerals in Indic languages from
+native script to Roman script. This is done to ensure that the figures / numbers
+mentioned in native script are perfectly preserved during translation.
+"""
+INDIC_NUM_MAP = {
+ "\u09e6": "0",
+ "0": "0",
+ "\u0ae6": "0",
+ "\u0ce6": "0",
+ "\u0966": "0",
+ "\u0660": "0",
+ "\uabf0": "0",
+ "\u0b66": "0",
+ "\u0a66": "0",
+ "\u1c50": "0",
+ "\u06f0": "0",
+ "\u09e7": "1",
+ "1": "1",
+ "\u0ae7": "1",
+ "\u0967": "1",
+ "\u0ce7": "1",
+ "\u06f1": "1",
+ "\uabf1": "1",
+ "\u0b67": "1",
+ "\u0a67": "1",
+ "\u1c51": "1",
+ "\u0c67": "1",
+ "\u09e8": "2",
+ "2": "2",
+ "\u0ae8": "2",
+ "\u0968": "2",
+ "\u0ce8": "2",
+ "\u06f2": "2",
+ "\uabf2": "2",
+ "\u0b68": "2",
+ "\u0a68": "2",
+ "\u1c52": "2",
+ "\u0c68": "2",
+ "\u09e9": "3",
+ "3": "3",
+ "\u0ae9": "3",
+ "\u0969": "3",
+ "\u0ce9": "3",
+ "\u06f3": "3",
+ "\uabf3": "3",
+ "\u0b69": "3",
+ "\u0a69": "3",
+ "\u1c53": "3",
+ "\u0c69": "3",
+ "\u09ea": "4",
+ "4": "4",
+ "\u0aea": "4",
+ "\u096a": "4",
+ "\u0cea": "4",
+ "\u06f4": "4",
+ "\uabf4": "4",
+ "\u0b6a": "4",
+ "\u0a6a": "4",
+ "\u1c54": "4",
+ "\u0c6a": "4",
+ "\u09eb": "5",
+ "5": "5",
+ "\u0aeb": "5",
+ "\u096b": "5",
+ "\u0ceb": "5",
+ "\u06f5": "5",
+ "\uabf5": "5",
+ "\u0b6b": "5",
+ "\u0a6b": "5",
+ "\u1c55": "5",
+ "\u0c6b": "5",
+ "\u09ec": "6",
+ "6": "6",
+ "\u0aec": "6",
+ "\u096c": "6",
+ "\u0cec": "6",
+ "\u06f6": "6",
+ "\uabf6": "6",
+ "\u0b6c": "6",
+ "\u0a6c": "6",
+ "\u1c56": "6",
+ "\u0c6c": "6",
+ "\u09ed": "7",
+ "7": "7",
+ "\u0aed": "7",
+ "\u096d": "7",
+ "\u0ced": "7",
+ "\u06f7": "7",
+ "\uabf7": "7",
+ "\u0b6d": "7",
+ "\u0a6d": "7",
+ "\u1c57": "7",
+ "\u0c6d": "7",
+ "\u09ee": "8",
+ "8": "8",
+ "\u0aee": "8",
+ "\u096e": "8",
+ "\u0cee": "8",
+ "\u06f8": "8",
+ "\uabf8": "8",
+ "\u0b6e": "8",
+ "\u0a6e": "8",
+ "\u1c58": "8",
+ "\u0c6e": "8",
+ "\u09ef": "9",
+ "9": "9",
+ "\u0aef": "9",
+ "\u096f": "9",
+ "\u0cef": "9",
+ "\u06f9": "9",
+ "\uabf9": "9",
+ "\u0b6f": "9",
+ "\u0a6f": "9",
+ "\u1c59": "9",
+ "\u0c6f": "9",
+}
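+
+
+# Example: "".join(INDIC_NUM_MAP.get(c, c) for c in "२०२३") -> "2023"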
diff --git a/IndicTrans2/inference/model_configs/__init__.py b/IndicTrans2/inference/model_configs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ec41f7daeb7930e9df766abdd790c4c5b09b6d9
--- /dev/null
+++ b/IndicTrans2/inference/model_configs/__init__.py
@@ -0,0 +1 @@
+from . import custom_transformer
\ No newline at end of file
diff --git a/IndicTrans2/inference/model_configs/custom_transformer.py b/IndicTrans2/inference/model_configs/custom_transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..efc69aebf70fe6db73b8a75a083c11b0dcf1dc71
--- /dev/null
+++ b/IndicTrans2/inference/model_configs/custom_transformer.py
@@ -0,0 +1,82 @@
+from fairseq.models import register_model_architecture
+from fairseq.models.transformer import base_architecture
+
+
+@register_model_architecture("transformer", "transformer_2x")
+def transformer_big(args):
+ args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024)
+ args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 4096)
+ args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16)
+ args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False)
+ args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1024)
+ args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 4096)
+ args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16)
+ base_architecture(args)
+
+
+@register_model_architecture("transformer", "transformer_4x")
+def transformer_huge(args):
+ args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1536)
+ args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 4096)
+ args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16)
+ args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False)
+ args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1536)
+ args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 4096)
+ args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16)
+ base_architecture(args)
+
+
+@register_model_architecture("transformer", "transformer_9x")
+def transformer_xlarge(args):
+ args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 2048)
+ args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 8192)
+ args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16)
+ args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False)
+ args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 2048)
+ args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 8192)
+ args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16)
+ base_architecture(args)
+
+
+@register_model_architecture("transformer", "transformer_12e12d_9xeq")
+def transformer_vxlarge(args):
+ args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1536)
+ args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 4096)
+ args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16)
+ args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False)
+ args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1536)
+ args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 4096)
+ args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16)
+ args.encoder_layers = getattr(args, "encoder_layers", 12)
+ args.decoder_layers = getattr(args, "decoder_layers", 12)
+ base_architecture(args)
+
+
+@register_model_architecture("transformer", "transformer_18_18")
+def transformer_deep(args):
+ args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024)
+ args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 8 * 1024)
+ args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16)
+ args.encoder_normalize_before = getattr(args, "encoder_normalize_before", True)
+ args.decoder_normalize_before = getattr(args, "decoder_normalize_before", True)
+ args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1024)
+ args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 8 * 1024)
+ args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16)
+ args.encoder_layers = getattr(args, "encoder_layers", 18)
+ args.decoder_layers = getattr(args, "decoder_layers", 18)
+ base_architecture(args)
+
+
+@register_model_architecture("transformer", "transformer_24_24")
+def transformer_xdeep(args):
+ args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024)
+ args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 8 * 1024)
+ args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16)
+ args.encoder_normalize_before = getattr(args, "encoder_normalize_before", True)
+ args.decoder_normalize_before = getattr(args, "decoder_normalize_before", True)
+ args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1024)
+ args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 8 * 1024)
+ args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16)
+ args.encoder_layers = getattr(args, "encoder_layers", 24)
+ args.decoder_layers = getattr(args, "decoder_layers", 24)
+ base_architecture(args)
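+
+
+# These architectures are picked up by fairseq via the `--arch` flag, e.g.
+# (assuming this package is passed through `--user-dir`):
+# fairseq-interactive ... --user-dir model_configs --arch transformer_4x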
diff --git a/IndicTrans2/inference/normalize-punctuation.perl b/IndicTrans2/inference/normalize-punctuation.perl
new file mode 100644
index 0000000000000000000000000000000000000000..d473fa3b7ba80a1a761217d3bf1255583f06e61c
--- /dev/null
+++ b/IndicTrans2/inference/normalize-punctuation.perl
@@ -0,0 +1,90 @@
+#!/usr/bin/env perl
+#
+# This file is part of moses. Its use is licensed under the GNU Lesser General
+# Public License version 2.1 or, at your option, any later version.
+
+use warnings;
+use strict;
+
+my $language = "en";
+my $PENN = 0;
+
+while (@ARGV) {
+ $_ = shift;
+ /^-b$/ && ($| = 1, next); # not buffered (flush each line)
+ /^-l$/ && ($language = shift, next);
+ /^[^\-]/ && ($language = $_, next);
+ /^-penn$/ && ($PENN = 1, next);
+}
+
+while(<STDIN>) {
+ s/\r//g;
+ # remove extra spaces
+ s/\(/ \(/g;
+ s/\)/\) /g; s/ +/ /g;
+ s/\) ([\.\!\:\?\;\,])/\)$1/g;
+ s/\( /\(/g;
+ s/ \)/\)/g;
+ s/(\d) \%/$1\%/g;
+ s/ :/:/g;
+ s/ ;/;/g;
+ # normalize unicode punctuation
+ if ($PENN == 0) {
+ s/\`/\'/g;
+ s/\'\'/ \" /g;
+ }
+
+ s/„/\"/g;
+ s/“/\"/g;
+ s/”/\"/g;
+ s/–/-/g;
+ s/—/ - /g; s/ +/ /g;
+ s/´/\'/g;
+ s/([a-z])‘([a-z])/$1\'$2/gi;
+ s/([a-z])’([a-z])/$1\'$2/gi;
+ s/‘/\'/g;
+ s/‚/\'/g;
+ s/’/\"/g;
+ s/''/\"/g;
+ s/´´/\"/g;
+ s/…/.../g;
+ # French quotes
+ s/ « / \"/g;
+ s/« /\"/g;
+ s/«/\"/g;
+ s/ » /\" /g;
+ s/ »/\"/g;
+ s/»/\"/g;
+ # handle pseudo-spaces
+ s/ \%/\%/g;
+ s/nº /nº /g;
+ s/ :/:/g;
+ s/ ºC/ ºC/g;
+ s/ cm/ cm/g;
+ s/ \?/\?/g;
+ s/ \!/\!/g;
+ s/ ;/;/g;
+ s/, /, /g; s/ +/ /g;
+
+ # English "quotation," followed by comma, style
+ if ($language eq "en") {
+ s/\"([,\.]+)/$1\"/g;
+ }
+ # Czech is confused
+ elsif ($language eq "cs" || $language eq "cz") {
+ }
+ # German/Spanish/French "quotation", followed by comma, style
+ else {
+ s/,\"/\",/g;
+ s/(\.+)\"(\s*[^<])/\"$1$2/g; # don't fix period at end of sentence
+ }
+
+
+ if ($language eq "de" || $language eq "es" || $language eq "cz" || $language eq "cs" || $language eq "fr") {
+ s/(\d) (\d)/$1,$2/g;
+ }
+ else {
+ s/(\d) (\d)/$1.$2/g;
+ }
+ print $_;
+}
diff --git a/IndicTrans2/inference/normalize_punctuation.py b/IndicTrans2/inference/normalize_punctuation.py
new file mode 100644
index 0000000000000000000000000000000000000000..710df2ca33ced801efda5713b78f91ccc1699b50
--- /dev/null
+++ b/IndicTrans2/inference/normalize_punctuation.py
@@ -0,0 +1,60 @@
+# IMPORTANT NOTE: DO NOT DIRECTLY EDIT THIS FILE
+# This file was manually ported from `normalize-punctuation.perl`
+# TODO: Only supports English, add others
+
+import regex as re
+multispace_regex = re.compile("[ ]{2,}")
+multidots_regex = re.compile(r"\.{2,}")
+end_bracket_space_punc_regex = re.compile(r"\) ([\.!:?;,])")
+digit_space_percent = re.compile(r"(\d) %")
+double_quot_punc = re.compile(r"\"([,\.]+)")
+digit_nbsp_digit = re.compile(r"(\d) (\d)")
+
+def punc_norm(text, lang="en"):
+ text = text.replace('\r', '') \
+ .replace('(', " (") \
+ .replace(')', ") ") \
+ \
+ .replace("( ", "(") \
+ .replace(" )", ")") \
+ \
+ .replace(" :", ':') \
+ .replace(" ;", ';') \
+ .replace('`', "'") \
+ \
+ .replace('„', '"') \
+ .replace('“', '"') \
+ .replace('”', '"') \
+ .replace('–', '-') \
+ .replace('—', " - ") \
+ .replace('´', "'") \
+ .replace('‘', "'") \
+ .replace('‚', "'") \
+ .replace('’', "'") \
+ .replace("''", "\"") \
+ .replace("´´", '"') \
+ .replace('…', "...") \
+ .replace(" « ", " \"") \
+ .replace("« ", '"') \
+ .replace('«', '"') \
+ .replace(" » ", "\" ") \
+ .replace(" »", '"') \
+ .replace('»', '"') \
+ .replace(" %", '%') \
+ .replace("nº ", "nº ") \
+ .replace(" :", ':') \
+ .replace(" ºC", " ºC") \
+ .replace(" cm", " cm") \
+ .replace(" ?", '?') \
+ .replace(" !", '!') \
+ .replace(" ;", ';') \
+ .replace(", ", ", ") \
+
+
+ text = multispace_regex.sub(' ', text)
+ text = multidots_regex.sub('.', text)
+ text = end_bracket_space_punc_regex.sub(r")\1", text)
+ text = digit_space_percent.sub(r"\1%", text)
+ text = double_quot_punc.sub(r'\1"', text) # English "quotation," followed by comma, style
+ text = digit_nbsp_digit.sub(r"\1.\2", text) # join digit groups split by a space with "." (English convention)
+ return text.strip(' ')
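+
+# Example (assuming the rules above):
+# punc_norm("Hello   world…") -> "Hello world."
+# ("…" becomes "...", which the multidots rule then reduces to a single ".")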
\ No newline at end of file
diff --git a/IndicTrans2/inference/normalize_punctuation.sh b/IndicTrans2/inference/normalize_punctuation.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d927e1051056f255e1f16b3e594bd794ce381450
--- /dev/null
+++ b/IndicTrans2/inference/normalize_punctuation.sh
@@ -0,0 +1,33 @@
+set -euo pipefail
+
+root=$(dirname $0)
+
+lang_map_path=$root/utils.map_token_lang.tsv
+
+usage () {
+ echo "usage: $0 lang" >&2
+ exit 1
+}
+
+[ $# -eq 1 ] || usage
+
+lang=$1
+
+declare -A lang_map
+
+while read line; do
+ key=$(cut -f1 <<< "$line")
+ val=$(cut -f2 <<< "$line")
+ lang_map[$key]=$val
+done < $lang_map_path
+
+if [ -v "lang_map[$lang]" ]; then
+ lang=${lang_map[$lang]}
+elif [ -v "lang_map[${lang:0:3}]" ]; then
+ lang=${lang_map[${lang:0:3}]}
+else
+ echo "undefined mapping: ${lang}, falling back to: en" >&2
+ lang=en
+fi
+
+perl $root/normalize-punctuation.perl $lang
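+
+# Usage example (hypothetical file names; the perl script reads stdin):
+# cat input.txt | bash normalize_punctuation.sh hin_Deva > normalized.txt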
diff --git a/IndicTrans2/inference/normalize_regex_inference.py b/IndicTrans2/inference/normalize_regex_inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3a55a3be6a6967219b5213258a6ce27656a88ca
--- /dev/null
+++ b/IndicTrans2/inference/normalize_regex_inference.py
@@ -0,0 +1,105 @@
+from typing import Tuple
+import regex as re
+import sys
+from tqdm import tqdm
+from .indic_num_map import INDIC_NUM_MAP
+
+
+
+# NOTE: the regex literals below are best-effort reconstructions; the original
+# pattern strings were garbled in this diff. All groups are non-capturing so
+# that re.findall() returns full matches.
+URL_PATTERN = r"\b(?<![\w/.])(?:(?:https?|ftp)://)?(?:www\.)?[\w-]+(?:\.[\w-]+)+[\w/\-?#&=%.]*"
+EMAIL_PATTERN = r"[\w.+-]+@[\w-]+\.[\w.-]+"
+# handles dates, times, percentages, proportions, ratios, etc.
+NUMERAL_PATTERN = r"(~?\d+\.?\d*\s?%?\s?-?\s?~?\d+\.?\d*\s?%|~?\d+%|\d+[-/.,:']\d+[-/.,:'+]\d+(?:\.\d+)?|\d+[-/.:'+]\d+(?:\.\d+)?)"
+# handles UPI IDs, social-media handles, mentions, hashtags, etc.
+OTHER_PATTERN = r"[A-Za-z0-9]*[#@]\w+"
+
+
+def normalize_indic_numerals(line: str) -> str:
+ """
+ Normalize the numerals in Indic languages from native script to Roman script (if present).
+
+ Args:
+ line (str): an input string with Indic numerals to be normalized.
+
+ Returns:
+ str: the input string with all Indic numerals normalized to Roman script.
+ """
+ return "".join([INDIC_NUM_MAP.get(c, c) for c in line])
+
+
+def wrap_with_placeholders(text: str, patterns: list) -> Tuple[str, dict]:
+ """
+ Wraps substrings with matched patterns in the given text with placeholders and returns
+ the modified text along with a mapping of the placeholders to their original value.
+
+ Args:
+ text (str): an input string which needs to be wrapped with the placeholders.
+ patterns (list): list of patterns to search for in the input string.
+
+ Returns:
+ Tuple[str, dict]: a tuple containing the modified text and a dictionary mapping
+ placeholders to their original values.
+ """
+ serial_no = 1
+
+ placeholder_entity_map = dict()
+
+ for pattern in patterns:
+ matches = set(re.findall(pattern, text))
+
+ # wrap common match with placeholder tags
+ for match in matches:
+ if pattern == URL_PATTERN:
+ # avoids false-positive URL matches for names with initials
+ temp = match.replace(".", "")
+ if len(temp) < 4:
+ continue
+ if pattern == NUMERAL_PATTERN:
+ # short numeral patterns do not need placeholder-based handling
+ temp = match.replace(" ", "").replace(".", "").replace(":", "")
+ if len(temp) < 4:
+ continue
+
+ # Translations of "ID" in all the supported languages have been collated
+ # to deal with edge cases where the placeholder itself gets translated.
+ indic_failure_cases = ['آی ڈی ', 'ꯑꯥꯏꯗꯤ', 'आईडी', 'आई . डी . ', 'ऐटि', 'آئی ڈی ', 'ᱟᱭᱰᱤ ᱾', 'आयडी', 'ऐडि', 'आइडि']
+ placeholder = "<ID{}>".format(serial_no)
+ alternate_placeholder = "< ID{} >".format(serial_no)
+ placeholder_entity_map[placeholder] = match
+ placeholder_entity_map[alternate_placeholder] = match
+
+ for i in indic_failure_cases:
+ placeholder_temp = "<{}{}>".format(i,serial_no)
+ placeholder_entity_map[placeholder_temp] = match
+ placeholder_temp = "< {}{} >".format(i, serial_no)
+ placeholder_entity_map[placeholder_temp] = match
+ placeholder_temp = "< {} {} >".format(i, serial_no)
+ placeholder_entity_map[placeholder_temp] = match
+
+ text = text.replace(match, placeholder)
+ serial_no+=1
+
+ text = re.sub(r"\s+", " ", text)
+
+ # the regex has failure cases with trailing "/" in URLs, so this is a workaround
+ text = text.replace(">/", ">")
+
+ return text, placeholder_entity_map
+
+
+def normalize(text: str, patterns: list = [EMAIL_PATTERN, URL_PATTERN, NUMERAL_PATTERN, OTHER_PATTERN]) -> Tuple[str, dict]:
+ """
+ Normalizes and wraps the spans of input string with placeholder tags. It first normalizes
+ the Indic numerals in the input string to Roman script. Later, it uses the input string with normalized
+ Indic numerals to wrap the spans of text matching the pattern with placeholder tags.
+
+ Args:
+ text (str): input string.
+ patterns (list): list of patterns to search for in the input string.
+
+ Returns:
+ Tuple[str, dict]: a tuple containing the modified text and a dictionary mapping
+ placeholders to their original values.
+ """
+ text = normalize_indic_numerals(text.strip("\n"))
+ text, placeholder_entity_map = wrap_with_placeholders(text, patterns)
+ return text, placeholder_entity_map
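+
+
+# Example (assuming the reconstructed patterns above; the entity map also holds
+# alternate spaced/Indic variants of each placeholder):
+# normalize("Follow @ai4bharat for updates")
+# -> ("Follow <ID1> for updates", {"<ID1>": "@ai4bharat", "< ID1 >": "@ai4bharat", ...})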
diff --git a/IndicTrans2/inference/requirements.txt b/IndicTrans2/inference/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..aba6fadcede40df4ba255c169c108eda5ec68dc9
--- /dev/null
+++ b/IndicTrans2/inference/requirements.txt
@@ -0,0 +1,11 @@
+git+https://github.com/anoopkunchukuttan/indic_nlp_library
+git+https://github.com/pytorch/fairseq
+sacremoses
+pandas
+mock
+nltk
+sacrebleu
+urduhack[tf]
+mosestokenizer
+ctranslate2
+sentencepiece
\ No newline at end of file
diff --git a/IndicTrans2/inference/triton_server/Dockerfile b/IndicTrans2/inference/triton_server/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..48f5ec6456ff6946567888ccb60211c547fee402
--- /dev/null
+++ b/IndicTrans2/inference/triton_server/Dockerfile
@@ -0,0 +1,25 @@
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:22.12-py3
+FROM ${BASE_IMAGE}
+
+# Ensure apt-get won't prompt for selecting options
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYTHONIOENCODING=utf8
+
+WORKDIR /home
+
+WORKDIR /home/indicTrans2
+COPY requirements.txt .
+RUN pip install -r requirements.txt
+
+COPY download.py .
+RUN python3 download.py
+
+COPY . ./inference
+
+WORKDIR /home/
+COPY ./triton_server/triton_repo ./triton_repo
+
+CMD ["tritonserver", "--model-repository=/home/triton_repo", "--log-verbose=2", "--strict-model-config=false", "--http-port=8000", "--grpc-port=8001", "--metrics-port=8002"]
+EXPOSE 8000
+EXPOSE 8001
+EXPOSE 8002
diff --git a/IndicTrans2/inference/triton_server/README.md b/IndicTrans2/inference/triton_server/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..162d9606615ad1240976d0895be54714ea3e5d20
--- /dev/null
+++ b/IndicTrans2/inference/triton_server/README.md
@@ -0,0 +1,22 @@
+# Triton server
+
+## Building the image
+
+```
+cd indicTrans2/inference/
+docker build -f triton_server/Dockerfile -t indictrans2_triton .
+```
+
+## Running the container
+
+Place the `en-indic` and `indic-en` checkpoint folders into the `indicTrans2/checkpoints` directory.
+
+Then start the server with:
+```
+docker run --shm-size=256m --gpus=1 --rm -v ${PWD}/../checkpoints/:/models/checkpoints -p 8000:8000 -t indictrans2_triton
+```
+
+## Sample client
+
+- Do `pip install tritonclient[all] gevent` first.
+- Then `python3 triton_server/client.py`
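+
+To check that the server came up, query the standard Triton health endpoint:
+```
+curl -v localhost:8000/v2/health/ready
+```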
diff --git a/IndicTrans2/inference/triton_server/azure_ml/README.md b/IndicTrans2/inference/triton_server/azure_ml/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..570bf076da15e3d4d501a48989963e103431b9a9
--- /dev/null
+++ b/IndicTrans2/inference/triton_server/azure_ml/README.md
@@ -0,0 +1,56 @@
+# Deployment on Azure Machine Learning
+
+## Pre-requisites
+
+```
+cd inference/triton_server
+```
+
+Set the environment for AML:
+```
+export RESOURCE_GROUP=Dhruva-prod
+export WORKSPACE_NAME=dhruva--central-india
+export DOCKER_REGISTRY=dhruvaprod
+```
+
+Also remember to edit the `yml` files accordingly.
+
+## Registering the model
+
+```
+az ml model create --file azure_ml/model.yml --resource-group $RESOURCE_GROUP --workspace-name $WORKSPACE_NAME
+```
+
+## Pushing the docker image to Container Registry
+
+```
+az acr login --name $DOCKER_REGISTRY
+docker tag indictrans2_triton $DOCKER_REGISTRY.azurecr.io/nmt/triton-indictrans-v2:latest
+docker push $DOCKER_REGISTRY.azurecr.io/nmt/triton-indictrans-v2:latest
+```
+
+## Creating the execution environment
+
+```
+az ml environment create -f azure_ml/environment.yml -g $RESOURCE_GROUP -w $WORKSPACE_NAME
+```
+
+## Publishing the endpoint for online inference
+
+```
+az ml online-endpoint create -f azure_ml/endpoint.yml -g $RESOURCE_GROUP -w $WORKSPACE_NAME
+```
+
+Now, from the Azure Portal, open the Container Registry and grant the ACR_PULL permission to the above endpoint, so that it is allowed to pull the docker image.
+
+## Attaching a deployment
+
+```
+az ml online-deployment create -f azure_ml/deployment.yml --all-traffic -g $RESOURCE_GROUP -w $WORKSPACE_NAME
+```
+
+## Testing if inference works
+
+1. From Azure ML Studio, go to the "Consume" tab, and get the endpoint domain (without `https://` or trailing `/`) and an authentication key.
+2. In `client.py`, enable `ENABLE_SSL = True`, and then set the `ENDPOINT_URL` variable as well as `Authorization` value inside `HTTP_HEADERS`.
+3. Run `python3 client.py`
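+
+For reference, the top of `client.py` would then look like this (placeholder values):
+```
+ENABLE_SSL = True
+ENDPOINT_URL = "<endpoint-name>.<region>.inference.ml.azure.com"
+HTTP_HEADERS = {"Authorization": "Bearer <paste-key-here>"}
+```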
diff --git a/IndicTrans2/inference/triton_server/azure_ml/deployment.yml b/IndicTrans2/inference/triton_server/azure_ml/deployment.yml
new file mode 100644
index 0000000000000000000000000000000000000000..bf3b8490e1eaf1b1168513aab19c008c51746656
--- /dev/null
+++ b/IndicTrans2/inference/triton_server/azure_ml/deployment.yml
@@ -0,0 +1,13 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
+name: ai4b-indictransv2--t4-piv--gpu
+endpoint_name: ai4b-indictransv2--t4
+model: azureml:indictrans-v2--models:1
+model_mount_path: /models
+environment: azureml:triton-indictrans-v2-env:1
+instance_type: Standard_NC4as_T4_v3
+instance_count: 1
+request_settings:
+ request_timeout_ms: 90000
+ max_concurrent_requests_per_instance: 100
+ max_queue_wait_ms: 2000
+app_insights_enabled: true
diff --git a/IndicTrans2/inference/triton_server/azure_ml/endpoint.yml b/IndicTrans2/inference/triton_server/azure_ml/endpoint.yml
new file mode 100644
index 0000000000000000000000000000000000000000..43dbf572ebf641f5f07c73ec0a408e46edad8b28
--- /dev/null
+++ b/IndicTrans2/inference/triton_server/azure_ml/endpoint.yml
@@ -0,0 +1,3 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
+name: ai4b-indictransv2--t4
+auth_mode: key
diff --git a/IndicTrans2/inference/triton_server/azure_ml/environment.yml b/IndicTrans2/inference/triton_server/azure_ml/environment.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ed19ef7ada282243e0a986df8d0bec694cb29655
--- /dev/null
+++ b/IndicTrans2/inference/triton_server/azure_ml/environment.yml
@@ -0,0 +1,14 @@
+$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
+name: triton-indictrans-v2-env
+image: dhruvaprod.azurecr.io/nmt/triton-indictrans-v2:latest
+version: 1
+inference_config:
+ liveness_route:
+ path: /v2/health/live
+ port: 8000
+ readiness_route:
+ path: /v2/health/ready
+ port: 8000
+ scoring_route:
+ path: /
+ port: 8000
diff --git a/IndicTrans2/inference/triton_server/azure_ml/model.yml b/IndicTrans2/inference/triton_server/azure_ml/model.yml
new file mode 100644
index 0000000000000000000000000000000000000000..cce1d22e18cf1c1f96c5dbacf73711be9ec41839
--- /dev/null
+++ b/IndicTrans2/inference/triton_server/azure_ml/model.yml
@@ -0,0 +1,5 @@
+$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
+name: indictrans-v2--models
+version: 1
+path: ../../../checkpoints
+type: triton_model
diff --git a/IndicTrans2/inference/triton_server/client.py b/IndicTrans2/inference/triton_server/client.py
new file mode 100644
index 0000000000000000000000000000000000000000..16f5cd78d8bd2dda835b1fd9fc674491c579211d
--- /dev/null
+++ b/IndicTrans2/inference/triton_server/client.py
@@ -0,0 +1,55 @@
+import tritonclient.http as http_client
+from tritonclient.utils import *
+import numpy as np
+
+ENABLE_SSL = False
+ENDPOINT_URL = 'localhost:8000'
+HTTP_HEADERS = {"Authorization": "Bearer __PASTE_KEY_HERE__"}
+
+# Connect to the server
+if ENABLE_SSL:
+ import gevent.ssl
+ triton_http_client = http_client.InferenceServerClient(
+ url=ENDPOINT_URL, verbose=False,
+ ssl=True, ssl_context_factory=gevent.ssl._create_default_https_context,
+ )
+else:
+ triton_http_client = http_client.InferenceServerClient(
+ url=ENDPOINT_URL, verbose=False,
+ )
+
+print("Is server ready - {}".format(triton_http_client.is_server_ready(headers=HTTP_HEADERS)))
+
+def get_string_tensor(string_values, tensor_name):
+ string_obj = np.array(string_values, dtype="object")
+ input_obj = http_client.InferInput(tensor_name, string_obj.shape, np_to_triton_dtype(string_obj.dtype))
+ input_obj.set_data_from_numpy(string_obj)
+ return input_obj
+
+def get_translation_input_for_triton(texts: list, src_lang: str, tgt_lang: str):
+ return [
+ get_string_tensor([[text] for text in texts], "INPUT_TEXT"),
+ get_string_tensor([[src_lang]] * len(texts), "INPUT_LANGUAGE_ID"),
+ get_string_tensor([[tgt_lang]] * len(texts), "OUTPUT_LANGUAGE_ID"),
+ ]
+
+# Prepare input and output tensors
+input_sentences = ["Hello world, I am Ram and I am from Ayodhya.", "How are you Ravan bro?"]
+inputs = get_translation_input_for_triton(input_sentences, "en", "hi")
+output0 = http_client.InferRequestedOutput("OUTPUT_TEXT")
+
+# Send request
+response = triton_http_client.infer(
+ "nmt",
+ model_version='1',
+ inputs=inputs,
+ outputs=[output0],
+ headers=HTTP_HEADERS,
+)#.get_response()
+
+# Decode the response
+output_batch = response.as_numpy('OUTPUT_TEXT').tolist()
+for input_sentence, translation in zip(input_sentences, output_batch):
+ print()
+ print(input_sentence)
+ print(translation[0].decode("utf-8"))
diff --git a/IndicTrans2/inference/triton_server/dhruva/ulca_model.json b/IndicTrans2/inference/triton_server/dhruva/ulca_model.json
new file mode 100644
index 0000000000000000000000000000000000000000..1e500c6a122e76d14a80ad7c38c4934b643f5ce2
--- /dev/null
+++ b/IndicTrans2/inference/triton_server/dhruva/ulca_model.json
@@ -0,0 +1,3949 @@
+{
+ "modelId": "ai4bharat/indictrans-v2-all-gpu",
+ "version": "v1",
+ "submittedOn": 1684249537000,
+ "updatedOn": 1684249537000,
+ "name": "AI4Bharat Translation (IndicTrans2) -- All directions on GPU",
+ "description": "IndicTranslate-v2 is a Transformer-based (~1.1B params) multilingual NMT model trained on datasets covering all 22 scheduled langauges of the Indian Republic.",
+ "refUrl": "https://github.com/AI4Bharat/indicTrans2",
+ "task": {
+ "type": "translation"
+ },
+ "languages": [
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "en",
+ "sourceScriptCode": "Latn",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "en",
+ "targetScriptCode": "Latn"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "as",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "bn",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "brx",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "doi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "gom",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "gu",
+ "sourceScriptCode": "Gujr",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "hi",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "kn",
+ "sourceScriptCode": "Knda",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "ks",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "mai",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "ml",
+ "sourceScriptCode": "Mlym",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Mtei",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "mni",
+ "sourceScriptCode": "Beng",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "mr",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "ne",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "or",
+ "sourceScriptCode": "Orya",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "pa",
+ "sourceScriptCode": "Guru",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "sa",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "sat",
+ "sourceScriptCode": "Olck",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Arab",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "sd",
+ "sourceScriptCode": "Deva",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ },
+ {
+ "sourceLanguage": "ta",
+ "sourceScriptCode": "Taml",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "te",
+ "sourceScriptCode": "Telu",
+ "targetLanguage": "ur",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "as",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "bn",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "brx",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "doi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "gom",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "gu",
+ "targetScriptCode": "Gujr"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "hi",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "kn",
+ "targetScriptCode": "Knda"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Aran"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "ks",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "mai",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "ml",
+ "targetScriptCode": "Mlym"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Mtei"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "mni",
+ "targetScriptCode": "Beng"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "mr",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "ne",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "or",
+ "targetScriptCode": "Orya"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "pa",
+ "targetScriptCode": "Guru"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "sa",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "sat",
+ "targetScriptCode": "Olck"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Arab"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "sd",
+ "targetScriptCode": "Deva"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "ta",
+ "targetScriptCode": "Taml"
+ },
+ {
+ "sourceLanguage": "ur",
+ "sourceScriptCode": "Aran",
+ "targetLanguage": "te",
+ "targetScriptCode": "Telu"
+ }
+ ],
+ "license": "mit",
+ "domain": [
+ "general"
+ ],
+ "inferenceEndPoint": {
+ "schema": {
+ "request": {
+ "input": [
+ {
+ "source": "कैसे हैं आप?"
+ }
+ ],
+ "config": {
+ "language": {
+ "sourceLanguage": "hi",
+ "targetLanguage": "en"
+ }
+ }
+ },
+ "response": {
+ "output": [
+ {
+ "source": "कैसे हैं आप?",
+ "target": "How are you?"
+ }
+ ]
+ }
+ }
+ },
+ "submitter": {
+ "name": "AI4Bharat",
+ "aboutMe": "A non-profit, open-source community of engineers, domain experts, policy makers, and academicians collaborating to build AI solutions to solve India’s problems",
+ "team": [
+ {
+ "name": "Jay Gala",
+ "aboutMe": "AI Resident, AI4Bharat"
+ },
+ {
+ "name": "Pranjal Agadh Chitale",
+ "aboutMe": "MS Student, IITM"
+ },
+ {
+ "name": "Kumar Deepak",
+ "aboutMe": "Tarento, EkStep"
+ },
+ {
+ "name": "Raghavan AK",
+ "aboutMe": "IIT-Madras"
+ },
+ {
+ "name": "Sumanth Doddapaneni",
+ "aboutMe": "RBCDSAI, IITM"
+ },
+ {
+ "name": "Anoop Kunchukuttan",
+ "aboutMe": "IIT-Madras"
+ },
+ {
+ "name": "Mitesh Khapra",
+ "aboutMe": "IIT-Madras"
+ },
+ {
+ "name": "Pratyush Kumar",
+ "aboutMe": "AI4Bharat"
+ },
+ {
+ "name": "Raj Dabre",
+ "aboutMe": "NICT Japan"
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/IndicTrans2/inference/triton_server/triton_repo/nmt/1/model.py b/IndicTrans2/inference/triton_server/triton_repo/nmt/1/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..8076ae9ae2c4d0edb56d01c27ddc02994e9376ac
--- /dev/null
+++ b/IndicTrans2/inference/triton_server/triton_repo/nmt/1/model.py
@@ -0,0 +1,167 @@
+import os
+import sys
+import json
+import numpy as np
+import triton_python_backend_utils as pb_utils
+
+PWD = os.path.dirname(__file__)
+
+INFERENCE_MODULE_DIR = "/home/indicTrans2/"
+sys.path.insert(0, INFERENCE_MODULE_DIR)
+from inference.engine import Model, iso_to_flores
+INDIC_LANGUAGES = set(iso_to_flores)
+
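+# Each checkpoint folder under the checkpoints root must be named with one of
+# these direction strings. When FORCE_PIVOTING is enabled, indic-indic requests
+# are translated in two hops via the pivot language instead of a direct model.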
+ALLOWED_DIRECTION_STRINGS = {"en-indic", "indic-en", "indic-indic"}
+FORCE_PIVOTING = False
+DEFAULT_PIVOT_LANG = "en"
+
+class TritonPythonModel:
+ def initialize(self, args):
+ self.model_config = json.loads(args['model_config'])
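+ # Triton passes the device id as a JSON-encoded string; decode it to know which GPU this instance runs on.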
+ self.model_instance_device_id = json.loads(args['model_instance_device_id'])
+ self.output_name = "OUTPUT_TEXT"
+ self.output_dtype = pb_utils.triton_string_to_numpy(
+ pb_utils.get_output_config_by_name(self.model_config, self.output_name)["data_type"])
+
+
+ # checkpoints_root_dir = os.path.join(PWD, "checkpoints")
+ checkpoints_root_dir = "/models/checkpoints"
+ checkpoint_folders = [ f.path for f in os.scandir(checkpoints_root_dir) if f.is_dir() ]
+        # Assumption: each folder is named after its direction string,
+        # i.e. one of `en-indic`, `indic-en` or `indic-indic`.
+
+ if not checkpoint_folders:
+ raise RuntimeError(f"No checkpoint folders in: {checkpoints_root_dir}")
+
+ self.models = {}
+ for checkpoint_folder in checkpoint_folders:
+ direction_string = os.path.basename(checkpoint_folder)
+ assert direction_string in ALLOWED_DIRECTION_STRINGS, f"Checkpoint folder-name `{direction_string}` not allowed"
+ self.models[direction_string] = Model(os.path.join(checkpoint_folder, "ct2_fp16_model"), input_lang_code_format="iso", model_type="ctranslate2")
+ # self.models[direction_string] = Model(checkpoint_folder, input_lang_code_format="iso", model_type="fairseq")
+
+ self.pivot_lang = None
+ if "en-indic" in self.models and "indic-en" in self.models:
+ if "indic-indic" not in self.models:
+ self.pivot_lang = DEFAULT_PIVOT_LANG
+ elif FORCE_PIVOTING:
+ del self.models["indic-indic"]
+ self.pivot_lang = DEFAULT_PIVOT_LANG
+
+ def get_direction_string(self, input_language_id, output_language_id):
+ direction_string = None
+ if input_language_id == DEFAULT_PIVOT_LANG and output_language_id in INDIC_LANGUAGES:
+ direction_string = "en-indic"
+ elif input_language_id in INDIC_LANGUAGES:
+ if output_language_id == DEFAULT_PIVOT_LANG:
+ direction_string = "indic-en"
+ elif output_language_id in INDIC_LANGUAGES:
+ direction_string = "indic-indic"
+ return direction_string
+
+ def get_model(self, input_language_id, output_language_id):
+ direction_string = self.get_direction_string(input_language_id, output_language_id)
+
+ if direction_string in self.models:
+ return self.models[direction_string]
+ raise RuntimeError(f"Language-pair not supported: {input_language_id}-{output_language_id}")
+
+    def execute(self, requests):
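+        # Collect inputs from all pending requests, group them by translation
+        # direction so that each loaded model runs a single batched call, then
+        # scatter the translations back to their (request_id, input_id) slots.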
+ # print("REQ_COUNT", len(requests))
+ modelwise_batches = {}
+ responses = []
+ for request_id, request in enumerate(requests):
+ input_text_batch = pb_utils.get_input_tensor_by_name(request, "INPUT_TEXT").as_numpy()
+ input_language_id_batch = pb_utils.get_input_tensor_by_name(request, "INPUT_LANGUAGE_ID").as_numpy()
+ output_language_id_batch = pb_utils.get_input_tensor_by_name(request, "OUTPUT_LANGUAGE_ID").as_numpy()
+
+ input_text_batch = [input_text[0].decode("utf-8", "ignore") for input_text in input_text_batch]
+ input_language_id_batch = [input_language_id[0].decode("utf-8", "ignore") for input_language_id in input_language_id_batch]
+ output_language_id_batch = [output_language_id[0].decode("utf-8", "ignore") for output_language_id in output_language_id_batch]
+
+ responses.append([['']] * len(input_text_batch))
+
+ for input_id, (input_text, input_language_id, output_language_id) in enumerate(zip(input_text_batch, input_language_id_batch, output_language_id_batch)):
+ direction_string = self.get_direction_string(input_language_id, output_language_id)
+ if direction_string not in self.models:
+ if direction_string == "indic-indic" and self.pivot_lang:
+ pass
+ else:
+ raise RuntimeError(f"Language-pair not supported: {input_language_id}-{output_language_id}")
+
+ if direction_string not in modelwise_batches:
+ modelwise_batches[direction_string] = {
+ "payloads": [],
+ "text_id_to_req_id_input_id": [],
+ }
+
+ modelwise_batches[direction_string]["payloads"].append([input_text, input_language_id, output_language_id])
+ modelwise_batches[direction_string]["text_id_to_req_id_input_id"].append((request_id, input_id))
+
+ for direction_string, batch in modelwise_batches.items():
+ if direction_string == "indic-indic" and self.pivot_lang:
+ model = self.get_model("hi", self.pivot_lang)
+ original_langs = []
+ for i in range(len(batch["payloads"])):
+ original_langs.append(batch["payloads"][i][2])
+ batch["payloads"][i][2] = self.pivot_lang
+
+ pivot_texts = model.paragraphs_batch_translate__multilingual(batch["payloads"])
+
+ for i in range(len(batch["payloads"])):
+ batch["payloads"][i][0] = pivot_texts[i]
+ batch["payloads"][i][1] = self.pivot_lang
+ batch["payloads"][i][2] = original_langs[i]
+
+ model = self.get_model(self.pivot_lang, "hi")
+ translations = model.paragraphs_batch_translate__multilingual(batch["payloads"])
+ else:
+ model = self.models[direction_string]
+ translations = model.paragraphs_batch_translate__multilingual(batch["payloads"])
+ # translations = ["bro"] * len(batch["payloads"])
+
+ for translation, (request_id, output_id) in zip(translations, batch["text_id_to_req_id_input_id"]):
+ responses[request_id][output_id] = [translation]
+
+ for i in range(len(responses)):
+ responses[i] = pb_utils.InferenceResponse(output_tensors=[
+ pb_utils.Tensor(
+ self.output_name,
+ np.array(responses[i], dtype=self.output_dtype),
+ )
+ ])
+ return responses
+
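+    # Simpler per-sentence path. Triton's Python backend only invokes
+    # `execute`, so this appears unused; it is kept as a reference
+    # implementation of the same pivoting logic.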
+    def execute_sequential(self, requests):
+ # print("REQ_COUNT", len(requests))
+ responses = []
+ for request in requests:
+ input_text_batch = pb_utils.get_input_tensor_by_name(request, "INPUT_TEXT").as_numpy()
+ input_language_id_batch = pb_utils.get_input_tensor_by_name(request, "INPUT_LANGUAGE_ID").as_numpy()
+ output_language_id_batch = pb_utils.get_input_tensor_by_name(request, "OUTPUT_LANGUAGE_ID").as_numpy()
+
+ input_text_batch = [input_text[0].decode("utf-8", "ignore") for input_text in input_text_batch]
+ input_language_id_batch = [input_language_id[0].decode("utf-8", "ignore") for input_language_id in input_language_id_batch]
+ output_language_id_batch = [output_language_id[0].decode("utf-8", "ignore") for output_language_id in output_language_id_batch]
+
+ generated_outputs = []
+
+ for input_text, input_language_id, output_language_id in zip(input_text_batch, input_language_id_batch, output_language_id_batch):
+ if self.pivot_lang and (input_language_id != self.pivot_lang and output_language_id != self.pivot_lang):
+ model = self.get_model(input_language_id, self.pivot_lang)
+ pivot_text = model.translate_paragraph(input_text, input_language_id, self.pivot_lang)
+
+ model = self.get_model(self.pivot_lang, output_language_id)
+ translation = model.translate_paragraph(pivot_text, self.pivot_lang, output_language_id)
+ else:
+ model = self.get_model(input_language_id, output_language_id)
+ translation = model.translate_paragraph(input_text, input_language_id, output_language_id)
+ generated_outputs.append([translation])
+
+ inference_response = pb_utils.InferenceResponse(output_tensors=[
+ pb_utils.Tensor(
+ self.output_name,
+ np.array(generated_outputs, dtype=self.output_dtype),
+ )
+ ])
+ responses.append(inference_response)
+ return responses
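+
+# Illustrative client-side call (a sketch, not part of the model; assumes the
+# server is reachable on localhost:8000 and this model is deployed as "nmt",
+# matching the triton_repo/nmt directory, with `pip install tritonclient[http]`):
+#
+#   import numpy as np
+#   import tritonclient.http as http_client
+#
+#   client = http_client.InferenceServerClient(url="localhost:8000")
+#
+#   def make_input(name, value):
+#       # 1x1 BYTES tensor holding a single UTF-8 string
+#       tensor = http_client.InferInput(name, [1, 1], "BYTES")
+#       tensor.set_data_from_numpy(np.array([[value]], dtype=object))
+#       return tensor
+#
+#   result = client.infer("nmt", inputs=[
+#       make_input("INPUT_TEXT", "कैसे हैं आप?"),
+#       make_input("INPUT_LANGUAGE_ID", "hi"),
+#       make_input("OUTPUT_LANGUAGE_ID", "en"),
+#   ])
+#   print(result.as_numpy("OUTPUT_TEXT"))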
diff --git a/IndicTrans2/inference/triton_server/triton_repo/nmt/config.pbtxt b/IndicTrans2/inference/triton_server/triton_repo/nmt/config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..d4b363745b2d957da423dc7bf494c33c0afd9a02
--- /dev/null
+++ b/IndicTrans2/inference/triton_server/triton_repo/nmt/config.pbtxt
@@ -0,0 +1,32 @@
+backend: "python"
+max_batch_size: 512
+input [{
+ name: "INPUT_TEXT"
+ data_type: TYPE_STRING
+ dims: 1
+},
+{
+ name: "INPUT_LANGUAGE_ID"
+ data_type: TYPE_STRING
+ dims: 1
+},
+{
+ name: "OUTPUT_LANGUAGE_ID"
+ data_type: TYPE_STRING
+ dims: 1
+}]
+
+output {
+ name: "OUTPUT_TEXT"
+ data_type: TYPE_STRING
+ dims: 1
+}
+
+dynamic_batching {
+
+}
+
+instance_group [{
+ count: 1
+ kind: KIND_GPU
+}]
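+
+# dims of 1 with max_batch_size 512 means each request carries [batch, 1]
+# BYTES tensors; the empty dynamic_batching block enables Triton's dynamic
+# batcher with default settings, so concurrent requests are merged into
+# larger batches before reaching the Python backend.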
diff --git a/IndicTrans2/inference/utils.map_token_lang.tsv b/IndicTrans2/inference/utils.map_token_lang.tsv
new file mode 100644
index 0000000000000000000000000000000000000000..e5db3f34ce57efd79dca9b7e5a6d58273179df0b
--- /dev/null
+++ b/IndicTrans2/inference/utils.map_token_lang.tsv
@@ -0,0 +1,26 @@
+asm_Beng hi
+ben_Beng hi
+brx_Deva hi
+doi_Deva hi
+gom_Deva hi
+eng_Latn en
+guj_Gujr hi
+hin_Deva hi
+kan_Knda hi
+kas_Arab ar
+kas_Deva hi
+mai_Deva hi
+mar_Deva hi
+mal_Mlym hi
+mni_Beng hi
+mni_Mtei en
+npi_Deva hi
+ory_Orya hi
+pan_Guru hi
+san_Deva hi
+sat_Olck hi
+snd_Arab ar
+snd_Deva hi
+tam_Taml hi
+tel_Telu hi
+urd_Arab ar
diff --git a/IndicTrans2/install.sh b/IndicTrans2/install.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0cc5ee28af950c93544fe0b082e433c8c2d4c319
--- /dev/null
+++ b/IndicTrans2/install.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+root_dir=$(pwd)
+echo "Setting up the environment in the $root_dir"
+
+# --------------------------------------------------------------
+# create and activate the virtual environment
+# --------------------------------------------------------------
+echo "Creating a virtual environment with python3"
+conda create -n itv2 python=3.9 -y
+conda activate itv2
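+# NOTE: `conda activate` can fail in a non-interactive shell; if it does, run
+# `source "$(conda info --base)/etc/profile.d/conda.sh"` before activating.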
+
+echo "Installing all the dependencies"
+conda install pip
+python3 -m pip install --upgrade pip==24.0
+
+
+# --------------------------------------------------------------
+# PyTorch Installation
+# --------------------------------------------------------------
+# python3 -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu118
+pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu118
+
+
+# --------------------------------------------------------------
+# Install IndicNLP library and necessary resources
+# --------------------------------------------------------------
+git clone https://github.com/anoopkunchukuttan/indic_nlp_resources.git
+export INDIC_RESOURCES_PATH=$root_dir/indic_nlp_resources
+
+# we use version 0.92 which is the latest in the github repo
+git clone https://github.com/anoopkunchukuttan/indic_nlp_library.git
+cd indic_nlp_library
+python3 -m pip install ./
+cd $root_dir
+
+# --------------------------------------------------------------
+# Install additional utility packages
+# --------------------------------------------------------------
+python3 -m pip install nltk sacremoses regex pandas mock transformers==4.28.1 sacrebleu==2.3.1 urduhack[tf] mosestokenizer ctranslate2==3.9.0 gradio
+python3 -c "import urduhack; urduhack.download()"
+python3 -c "import nltk; nltk.download('punkt')"
+
+# --------------------------------------------------------------
+# Sentencepiece for tokenization
+# --------------------------------------------------------------
+# build the cpp binaries from the source repo in order to use the command line utility
+# source repo: https://github.com/google/sentencepiece
+python3 -m pip install sentencepiece
+
+# --------------------------------------------------------------
+# Fairseq Installation from Source
+# --------------------------------------------------------------
+git clone https://github.com/pytorch/fairseq.git
+cd fairseq
+python3 -m pip install ./
+cd $root_dir
+
+echo "Setup completed!"
diff --git a/IndicTrans2/joint_translate.sh b/IndicTrans2/joint_translate.sh
new file mode 100644
index 0000000000000000000000000000000000000000..59e7274342d313fa33bae6eced9724c1b77186b6
--- /dev/null
+++ b/IndicTrans2/joint_translate.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+# This script performs inference from a source language to a target language using a fairseq model.
+
+
+echo `date`
+infname=$1 # path to the input file name
+outfname=$2 # path to the output file name
+src_lang=$3 # source language (according to the flores code)
+tgt_lang=$4 # target language (according to the flores code)
+ckpt_dir=$5 # path to the checkpoint directory
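+
+# example invocation (hypothetical paths):
+#   bash joint_translate.sh input.txt output.txt hin_Deva eng_Latn checkpoints/indic-en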
+
+
+# check if the source language text requires transliteration
+src_transliterate="true"
+if [[ $src_lang == *"Arab"* ]] || [[ $src_lang == *"Olck"* ]] || \
+ [[ $src_lang == *"Mtei"* ]] || [[ $src_lang == *"Latn"* ]]; then
+ src_transliterate="false"
+fi
+
+
+# check if the target language text requires transliteration
+tgt_transliterate="true"
+if [[ $tgt_lang == *"Arab"* ]] || [[ $tgt_lang == *"Olck"* ]] || \
+ [[ $tgt_lang == *"Mtei"* ]] || [[ $tgt_lang == *"Latn"* ]]; then
+ tgt_transliterate="false"
+fi
+
+
+# define the prefixes for source and target languages
+SRC_PREFIX='SRC'
+TGT_PREFIX='TGT'
+
+
+echo "Normalizing punctuations"
+bash normalize_punctuation.sh $src_lang < $infname > $outfname._norm
+
+echo "Adding do not translate tags"
+python3 scripts/normalize_regex_inference.py $outfname._norm $outfname.norm
+rm -rf $outfname._norm && mv $outfname.norm $outfname._norm
+
+echo "Applying normalization and script conversion"
+input_size=`python scripts/preprocess_translate.py $outfname._norm $outfname.norm $src_lang $src_transliterate false`
+echo "Number of sentences in input: $input_size"
+
+
+echo "Applying sentence piece"
+spm_encode --model $ckpt_dir/vocab/model.SRC \
+ --output_format=piece \
+ < $outfname.norm \
+ > $outfname._bpe
+
+echo "Adding language tags"
+python scripts/add_tags_translate.py $outfname._bpe $outfname.bpe $src_lang $tgt_lang
+
+
+echo "Decoding"
+fairseq-interactive $ckpt_dir/final_bin \
+ -s $SRC_PREFIX -t $TGT_PREFIX \
+ --distributed-world-size 1 --fp16 \
+ --path $ckpt_dir/model/checkpoint_best.pt \
+ --task translation \
+ --user-dir model_configs \
+ --skip-invalid-size-inputs-valid-test \
+ --batch-size 128 --buffer-size 2500 --beam 5 \
+ --input $outfname.bpe > $outfname.log 2>&1
+
+
+echo "Extracting translations, script conversion and detokenization"
+# this script extracts the translations, converts the script from Devanagari to the target language's script if needed, and detokenizes the output
+python scripts/postprocess_translate.py $outfname.log $outfname $input_size $tgt_lang $tgt_transliterate $ckpt_dir/vocab/model.TGT
+
+echo "Translation completed"
diff --git a/IndicTrans2/model_configs/__init__.py b/IndicTrans2/model_configs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ec41f7daeb7930e9df766abdd790c4c5b09b6d9
--- /dev/null
+++ b/IndicTrans2/model_configs/__init__.py
@@ -0,0 +1 @@
+from . import custom_transformer
\ No newline at end of file
diff --git a/IndicTrans2/model_configs/custom_transformer.py b/IndicTrans2/model_configs/custom_transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..89753fb7bf8fee996bb890bc7cf55ec530735a91
--- /dev/null
+++ b/IndicTrans2/model_configs/custom_transformer.py
@@ -0,0 +1,92 @@
+from fairseq.models import register_model_architecture
+from fairseq.models.transformer import base_architecture
+
+
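+# Each architecture below only overrides fairseq's transformer defaults; the
+# getattr(args, ..., default) pattern keeps any value that was already set on
+# the command line. Select one at training time, e.g.:
+#   fairseq-train ... --user-dir model_configs --arch transformer_4x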
+@register_model_architecture("transformer", "transformer_2x")
+def transformer_big(args):
+ args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024)
+ args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 4096)
+ args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16)
+ args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False)
+ args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1024)
+ args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 4096)
+ args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16)
+ base_architecture(args)
+
+
+@register_model_architecture("transformer", "transformer_4x")
+def transformer_huge(args):
+ args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1536)
+ args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 4096)
+ args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16)
+ args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False)
+ args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1536)
+ args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 4096)
+ args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16)
+ base_architecture(args)
+
+
+@register_model_architecture("transformer", "transformer_9x")
+def transformer_xlarge(args):
+ args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 2048)
+ args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 8192)
+ args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16)
+ args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False)
+ args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 2048)
+ args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 8192)
+ args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16)
+ base_architecture(args)
+
+
+@register_model_architecture("transformer", "transformer_12e12d_9xeq")
+def transformer_vxlarge(args):
+ args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1536)
+ args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 4096)
+ args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16)
+ args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False)
+ args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1536)
+ args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 4096)
+ args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16)
+ args.encoder_layers = getattr(args, "encoder_layers", 12)
+ args.decoder_layers = getattr(args, "decoder_layers", 12)
+ base_architecture(args)
+
+
+@register_model_architecture("transformer", "transformer_18_18")
+def transformer_deep(args):
+ args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024)
+ args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 8 * 1024)
+ args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16)
+ args.encoder_normalize_before = getattr(args, "encoder_normalize_before", True)
+ args.decoder_normalize_before = getattr(args, "decoder_normalize_before", True)
+ args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1024)
+ args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 8 * 1024)
+ args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16)
+ args.encoder_layers = getattr(args, "encoder_layers", 18)
+ args.decoder_layers = getattr(args, "decoder_layers", 18)
+ base_architecture(args)
+
+
+@register_model_architecture("transformer", "transformer_24_24")
+def transformer_xdeep(args):
+ args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024)
+ args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 8 * 1024)
+ args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16)
+ args.encoder_normalize_before = getattr(args, "encoder_normalize_before", True)
+ args.decoder_normalize_before = getattr(args, "decoder_normalize_before", True)
+ args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1024)
+ args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 8 * 1024)
+ args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16)
+ args.encoder_layers = getattr(args, "encoder_layers", 24)
+ args.decoder_layers = getattr(args, "decoder_layers", 24)
+ base_architecture(args)
+
+
+@register_model_architecture("transformer", "transformer_base18L")
+def transformer_base18L(args):
+ args.encoder_normalize_before = getattr(args, "encoder_normalize_before", True)
+ args.decoder_normalize_before = getattr(args, "decoder_normalize_before", True)
+ args.layernorm_embedding = getattr(args, "layernorm_embedding", True)
+ args.encoder_layers = getattr(args, "encoder_layers", 18)
+ args.decoder_layers = getattr(args, "decoder_layers", 18)
+ base_architecture(args)
diff --git a/IndicTrans2/normalize-punctuation.perl b/IndicTrans2/normalize-punctuation.perl
new file mode 100644
index 0000000000000000000000000000000000000000..d473fa3b7ba80a1a761217d3bf1255583f06e61c
--- /dev/null
+++ b/IndicTrans2/normalize-punctuation.perl
@@ -0,0 +1,90 @@
+#!/usr/bin/env perl
+#
+# This file is part of moses. Its use is licensed under the GNU Lesser General
+# Public License version 2.1 or, at your option, any later version.
+
+use warnings;
+use strict;
+
+my $language = "en";
+my $PENN = 0;
+
+while (@ARGV) {
+ $_ = shift;
+ /^-b$/ && ($| = 1, next); # not buffered (flush each line)
+ /^-l$/ && ($language = shift, next);
+ /^[^\-]/ && ($language = $_, next);
+ /^-penn$/ && ($PENN = 1, next);
+}
+
+while(<STDIN>) {
+ s/\r//g;
+ # remove extra spaces
+ s/\(/ \(/g;
+ s/\)/\) /g; s/ +/ /g;
+ s/\) ([\.\!\:\?\;\,])/\)$1/g;
+ s/\( /\(/g;
+ s/ \)/\)/g;
+ s/(\d) \%/$1\%/g;
+ s/ :/:/g;
+ s/ ;/;/g;
+ # normalize unicode punctuation
+ if ($PENN == 0) {
+ s/\`/\'/g;
+ s/\'\'/ \" /g;
+ }
+
+ s/„/\"/g;
+ s/“/\"/g;
+ s/”/\"/g;
+ s/–/-/g;
+ s/—/ - /g; s/ +/ /g;
+ s/´/\'/g;
+ s/([a-z])‘([a-z])/$1\'$2/gi;
+ s/([a-z])’([a-z])/$1\'$2/gi;
+ s/‘/\'/g;
+ s/‚/\'/g;
+ s/’/\"/g;
+ s/''/\"/g;
+ s/´´/\"/g;
+ s/…/.../g;
+ # French quotes
+ s/ « / \"/g;
+ s/« /\"/g;
+ s/«/\"/g;
+ s/ » /\" /g;
+ s/ »/\"/g;
+ s/»/\"/g;
+    # handle pseudo-spaces (the spaces on the source side of these rules are
+    # non-breaking spaces, U+00A0)
+    s/\x{00A0}\%/\%/g;
+    s/nº\x{00A0}/nº /g;
+    s/\x{00A0}:/:/g;
+    s/\x{00A0}ºC/ ºC/g;
+    s/\x{00A0}cm/ cm/g;
+    s/\x{00A0}\?/\?/g;
+    s/\x{00A0}\!/\!/g;
+    s/\x{00A0};/;/g;
+    s/,\x{00A0}/, /g; s/ +/ /g;
+
+ # English "quotation," followed by comma, style
+ if ($language eq "en") {
+ s/\"([,\.]+)/$1\"/g;
+ }
+ # Czech is confused
+ elsif ($language eq "cs" || $language eq "cz") {
+ }
+ # German/Spanish/French "quotation", followed by comma, style
+ else {
+ s/,\"/\",/g;
+ s/(\.+)\"(\s*[^<])/\"$1$2/g; # don't fix period at end of sentence
+ }
+
+
+ if ($language eq "de" || $language eq "es" || $language eq "cz" || $language eq "cs" || $language eq "fr") {
+ s/(\d) (\d)/$1,$2/g;
+ }
+ else {
+ s/(\d) (\d)/$1.$2/g;
+ }
+ print $_;
+}
diff --git a/IndicTrans2/normalize_punctuation.sh b/IndicTrans2/normalize_punctuation.sh
new file mode 100644
index 0000000000000000000000000000000000000000..5b68bc6a5d54f75212302040cd98623893afa843
--- /dev/null
+++ b/IndicTrans2/normalize_punctuation.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# This script normalizes the punctuations and strips the extra spaces in the input text
+# Directly sourced from https://github.com/pluiez/NLLB-inference
+
+
+set -euo pipefail
+
+root=$(dirname $0)
+
+lang_map_path=$root/utils.map_token_lang.tsv
+
+usage () {
+ echo "usage: $0 lang" >&2
+ exit 1
+}
+
+[ $# -eq 1 ] || usage
+
+lang=$1
+
+declare -A lang_map
+
+while read line; do
+ key=$(cut -f1 <<< "$line")
+ val=$(cut -f2 <<< "$line")
+ lang_map[$key]=$val
+done < $lang_map_path
+
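+# try the exact key first (e.g. `hin_Deva`), then its first three characters
+# (e.g. `hin`), and finally fall back to `en`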
+if [ -v "lang_map[$lang]" ]; then
+ lang=${lang_map[$lang]}
+elif [ -v "lang_map[${lang:0:3}]" ]; then
+ lang=${lang_map[${lang:0:3}]}
+else
+ echo "undefined mapping: ${lang}, falling back to: en" >&2
+ lang=en
+fi
+
+perl $root/normalize-punctuation.perl $lang
diff --git a/IndicTrans2/pivot_eval.sh b/IndicTrans2/pivot_eval.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7cda12b59d0ddd8f02ddff21ec3ef388010a0f82
--- /dev/null
+++ b/IndicTrans2/pivot_eval.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+# This script evaluates the performance of a machine translation system
+# on an evaluation set in the forward direction. For example, if the evaluation set
+# consists of language pairs such as X-Y, where X is the source Indic language
+# and Y is the target Indic language, then this script translates from X to Y
+# using English as the pivot language (X -> En and En -> Y).
+
+
+echo `date`
+devtest_data_dir=$1 # path to the evaluation set
+pivot_lang=${2:-"eng_Latn"} # pivot language of choice
+src2pivot_ckpt_dir=$3 # path to the Indic-En checkpoint directory
+pivot2tgt_ckpt_dir=$4 # path of the En-Indic checkpoint directory
+system=${5:-"it2"} # name of the machine translation system
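+
+# example invocation (hypothetical paths):
+#   bash pivot_eval.sh devtest/all eng_Latn checkpoints/indic-en checkpoints/en-indic it2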
+
+
+# get a list of language pairs in the `devtest_data_dir`
+pairs=$(ls -d $devtest_data_dir/* | sort)
+
+
+# iterate over each language pair
+for pair in ${pairs[@]}; do
+ # extract the source and target languages from the pair name
+ pair=$(basename $pair)
+ src_lang=$(echo "$pair" | cut -d "-" -f 1)
+ tgt_lang=$(echo "$pair" | cut -d "-" -f 2)
+
+ src_fname=$devtest_data_dir/$src_lang-$tgt_lang/test.$src_lang
+ pivot_fname=$devtest_data_dir/$src_lang-$tgt_lang/test.$pivot_lang
+ tgt_fname=$devtest_data_dir/$src_lang-$tgt_lang/test.$tgt_lang
+
+    # check if the source and target files exist
+ if [ -f "$src_fname" ] && [ -f "$tgt_fname" ]; then
+ echo "Evaluating $src_lang-$tgt_lang ..."
+ else
+ echo "Skipping $src_lang-$tgt_lang ..."
+ continue
+ fi
+
+ # generate translations if the system name contains "it2"
+ if [[ $system == *"it2"* ]]; then
+ # source to pivot translation
+ echo "Generating Source to Pivot Translations"
+ bash joint_translate.sh $src_fname $pivot_fname.pred.$system $src_lang $pivot_lang $src2pivot_ckpt_dir
+
+ # pivot to target translation
+ echo "Generating Pivot to Target Translations"
+ bash joint_translate.sh $pivot_fname.pred.$system $tgt_fname.pred.$system $pivot_lang $tgt_lang $pivot2tgt_ckpt_dir
+ fi
+
+ # compute automatic string-based metrics if the prediction exists for the system
+ if [[ -f "${tgt_fname}.pred.${system}" ]]; then
+ echo "Computing Metrics"
+ bash compute_metrics.sh $tgt_fname.pred.$system $tgt_fname $tgt_lang > $devtest_data_dir/$src_lang-$tgt_lang/${src_lang}_${tgt_lang}_${system}_scores.txt
+ fi
+
+ # remove the intermediate files
+ rm $pivot_fname.pred.${system}.*
+ rm $tgt_fname.pred.${system}.*
+ rm -rf $devtest_data_dir/$src_lang-$tgt_lang/*.tok
+
+done
diff --git a/IndicTrans2/prepare_data_joint_finetuning.sh b/IndicTrans2/prepare_data_joint_finetuning.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a07c47608f2457ae6f43306bd40e073374507f19
--- /dev/null
+++ b/IndicTrans2/prepare_data_joint_finetuning.sh
@@ -0,0 +1,181 @@
+#!/bin/bash
+
+# This script preprocesses and binarizes the data for training translation models with fairseq,
+# using the shared fairseq dict generated initially by `prepare_data_joint_training.sh`.
+# We primarily use this script for training all our models.
+
+
+echo `date`
+exp_dir=$1 # path to the experiment directory
+vocab_dir=${2:-"$exp_dir/vocab"} # path to the spm-based tokenizer directory
+train_data_dir=${3:-"$exp_dir/train"} # path to the train data within experiment directory
+devtest_data_dir=${4:-"$exp_dir/devtest/all"} # path to the devtest data within experiment directory
+
+root=$(dirname $0)
+
+echo "Running experiment ${exp_dir}"
+
+train_processed_dir=$exp_dir/data
+devtest_processed_dir=$exp_dir/data
+out_data_dir=$exp_dir/final_bin
+
+mkdir -p $train_processed_dir
+mkdir -p $devtest_processed_dir
+mkdir -p $out_data_dir
+
+parallel_installed=false
+
+# Check if GNU Parallel is installed
+if command -v parallel &> /dev/null; then
+ echo "GNU Parallel is installed. Version information:"
+ parallel --version
+ parallel_installed=true
+fi
+
+# get a list of language pairs in the `train_data_dir`
+pairs=$(ls -d $train_data_dir/* | sort)
+
+
+# iterate over each language pair
+for pair in ${pairs[@]}; do
+ # extract the source and target languages from the pair name
+ pair=$(basename $pair)
+ src_lang=$(echo "$pair" | cut -d "-" -f 1)
+ tgt_lang=$(echo "$pair" | cut -d "-" -f 2)
+ echo "$src_lang - $tgt_lang"
+
+ train_norm_dir=$exp_dir/norm/$src_lang-$tgt_lang
+ devtest_norm_dir=$exp_dir/norm/$src_lang-$tgt_lang
+ mkdir -p $train_norm_dir
+ mkdir -p $devtest_norm_dir
+
+
+ # check if the source language text requires transliteration
+ src_transliterate="true"
+ if [[ $src_lang == *"Arab"* ]] || [[ $src_lang == *"Olck"* ]] || \
+ [[ $src_lang == *"Mtei"* ]] || [[ $src_lang == *"Latn"* ]]; then
+ src_transliterate="false"
+ fi
+
+ # check if the target language text requires transliteration
+ tgt_transliterate="true"
+ if [[ $tgt_lang == *"Arab"* ]] || [[ $tgt_lang == *"Olck"* ]] || \
+ [[ $tgt_lang == *"Mtei"* ]] || [[ $tgt_lang == *"Latn"* ]]; then
+ tgt_transliterate="false"
+ fi
+
+
+ # --------------------------------------------------------------------------
+ # train preprocessing
+ # --------------------------------------------------------------------------
+ train_infname_src=$train_data_dir/${src_lang}-${tgt_lang}/train.$src_lang
+ train_infname_tgt=$train_data_dir/${src_lang}-${tgt_lang}/train.$tgt_lang
+ train_outfname_src=$train_norm_dir/train.$src_lang
+ train_outfname_tgt=$train_norm_dir/train.$tgt_lang
+
+ echo "Normalizing punctuations for train"
+ if $parallel_installed; then
+ parallel --pipe --keep-order bash $root/normalize_punctuation.sh $src_lang < $train_infname_src > $train_outfname_src._norm
+ parallel --pipe --keep-order bash $root/normalize_punctuation.sh $tgt_lang < $train_infname_tgt > $train_outfname_tgt._norm
+ else
+ bash $root/normalize_punctuation.sh $src_lang < $train_infname_src > $train_outfname_src._norm
+ bash $root/normalize_punctuation.sh $tgt_lang < $train_infname_tgt > $train_outfname_tgt._norm
+ fi
+
+ # add do not translate tags to handle special failure cases
+ echo "Applying do not translate tags for train"
+ python3 scripts/normalize_regex.py $train_outfname_src._norm $train_outfname_tgt._norm $train_outfname_src.norm $train_outfname_tgt.norm
+
+ echo "Applying normalization and script conversion for train"
+ # this script preprocesses the text and for indic languages, converts script to devanagari if needed
+ input_size=`python3 scripts/preprocess_translate.py $train_outfname_src.norm $train_outfname_src $src_lang $src_transliterate false`
+ input_size=`python3 scripts/preprocess_translate.py $train_outfname_tgt.norm $train_outfname_tgt $tgt_lang $tgt_transliterate true`
+ echo "Number of sentences in train: $input_size"
+
+
+ # --------------------------------------------------------------------------
+ # dev preprocessing
+ # --------------------------------------------------------------------------
+ dev_infname_src=$devtest_data_dir/${src_lang}-${tgt_lang}/dev.$src_lang
+ dev_infname_tgt=$devtest_data_dir/${src_lang}-${tgt_lang}/dev.$tgt_lang
+ dev_outfname_src=$devtest_norm_dir/dev.$src_lang
+ dev_outfname_tgt=$devtest_norm_dir/dev.$tgt_lang
+
+ echo "Normalizing punctuations for dev"
+ if $parallel_installed; then
+ parallel --pipe --keep-order bash normalize_punctuation.sh $src_lang < $dev_infname_src > $dev_outfname_src._norm
+ parallel --pipe --keep-order bash normalize_punctuation.sh $tgt_lang < $dev_infname_tgt > $dev_outfname_tgt._norm
+ else
+ bash normalize_punctuation.sh $src_lang < $dev_infname_src > $dev_outfname_src._norm
+ bash normalize_punctuation.sh $tgt_lang < $dev_infname_tgt > $dev_outfname_tgt._norm
+ fi
+
+ # add do not translate tags to handle special failure cases
+ echo "Applying do not translate tags for dev"
+ python3 scripts/normalize_regex.py $dev_outfname_src._norm $dev_outfname_tgt._norm $dev_outfname_src.norm $dev_outfname_tgt.norm
+
+ echo "Applying normalization and script conversion for dev"
+ # this script preprocesses the text and for indic languages, converts script to devanagari if needed
+ input_size=`python scripts/preprocess_translate.py $dev_outfname_src.norm $dev_outfname_src $src_lang $src_transliterate false`
+ input_size=`python scripts/preprocess_translate.py $dev_outfname_tgt.norm $dev_outfname_tgt $tgt_lang $tgt_transliterate true`
+ echo "Number of sentences in dev: $input_size"
+done
+
+
+# this concatenates lang pair data and creates text files to keep track of the number of
+# lines in each lang pair. this is important for joint training, as we will merge all
+# the lang pairs, and the individual lang line counts will be required for adding specific
+# lang tags later.
+# the outputs of these scripts will be text file like this:
+#
+# lang1-lang2 n1
+# lang1-lang3 n2
+python scripts/concat_joint_data.py $exp_dir/norm $exp_dir/data 'train'
+python scripts/concat_joint_data.py $exp_dir/norm $exp_dir/data 'dev'
+
+
+# tokenization of train and dev set using the spm trained models
+mkdir -p $exp_dir/bpe
+
+splits=(train dev)
+for split in ${splits[@]}; do
+ echo "Applying sentence piece for $split"
+ bash apply_sentence_piece.sh $exp_dir $exp_dir/data $exp_dir/bpe SRC TGT $split $parallel_installed
+done
+
+
+# this is only required for joint training
+# we apply language tags to the bpe segmented data
+# if we are translating lang1 to lang2 then each source sentence will become: "lang1 lang2 <sentence>"
+mkdir -p $exp_dir/final
+
+echo "Adding language tags"
+python scripts/add_joint_tags_translate.py $exp_dir 'train'
+python scripts/add_joint_tags_translate.py $exp_dir 'dev'
+
+
+# this is an important step if you are training with tpu and using num_batch_buckets:
+# the current implementation does not remove outliers before bucketing, so
+# removing these large sentences ourselves helps with getting better buckets
+# python scripts/remove_large_sentences.py $exp_dir/bpe/train.SRC $exp_dir/bpe/train.TGT $exp_dir/final/train.SRC $exp_dir/final/train.TGT
+# python scripts/remove_large_sentences.py $exp_dir/bpe/dev.SRC $exp_dir/bpe/dev.TGT $exp_dir/final/dev.SRC $exp_dir/final/dev.TGT
+# python scripts/remove_large_sentences.py $exp_dir/bpe/test.SRC $exp_dir/bpe/test.TGT $exp_dir/final/test.SRC $exp_dir/final/test.TGT
+
+
+echo "Binarizing data"
+
+# use cpu_count to get num_workers instead of setting it manually when running
+# in different instances
+num_workers=`python -c "import multiprocessing; print(multiprocessing.cpu_count())"`
+
+data_dir=$exp_dir/final
+out_data_dir=$exp_dir/final_bin
+
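+# reuse the shared dict produced by `prepare_data_joint_training.sh` so that the
+# fine-tuning data is binarized with exactly the same vocabulary indices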
+fairseq-preprocess \
+ --source-lang SRC --target-lang TGT \
+ --trainpref $data_dir/train \
+ --validpref $data_dir/dev \
+ --destdir $out_data_dir \
+ --workers $num_workers \
+ --srcdict $exp_dir/final_bin/dict.SRC.txt \
+    --tgtdict $exp_dir/final_bin/dict.TGT.txt
diff --git a/IndicTrans2/prepare_data_joint_training.sh b/IndicTrans2/prepare_data_joint_training.sh
new file mode 100644
index 0000000000000000000000000000000000000000..5cd748a413fc975ff359e780c007e17f50616975
--- /dev/null
+++ b/IndicTrans2/prepare_data_joint_training.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+
+# This script preprocesses and binarizes the data for training translation models using fairseq.
+# The only difference from `prepare_data_joint_finetuning.sh` is that this script generates the
+# fairseq dict, which is then shared for training all the other models.
+
+
+echo `date`
+exp_dir=$1 # path to the experiment directory
+vocab_dir=${2:-"$exp_dir/vocab"} # path to the spm-based tokenizer directory
+train_data_dir=${3:-"$exp_dir/train"} # path to the train data within experiment directory
+devtest_data_dir=${4:-"$exp_dir/devtest/all"} # path to the devtest data within experiment directory
+
+root=$(dirname $0)
+
+echo "Running experiment ${exp_dir}"
+
+train_processed_dir=$exp_dir/data
+devtest_processed_dir=$exp_dir/data
+out_data_dir=$exp_dir/final_bin
+
+mkdir -p $train_processed_dir
+mkdir -p $devtest_processed_dir
+mkdir -p $out_data_dir
+
+parallel_installed=false
+
+# Check if GNU Parallel is installed
+if command -v parallel &> /dev/null; then
+ echo "GNU Parallel is installed. Version information:"
+ parallel --version
+ parallel_installed=true
+fi
+
+# get a list of language pairs in the `train_data_dir`
+pairs=$(ls -d $train_data_dir/* | sort)
+
+
+# iterate over each language pair
+for pair in ${pairs[@]}; do
+ # extract the source and target languages from the pair name
+ pair=$(basename $pair)
+ src_lang=$(echo "$pair" | cut -d "-" -f 1)
+ tgt_lang=$(echo "$pair" | cut -d "-" -f 2)
+ echo "$src_lang - $tgt_lang"
+
+ train_norm_dir=$exp_dir/norm/$src_lang-$tgt_lang
+ devtest_norm_dir=$exp_dir/norm/$src_lang-$tgt_lang
+ mkdir -p $train_norm_dir
+ mkdir -p $devtest_norm_dir
+
+
+ # check if the source language text requires transliteration
+ src_transliterate="true"
+ if [[ $src_lang == *"Arab"* ]] || [[ $src_lang == *"Olck"* ]] || \
+ [[ $src_lang == *"Mtei"* ]] || [[ $src_lang == *"Latn"* ]]; then
+ src_transliterate="false"
+ fi
+
+ # check if the target language text requires transliteration
+ tgt_transliterate="true"
+ if [[ $tgt_lang == *"Arab"* ]] || [[ $tgt_lang == *"Olck"* ]] || \
+ [[ $tgt_lang == *"Mtei"* ]] || [[ $tgt_lang == *"Latn"* ]]; then
+ tgt_transliterate="false"
+ fi
+
+
+ # --------------------------------------------------------------------------
+ # train preprocessing
+ # --------------------------------------------------------------------------
+ train_infname_src=$train_data_dir/${src_lang}-${tgt_lang}/train.$src_lang
+ train_infname_tgt=$train_data_dir/${src_lang}-${tgt_lang}/train.$tgt_lang
+ train_outfname_src=$train_norm_dir/train.$src_lang
+ train_outfname_tgt=$train_norm_dir/train.$tgt_lang
+
+ echo "Normalizing punctuations for train"
+ if $parallel_installed; then
+ parallel --pipe --keep-order bash $root/normalize_punctuation.sh $src_lang < $train_infname_src > $train_outfname_src._norm
+ parallel --pipe --keep-order bash $root/normalize_punctuation.sh $tgt_lang < $train_infname_tgt > $train_outfname_tgt._norm
+ else
+ bash $root/normalize_punctuation.sh $src_lang < $train_infname_src > $train_outfname_src._norm
+ bash $root/normalize_punctuation.sh $tgt_lang < $train_infname_tgt > $train_outfname_tgt._norm
+ fi
+
+ # add do not translate tags to handle special failure cases
+ echo "Applying do not translate tags for train"
+ python3 scripts/normalize_regex.py $train_outfname_src._norm $train_outfname_tgt._norm $train_outfname_src.norm $train_outfname_tgt.norm
+
+ echo "Applying normalization and script conversion for train"
+ # this script preprocesses the text and for indic languages, converts script to devanagari if needed
+ input_size=`python3 scripts/preprocess_translate.py $train_outfname_src.norm $train_outfname_src $src_lang $src_transliterate false`
+ input_size=`python3 scripts/preprocess_translate.py $train_outfname_tgt.norm $train_outfname_tgt $tgt_lang $tgt_transliterate true`
+ echo "Number of sentences in train: $input_size"
+
+
+ # --------------------------------------------------------------------------
+ # dev preprocessing
+ # --------------------------------------------------------------------------
+ dev_infname_src=$devtest_data_dir/${src_lang}-${tgt_lang}/dev.$src_lang
+ dev_infname_tgt=$devtest_data_dir/${src_lang}-${tgt_lang}/dev.$tgt_lang
+ dev_outfname_src=$devtest_norm_dir/dev.$src_lang
+ dev_outfname_tgt=$devtest_norm_dir/dev.$tgt_lang
+
+ echo "Normalizing punctuations for dev"
+ if $parallel_installed; then
+ parallel --pipe --keep-order bash normalize_punctuation.sh $src_lang < $dev_infname_src > $dev_outfname_src._norm
+ parallel --pipe --keep-order bash normalize_punctuation.sh $tgt_lang < $dev_infname_tgt > $dev_outfname_tgt._norm
+ else
+ bash normalize_punctuation.sh $src_lang < $dev_infname_src > $dev_outfname_src._norm
+ bash normalize_punctuation.sh $tgt_lang < $dev_infname_tgt > $dev_outfname_tgt._norm
+ fi
+
+ # add do not translate tags to handle special failure cases
+ echo "Applying do not translate tags for dev"
+ python3 scripts/normalize_regex.py $dev_outfname_src._norm $dev_outfname_tgt._norm $dev_outfname_src.norm $dev_outfname_tgt.norm
+
+ echo "Applying normalization and script conversion for dev"
+ # this script preprocesses the text and for indic languages, converts script to devanagari if needed
+ input_size=`python scripts/preprocess_translate.py $dev_outfname_src.norm $dev_outfname_src $src_lang $src_transliterate false`
+ input_size=`python scripts/preprocess_translate.py $dev_outfname_tgt.norm $dev_outfname_tgt $tgt_lang $tgt_transliterate true`
+ echo "Number of sentences in dev: $input_size"
+done
+
+
+# this concatenates lang pair data and creates text files to keep track of the number of
+# lines in each lang pair. this is important for joint training, as we will merge all
+# the lang pairs, and the individual lang line counts will be required for adding specific
+# lang tags later.
+# the outputs of these scripts will be text file like this:
+#
+# lang1-lang2 n1
+# lang1-lang3 n2
+python scripts/concat_joint_data.py $exp_dir/norm $exp_dir/data 'train'
+python scripts/concat_joint_data.py $exp_dir/norm $exp_dir/data 'dev'
+
+
+# tokenization of train and dev set using the spm trained models
+mkdir -p $exp_dir/bpe
+
+splits=(train dev)
+for split in ${splits[@]}; do
+ echo "Applying sentence piece for $split"
+ bash apply_sentence_piece.sh $exp_dir $exp_dir/data $exp_dir/bpe SRC TGT $split $parallel_installed
+done
+
+
+# this is only required for joint training
+# we apply language tags to the bpe segmented data
+# if we are translating lang1 to lang2 then each source sentence will become: "lang1 lang2 <sentence>"
+mkdir -p $exp_dir/final
+
+echo "Adding language tags"
+python scripts/add_joint_tags_translate.py $exp_dir 'train'
+python scripts/add_joint_tags_translate.py $exp_dir 'dev'
+
+
+# this is an important step if you are training with tpu and using num_batch_buckets:
+# the current implementation does not remove outliers before bucketing, so
+# removing these large sentences ourselves helps with getting better buckets
+# python scripts/remove_large_sentences.py $exp_dir/bpe/train.SRC $exp_dir/bpe/train.TGT $exp_dir/final/train.SRC $exp_dir/final/train.TGT
+# python scripts/remove_large_sentences.py $exp_dir/bpe/dev.SRC $exp_dir/bpe/dev.TGT $exp_dir/final/dev.SRC $exp_dir/final/dev.TGT
+# python scripts/remove_large_sentences.py $exp_dir/bpe/test.SRC $exp_dir/bpe/test.TGT $exp_dir/final/test.SRC $exp_dir/final/test.TGT
+
+
+echo "Binarizing data"
+
+# use cpu_count to get num_workers instead of setting it manually when running
+# in different instances
+num_workers=`python -c "import multiprocessing; print(multiprocessing.cpu_count())"`
+
+data_dir=$exp_dir/final
+out_data_dir=$exp_dir/final_bin
+
+rm -rf $out_data_dir
+
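+# no --srcdict/--tgtdict here: fairseq-preprocess builds the shared joint
+# dictionary itself; --thresholdtgt 5 maps target tokens that appear fewer
+# than 5 times to <unk>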
+fairseq-preprocess \
+ --source-lang SRC --target-lang TGT \
+ --trainpref $data_dir/train \
+ --validpref $data_dir/dev \
+ --destdir $out_data_dir \
+ --workers $num_workers \
+ --thresholdtgt 5
diff --git a/IndicTrans2/scripts/__init__.py b/IndicTrans2/scripts/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/IndicTrans2/scripts/add_joint_tags_translate.py b/IndicTrans2/scripts/add_joint_tags_translate.py
new file mode 100644
index 0000000000000000000000000000000000000000..93ccd2067e1742b84286a3603098a3b5d88c8133
--- /dev/null
+++ b/IndicTrans2/scripts/add_joint_tags_translate.py
@@ -0,0 +1,54 @@
+import os
+import sys
+from tqdm import tqdm
+from typing import Iterator, Tuple
+from add_tags_translate import add_token
+
+
+def generate_lang_tag_iterator(infname: str) -> Iterator[Tuple[str, str]]:
+ """
+ Creates an iterator that reads the meta data from `infname` file and
+ yields the language tags in the form of tuples "`(src_lang, tgt_lang)`."
+
+ Args:
+ infname (str): path of the input filename from which the metadata will be read.
+
+ Yields:
+ Iterator[Tuple[str, str]]: an iterator that yields source and target language tags
+ in the form of tuples.
+ """
+ with open(infname, "r", encoding="utf-8") as infile:
+ for line in infile:
+ src_lang, tgt_lang, count = line.strip().split("\t")
+ count = int(count)
+ for _ in range(count):
+ yield (src_lang, tgt_lang)
+
+
+if __name__ == "__main__":
+
+ expdir = sys.argv[1]
+ split = sys.argv[2]
+
+ src_fname = os.path.join(expdir, "bpe", f"{split}.SRC")
+ tgt_fname = os.path.join(expdir, "bpe", f"{split}.TGT")
+ meta_fname = os.path.join(expdir, "data", f"{split}_lang_pairs.txt")
+
+ out_src_fname = os.path.join(expdir, "final", f"{split}.SRC")
+ out_tgt_fname = os.path.join(expdir, "final", f"{split}.TGT")
+
+ lang_tag_iterator = generate_lang_tag_iterator(meta_fname)
+
+ os.makedirs(os.path.join(expdir, "final"), exist_ok=True)
+
+ with open(src_fname, "r", encoding="utf-8") as src_file, open(
+ tgt_fname, "r", encoding="utf-8"
+ ) as tgt_file, open(out_src_fname, "w", encoding="utf-8") as out_src_file, open(
+ out_tgt_fname, "w", encoding="utf-8"
+ ) as out_tgt_file:
+
+ for (src_lang, tgt_lang), src_sent, tgt_sent in tqdm(
+ zip(lang_tag_iterator, src_file, tgt_file)
+ ):
+ out_src_file.write(add_token(src_sent.strip(), src_lang, tgt_lang) + "\n")
+ out_tgt_file.write(tgt_sent.strip() + "\n")
diff --git a/IndicTrans2/scripts/add_tags_translate.py b/IndicTrans2/scripts/add_tags_translate.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9ab7c0cb8167c2e2777f659d1458b6b161b876c
--- /dev/null
+++ b/IndicTrans2/scripts/add_tags_translate.py
@@ -0,0 +1,33 @@
+import sys
+from tqdm import tqdm
+
+
+def add_token(sent: str, src_lang: str, tgt_lang: str, delimiter: str = " ") -> str:
+ """
+ Add special tokens indicating source and target language to the start of the input sentence.
+ The resulting string will have the format: "`{src_lang} {tgt_lang} {input_sentence}`".
+
+ Args:
+ sent (str): input sentence to be translated.
+ src_lang (str): language of the input sentence.
+ tgt_lang (str): language in which the input sentence will be translated.
+ delimiter (str): separator to add between language tags and input sentence (default: " ").
+
+ Returns:
+ str: input sentence with the special tokens added to the start.
+ """
+ return src_lang + delimiter + tgt_lang + delimiter + sent
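+    # e.g. add_token("कैसे हैं आप?", "hin_Deva", "eng_Latn")
+    #   -> "hin_Deva eng_Latn कैसे हैं आप?"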
+
+
+if __name__ == "__main__":
+ infname = sys.argv[1]
+ outfname = sys.argv[2]
+ src_lang = sys.argv[3]
+ tgt_lang = sys.argv[4]
+
+ with open(infname, "r", encoding="utf-8") as infile, open(
+ outfname, "w", encoding="utf-8"
+ ) as outfile:
+ for line in tqdm(infile):
+ outstr = add_token(line.strip(), src_lang, tgt_lang)
+ outfile.write(outstr + "\n")
diff --git a/IndicTrans2/scripts/clean_vocab.py b/IndicTrans2/scripts/clean_vocab.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb1ba6720a6d7a4848d8ee88c576937e4b1ee5b9
--- /dev/null
+++ b/IndicTrans2/scripts/clean_vocab.py
@@ -0,0 +1,28 @@
+import sys
+
+
+def clean_vocab(in_vocab_fname: str, out_vocab_fname: str):
+ """
+ Cleans a vocabulary file by filtering out invalid lines.
+
+ Args:
+ in_vocab_fname (str): path of the input vocabulary file.
+        out_vocab_fname (str): path of the output vocabulary file.
+ """
+ with open(in_vocab_fname, "r", encoding="utf-8") as infile, open(
+ out_vocab_fname, "w", encoding="utf-8"
+ ) as outfile:
+        for i, line in enumerate(infile):
+            fields = line.strip("\r\n ").split(" ")
+            if len(fields) == 2:
+                outfile.write(line)
+            else:
+                print(f"{i}: {line.strip()}")
+                for c in line:
+                    print(f"{c}:{hex(ord(c))}")
+
+
+if __name__ == "__main__":
+ in_vocab_fname = sys.argv[1]
+ out_vocab_fname = sys.argv[2]
+ clean_vocab(in_vocab_fname, out_vocab_fname)
diff --git a/IndicTrans2/scripts/concat_joint_data.py b/IndicTrans2/scripts/concat_joint_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd473e97b62a05b5ad0e3d70c5433de3f13e6019
--- /dev/null
+++ b/IndicTrans2/scripts/concat_joint_data.py
@@ -0,0 +1,101 @@
+import os
+import sys
+from tqdm import tqdm
+from typing import List
+
+
+def concat_data(
+ data_dir: str,
+ out_dir: str,
+ lang_pair_list: List[List[str]],
+ out_src_lang: str = "SRC",
+ out_tgt_lang: str = "TGT",
+ split: str = "train",
+):
+ """
+ Concatenate data files from different language pairs and writes the output to a specified directory.
+
+ Args:
+ data_dir (str): path of the directory containing the data files for language pairs.
+ out_dir (str): path of the directory where the output files will be saved.
+ lang_pair_list (List[List[str]]): a list of language pairs, where each pair is a list of two strings.
+ out_src_lang (str, optional): suffix to use for the source language (default: "SRC").
+        out_tgt_lang (str, optional): suffix to use for the target language (default: "TGT").
+ split (str, optional): name of the split (e.g. "train", "dev", "test") to concatenate (default: "train").
+ """
+ os.makedirs(out_dir, exist_ok=True)
+
+ out_src_fname = os.path.join(out_dir, f"{split}.{out_src_lang}")
+ out_tgt_fname = os.path.join(out_dir, f"{split}.{out_tgt_lang}")
+
+ print()
+ print(out_src_fname)
+ print(out_tgt_fname)
+
+ # concatenate data for different language pairs
+ if os.path.isfile(out_src_fname):
+ os.unlink(out_src_fname)
+ if os.path.isfile(out_tgt_fname):
+ os.unlink(out_tgt_fname)
+
+ for src_lang, tgt_lang in tqdm(lang_pair_list):
+ print("src: {}, tgt:{}".format(src_lang, tgt_lang))
+
+ in_src_fname = os.path.join(data_dir, f"{src_lang}-{tgt_lang}", f"{split}.{src_lang}")
+ in_tgt_fname = os.path.join(data_dir, f"{src_lang}-{tgt_lang}", f"{split}.{tgt_lang}")
+
+ if not os.path.exists(in_src_fname) or not os.path.exists(in_tgt_fname):
+ continue
+
+ print(in_src_fname)
+ os.system("cat {} >> {}".format(in_src_fname, out_src_fname))
+
+ print(in_tgt_fname)
+ os.system("cat {} >> {}".format(in_tgt_fname, out_tgt_fname))
+
+ corpus_stats(data_dir, out_dir, lang_pair_list, split)
+
+
+def corpus_stats(data_dir: str, out_dir: str, lang_pair_list: List[List[str]], split: str):
+ """
+ Computes statistics for the given language pairs in a corpus and
+ writes the results to a file in the output directory.
+
+ Args:
+ data_dir (str): path of the directory containing the corpus data.
+ out_dir (str): path of the directory where the output file should be written.
+ lang_pair_list (List[List[str]]): a list of language pairs as lists of strings in the form "`[src_lang, tgt_lang]`".
+ split (str): a string indicating the split (e.g. 'train', 'dev', 'test') of the corpus to consider.
+ """
+ meta_fname = os.path.join(out_dir, f"{split}_lang_pairs.txt")
+ with open(meta_fname, "w", encoding="utf-8") as lp_file:
+
+ for src_lang, tgt_lang in tqdm(lang_pair_list):
+ print("src: {}, tgt:{}".format(src_lang, tgt_lang))
+
+ in_src_fname = os.path.join(data_dir, f"{src_lang}-{tgt_lang}", f"{split}.{src_lang}")
+ if not os.path.exists(in_src_fname):
+ continue
+
+ print(in_src_fname)
+
+ corpus_size = 0
+ with open(in_src_fname, "r", encoding="utf-8") as infile:
+ corpus_size = sum(map(lambda x: 1, infile))
+
+ lp_file.write(f"{src_lang}\t{tgt_lang}\t{corpus_size}\n")
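+            # each line written above has the form "src_lang\ttgt_lang\tcount",
+            # e.g. "eng_Latn\thin_Deva\t<num_train_lines>"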
+
+
+if __name__ == "__main__":
+
+ in_dir = sys.argv[1]
+ out_dir = sys.argv[2]
+ split = sys.argv[3]
+ lang_pair_list = []
+
+ pairs = os.listdir(in_dir)
+ for pair in pairs:
+ src_lang, tgt_lang = pair.split("-")
+ lang_pair_list.append([src_lang, tgt_lang])
+
+ concat_data(in_dir, out_dir, lang_pair_list, split=split)
diff --git a/IndicTrans2/scripts/convert_to_flores_codes.py b/IndicTrans2/scripts/convert_to_flores_codes.py
new file mode 100644
index 0000000000000000000000000000000000000000..6687a72d5b9327c952e9e4d86592df452a59a79a
--- /dev/null
+++ b/IndicTrans2/scripts/convert_to_flores_codes.py
@@ -0,0 +1,47 @@
+import os
+import sys
+from flores_codes_map_indic import flores_to_iso
+
+
+def convert_iso_to_flores(data_dir: str):
+ """
+ Converts ISO language code to flores language code for a given directory of language pairs.
+ Assumes that each subdirectory of the given directory corresponds to a language pair, and
+ that each subdirectory contains files named according to the ISO language codes of the source
+ and target languages.
+
+ Args:
+ data_dir (str): path of the directory containing the data files for language pairs in ISO language code.
+ """
+ pairs = os.listdir(data_dir)
+ iso_to_flores = {v:k for k, v in flores_to_iso.items()}
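+    # e.g. flores_to_iso maps "hin_Deva" -> "hi", so the inverted dict maps
+    # "hi" -> "hin_Deva"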
+
+ for pair in pairs:
+ print(pair)
+ path = os.path.join(data_dir, pair)
+ src_lang_iso, tgt_lang_iso = pair.split('-')
+
+ src_lang = iso_to_flores[src_lang_iso]
+ tgt_lang = iso_to_flores[tgt_lang_iso]
+
+ for fname in os.listdir(os.path.join(data_dir, pair)):
+ if fname.endswith(src_lang_iso):
+ old_fname = os.path.join(path, fname)
+ new_fname = os.path.join(path, fname.replace(src_lang_iso, src_lang))
+ os.rename(old_fname, new_fname)
+
+ if fname.endswith(tgt_lang_iso):
+ old_fname = os.path.join(path, fname)
+ new_fname = os.path.join(path, fname.replace(tgt_lang_iso, tgt_lang))
+ os.rename(old_fname, new_fname)
+
+        new_pair = "{}-{}".format(src_lang, tgt_lang)
+ new_path = os.path.join(data_dir, new_pair)
+ os.rename(path, new_path)
+
+
+if __name__ == "__main__":
+ data_dir = sys.argv[1]
+
+ convert_iso_to_flores(data_dir)
+
diff --git a/IndicTrans2/scripts/dedup_benchmark.py b/IndicTrans2/scripts/dedup_benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..befd4a76d45f2f11a78a85b907aba5a185ab41c8
--- /dev/null
+++ b/IndicTrans2/scripts/dedup_benchmark.py
@@ -0,0 +1,68 @@
+import regex as re
+import os
+import sys
+from collections import defaultdict
+from tqdm import tqdm
+
+def remove_overlaps(in_data_dir: str, out_data_dir: str, benchmark_dir: str):
+ """
+ Removes overlapping sentences between train dataset and dev/test dataset from the
+ input directory and writes de-duplicated train data to the specified output directory.
+
+ Args:
+ in_data_dir (str): path of the directory containing train data for each language pair.
+ out_data_dir (str): path of the directory where the de-duplicated train data will be written for each language pair.
+ benchmark_dir (str): path of the directory containing the language-wise monolingual side of dev/test set.
+ """
+ # load dev/test dataset for each language
+ devtest_normalized = defaultdict(set)
+ for lang in os.listdir(benchmark_dir):
+ fname = os.path.join(benchmark_dir, lang)
+
+ with open(fname, "r") as f:
+ sents = [sent for sent in f.read().split("\n") if sent.strip()]
+ sents = [re.sub(" +", " ", sent).replace("\n", "").strip() for sent in sents]
+ sents = [re.sub(" +", " ", re.sub(r"[^\w\s]", "", x)).lower() for x in sents]
+ devtest_normalized[lang] = set(sents)
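+        # normalized keys: whitespace collapsed, punctuation stripped, lowercased,
+        # so train lines that match a dev/test line up to casing/punctuation are
+        # also treated as overlaps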
+
+ # process each language pair train dataset to remove overlapping sentences
+ pairs = sorted(os.listdir(in_data_dir))
+ for pair in pairs:
+ print(pair)
+ src_lang, tgt_lang = pair.split("-")
+
+ src_infname = os.path.join(in_data_dir, pair, f"train.{src_lang}")
+ tgt_infname = os.path.join(in_data_dir, pair, f"train.{tgt_lang}")
+
+ src_outfname = os.path.join(out_data_dir, pair, f"train.{src_lang}")
+ tgt_outfname = os.path.join(out_data_dir, pair, f"train.{tgt_lang}")
+
+ os.makedirs(os.path.join(out_data_dir, pair), exist_ok=True)
+
+ # remove overlapping sentences and write de-duplicated train data to output directory
+ with open(src_infname, 'r', encoding='utf-8') as src_infile, \
+ open(tgt_infname, 'r', encoding='utf-8') as tgt_infile, \
+ open(src_outfname, 'w', encoding='utf-8') as src_outfile, \
+ open(tgt_outfname, 'w', encoding='utf-8') as tgt_outfile:
+
+ for src_line, tgt_line in tqdm(zip(src_infile, tgt_infile)):
+ src_line = re.sub(" +", " ", src_line).replace("\n", "").strip()
+ tgt_line = re.sub(" +", " ", tgt_line).replace("\n", "").strip()
+
+ src_line_normalized = re.sub(" +", " ", re.sub(r"[^\w\s]", "", src_line)).lower()
+ tgt_line_normalized = re.sub(" +", " ", re.sub(r"[^\w\s]", "", tgt_line)).lower()
+ if src_line_normalized in devtest_normalized[src_lang] or tgt_line_normalized in devtest_normalized[tgt_lang]:
+ continue
+
+ src_outfile.write(src_line + "\n")
+ tgt_outfile.write(tgt_line + "\n")
+
+
+if __name__ == "__main__":
+ in_data_dir = sys.argv[1]
+ out_data_dir = sys.argv[2]
+ benchmark_dir = sys.argv[3]
+
+ os.makedirs(out_data_dir, exist_ok=True)
+
+ remove_overlaps(in_data_dir, out_data_dir, benchmark_dir)
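+
+# usage: python scripts/dedup_benchmark.py <train_dir> <deduped_train_dir> <benchmark_mono_dir>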
diff --git a/IndicTrans2/scripts/extract_non_english_pairs.py b/IndicTrans2/scripts/extract_non_english_pairs.py
new file mode 100644
index 0000000000000000000000000000000000000000..9977c92ef60cbb9466708206a1d5fc2dea5bc25c
--- /dev/null
+++ b/IndicTrans2/scripts/extract_non_english_pairs.py
@@ -0,0 +1,131 @@
+import os
+import sys
+from tqdm import tqdm
+from typing import Iterator, List, Tuple
+from remove_train_devtest_overlaps import pair_dedup_files
+
+
+def read_file(fname: str) -> Iterator[str]:
+ """
+ Reads text from the input file and yields the text line-by-line as string.
+
+ Args:
+ fname (str): name of the input file to read.
+
+ Yields:
+ Iterator[str]: yields text line-by-line as a string from the input file.
+ """
+ with open(fname, "r", encoding="utf-8") as infile:
+ for line in infile:
+ yield line.strip()
+
+
+def extract_non_english_pairs(in_dir: str, out_dir: str, pivot_lang: str, langs: List[str]):
+ """
+ Extracts non-English language pairs from a parallel corpora using pivot-translation.
+
+ Args:
+ in_dir (str): path of the directory where the input files are stored.
+ out_dir (str): path of the directory where the output files are stored.
+ pivot_lang (str): pivot language that the input files are translated to.
+ langs (List[str]): a list of language codes for the non-English languages.
+ """
+ for i in tqdm(range(len(langs) - 1)):
+ print()
+ for j in range(i + 1, len(langs)):
+ lang1 = langs[i]
+ lang2 = langs[j]
+
+ print("{} {}".format(lang1, lang2))
+
+ fname1 = "{}/{}-{}/train.{}".format(in_dir, pivot_lang, lang1, pivot_lang)
+ fname2 = "{}/{}-{}/train.{}".format(in_dir, pivot_lang, lang2, pivot_lang)
+
+ enset_l1 = set(read_file(fname1))
+ common_en_set = enset_l1.intersection(read_file(fname2))
+
+ il_fname1 = "{}/{}-{}/train.{}".format(in_dir, pivot_lang, lang1, lang1)
+ en_lang1_dict = {}
+ for en_line, il_line in zip(read_file(fname1), read_file(il_fname1)):
+ if en_line in common_en_set:
+ en_lang1_dict[en_line] = il_line
+
+ os.makedirs("{}/{}-{}".format(out_dir, lang1, lang2), exist_ok=True)
+ out_l1_fname = "{o}/{l1}-{l2}/train.{l1}".format(o=out_dir, l1=lang1, l2=lang2)
+ out_l2_fname = "{o}/{l1}-{l2}/train.{l2}".format(o=out_dir, l1=lang1, l2=lang2)
+
+            il_fname2 = "{}/{}-{}/train.{}".format(in_dir, pivot_lang, lang2, lang2)
+ with open(out_l1_fname, "w", encoding="utf-8") as out_l1_file, open(
+ out_l2_fname, "w", encoding="utf-8"
+ ) as out_l2_file:
+ for en_line, il_line in zip(read_file(fname2), read_file(il_fname2)):
+ if en_line in en_lang1_dict:
+                    # use the block below instead if en_lang1_dict stores a
+                    # list of multiple translations per English sentence:
+                    # for lang1_line in en_lang1_dict[en_line]:
+                    #     out_l1_file.write(lang1_line + "\n")
+                    #     out_l2_file.write(il_line + "\n")
+
+                    # single-translation case: write the stored pair once
+                    lang1_line, lang2_line = en_lang1_dict[en_line], il_line
+                    out_l1_file.write(lang1_line + "\n")
+                    out_l2_file.write(lang2_line + "\n")
+
+ pair_dedup_files(out_l1_fname, out_l2_fname)
+
+
+def get_extracted_stats(out_dir: str, langs: List[str]) -> List[Tuple[str, str, int]]:
+ """
+    Gathers stats from the extracted non-English pairs.
+
+ Args:
+ out_dir (str): path of the directory where the output files are stored.
+ langs (List[str]): a list of language codes.
+
+ Returns:
+ List[Tuple[str, str, int]]: a list of tuples, where each tuple contains statistical information
+ about a language pair in the form "`(lang1, lang2, count)`".
+ """
+ common_stats = []
+ for i in tqdm(range(len(langs) - 1)):
+ for j in range(i + 1, len(langs)):
+ lang1 = langs[i]
+ lang2 = langs[j]
+
+ out_l1_fname = "{o}/{l1}-{l2}/train.{l1}".format(o=out_dir, l1=lang1, l2=lang2)
+
+            cnt = sum(1 for _ in read_file(out_l1_fname))
+ common_stats.append((lang1, lang2, cnt))
+ common_stats.append((lang2, lang1, cnt))
+ return common_stats
+
+
+if __name__ == "__main__":
+    in_dir = sys.argv[1]
+    out_dir = sys.argv[2]
+    langs = sorted([lang.strip() for lang in sys.argv[3].split(",")])
+    pivot_lang = sys.argv[4] if len(sys.argv) > 4 else "eng_Latn"
+
+    # rename pair directories so that the pivot language always comes first
+    for pair in os.listdir(in_dir):
+        src_lang, tgt_lang = pair.split("-")
+        if src_lang == pivot_lang:
+            continue
+        tmp_in_dir = os.path.join(in_dir, pair)
+        tmp_out_dir = os.path.join(in_dir, "{}-{}".format(pivot_lang, src_lang))
+        os.rename(tmp_in_dir, tmp_out_dir)
+
+    extract_non_english_pairs(in_dir, out_dir, pivot_lang, langs)
+
+    stats = get_extracted_stats(out_dir, langs)
+    with open("{}/lang_pairs.txt".format(out_dir), "w", encoding="utf-8") as f:
+        for stat in stats:
+            f.write("\t".join(map(str, stat)) + "\n")
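To make the pivoting concrete: any sentence whose English side occurs in both the pivot-lang1 and pivot-lang2 corpora yields a lang1-lang2 pair. A toy illustration with invented data:

```python
# toy illustration of pivot extraction (data is invented)
en_hi = {"how are you?": "आप कैसे हैं?"}                  # pivot -> lang1 lookup
en_ta = [("how are you?", "நீங்கள் எப்படி இருக்கிறீர்கள்?")]  # (pivot, lang2) pairs

hi_ta = [(en_hi[en], ta) for en, ta in en_ta if en in en_hi]
print(hi_ta)  # [('आप कैसे हैं?', 'நீங்கள் எப்படி இருக்கிறீர்கள்?')]
```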
diff --git a/IndicTrans2/scripts/flores_codes_map_indic.py b/IndicTrans2/scripts/flores_codes_map_indic.py
new file mode 100644
index 0000000000000000000000000000000000000000..1768a478e0b4bcf99687b67d09d70eef46b8f52e
--- /dev/null
+++ b/IndicTrans2/scripts/flores_codes_map_indic.py
@@ -0,0 +1,74 @@
+"""
+FLORES language code mapping to 2 letter ISO language code for compatibility
+with Indic NLP Library (https://github.com/anoopkunchukuttan/indic_nlp_library)
+"""
+flores_codes = {
+ "asm_Beng": "as",
+ "awa_Deva": "hi",
+ "ben_Beng": "bn",
+ "bho_Deva": "hi",
+ "brx_Deva": "hi",
+ "doi_Deva": "hi",
+ "eng_Latn": "en",
+ "gom_Deva": "kK",
+ "guj_Gujr": "gu",
+ "hin_Deva": "hi",
+ "hne_Deva": "hi",
+ "kan_Knda": "kn",
+ "kas_Arab": "ur",
+ "kas_Deva": "hi",
+ "kha_Latn": "en",
+ "lus_Latn": "en",
+ "mag_Deva": "hi",
+ "mai_Deva": "hi",
+ "mal_Mlym": "ml",
+ "mar_Deva": "mr",
+ "mni_Beng": "bn",
+ "mni_Mtei": "hi",
+ "npi_Deva": "ne",
+ "ory_Orya": "or",
+ "pan_Guru": "pa",
+ "san_Deva": "hi",
+ "sat_Olck": "or",
+ "snd_Arab": "ur",
+ "snd_Deva": "hi",
+ "tam_Taml": "ta",
+ "tel_Telu": "te",
+ "urd_Arab": "ur",
+}
+
+
+flores_to_iso = {
+ "asm_Beng": "as",
+ "awa_Deva": "awa",
+ "ben_Beng": "bn",
+ "bho_Deva": "bho",
+ "brx_Deva": "brx",
+ "doi_Deva": "doi",
+ "eng_Latn": "en",
+ "gom_Deva": "gom",
+ "guj_Gujr": "gu",
+ "hin_Deva": "hi",
+ "hne_Deva": "hne",
+ "kan_Knda": "kn",
+ "kas_Arab": "ksa",
+ "kas_Deva": "ksd",
+ "kha_Latn": "kha",
+ "lus_Latn": "lus",
+ "mag_Deva": "mag",
+ "mai_Deva": "mai",
+ "mal_Mlym": "ml",
+ "mar_Deva": "mr",
+ "mni_Beng": "mnib",
+ "mni_Mtei": "mnim",
+ "npi_Deva": "ne",
+ "ory_Orya": "or",
+ "pan_Guru": "pa",
+ "san_Deva": "sa",
+ "sat_Olck": "sat",
+ "snd_Arab": "sda",
+ "snd_Deva": "sdd",
+ "tam_Taml": "ta",
+ "tel_Telu": "te",
+ "urd_Arab": "ur",
+}
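A short sketch of how these tables are consumed downstream (assuming, as the other scripts here do, that the scripts directory is importable):

```python
from flores_codes_map_indic import flores_codes, flores_to_iso

lang = "mar_Deva"
print(flores_codes[lang])   # 'mr' -> code understood by indic_nlp_library
print(flores_to_iso[lang])  # 'mr' -> ISO-style code used in file naming
```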
diff --git a/IndicTrans2/scripts/indic_num_map.py b/IndicTrans2/scripts/indic_num_map.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd339a7d90eb41eab0605f15f7ea9339641cb465
--- /dev/null
+++ b/IndicTrans2/scripts/indic_num_map.py
@@ -0,0 +1,117 @@
+"""
+A dictionary mapping intended to normalize the numerals in Indic languages from
+native script to Roman script. This is done to ensure that the figures / numbers
+mentioned in native script are perfectly preserved during translation.
+"""
+INDIC_NUM_MAP = {
+ "\u09e6": "0",
+ "0": "0",
+ "\u0ae6": "0",
+ "\u0ce6": "0",
+ "\u0966": "0",
+ "\u0660": "0",
+ "\uabf0": "0",
+ "\u0b66": "0",
+ "\u0a66": "0",
+ "\u1c50": "0",
+ "\u06f0": "0",
+ "\u09e7": "1",
+ "1": "1",
+ "\u0ae7": "1",
+ "\u0967": "1",
+ "\u0ce7": "1",
+ "\u06f1": "1",
+ "\uabf1": "1",
+ "\u0b67": "1",
+ "\u0a67": "1",
+ "\u1c51": "1",
+ "\u0c67": "1",
+ "\u09e8": "2",
+ "2": "2",
+ "\u0ae8": "2",
+ "\u0968": "2",
+ "\u0ce8": "2",
+ "\u06f2": "2",
+ "\uabf2": "2",
+ "\u0b68": "2",
+ "\u0a68": "2",
+ "\u1c52": "2",
+ "\u0c68": "2",
+ "\u09e9": "3",
+ "3": "3",
+ "\u0ae9": "3",
+ "\u0969": "3",
+ "\u0ce9": "3",
+ "\u06f3": "3",
+ "\uabf3": "3",
+ "\u0b69": "3",
+ "\u0a69": "3",
+ "\u1c53": "3",
+ "\u0c69": "3",
+ "\u09ea": "4",
+ "4": "4",
+ "\u0aea": "4",
+ "\u096a": "4",
+ "\u0cea": "4",
+ "\u06f4": "4",
+ "\uabf4": "4",
+ "\u0b6a": "4",
+ "\u0a6a": "4",
+ "\u1c54": "4",
+ "\u0c6a": "4",
+ "\u09eb": "5",
+ "5": "5",
+ "\u0aeb": "5",
+ "\u096b": "5",
+ "\u0ceb": "5",
+ "\u06f5": "5",
+ "\uabf5": "5",
+ "\u0b6b": "5",
+ "\u0a6b": "5",
+ "\u1c55": "5",
+ "\u0c6b": "5",
+ "\u09ec": "6",
+ "6": "6",
+ "\u0aec": "6",
+ "\u096c": "6",
+ "\u0cec": "6",
+ "\u06f6": "6",
+ "\uabf6": "6",
+ "\u0b6c": "6",
+ "\u0a6c": "6",
+ "\u1c56": "6",
+ "\u0c6c": "6",
+ "\u09ed": "7",
+ "7": "7",
+ "\u0aed": "7",
+ "\u096d": "7",
+ "\u0ced": "7",
+ "\u06f7": "7",
+ "\uabf7": "7",
+ "\u0b6d": "7",
+ "\u0a6d": "7",
+ "\u1c57": "7",
+ "\u0c6d": "7",
+ "\u09ee": "8",
+ "8": "8",
+ "\u0aee": "8",
+ "\u096e": "8",
+ "\u0cee": "8",
+ "\u06f8": "8",
+ "\uabf8": "8",
+ "\u0b6e": "8",
+ "\u0a6e": "8",
+ "\u1c58": "8",
+ "\u0c6e": "8",
+ "\u09ef": "9",
+ "9": "9",
+ "\u0aef": "9",
+ "\u096f": "9",
+ "\u0cef": "9",
+ "\u06f9": "9",
+ "\uabf9": "9",
+ "\u0b6f": "9",
+ "\u0a6f": "9",
+ "\u1c59": "9",
+ "\u0c6f": "9",
+}
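The map is applied character by character, which is how the `normalize_regex*` scripts below use it; a quick demonstration:

```python
from indic_num_map import INDIC_NUM_MAP

def normalize_indic_numerals(line: str) -> str:
    # characters outside the map pass through unchanged
    return "".join(INDIC_NUM_MAP.get(c, c) for c in line)

print(normalize_indic_numerals("कुल १२३ लोग"))  # -> 'कुल 123 लोग'
```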
diff --git a/IndicTrans2/scripts/merge_bpcc.py b/IndicTrans2/scripts/merge_bpcc.py
new file mode 100644
index 0000000000000000000000000000000000000000..e23aea332cfa5a91fcd8e433febe1a3efcb0c203
--- /dev/null
+++ b/IndicTrans2/scripts/merge_bpcc.py
@@ -0,0 +1,54 @@
+import os
+from sys import argv
+import multiprocessing as mp
+
+
+def process_language(lang):
+
+ all_pairs = []
+ print(f"lang: {lang}")
+
+ for domain in domains:
+ src_fname = f"{base_path}/{domain}/eng_Latn-{lang}/train.eng_Latn"
+ tgt_fname = f"{base_path}/{domain}/eng_Latn-{lang}/train.{lang}"
+
+        try:
+            with open(src_fname, "r", encoding="utf-8") as f1, open(
+                tgt_fname, "r", encoding="utf-8"
+            ) as f2:
+                src_sents = [x.strip() for x in f1]
+                tgt_sents = [x.strip() for x in f2]
+                all_pairs.extend(zip(src_sents, tgt_sents))
+        except FileNotFoundError:
+            # not every domain has data for every language pair
+            continue
+
+    all_pairs = list(set(all_pairs))
+    if not all_pairs:
+        print(f"no data found for {lang}, skipping")
+        return
+    src_sents, tgt_sents = zip(*all_pairs)
+
+ os.makedirs(f"{out_dir}/eng_Latn-{lang}", exist_ok=True)
+ with open(
+ f"{out_dir}/eng_Latn-{lang}/train.eng_Latn", "w", encoding="utf-8"
+ ) as f1, open(
+ f"{out_dir}/eng_Latn-{lang}/train.{lang}", "w", encoding="utf-8"
+ ) as f2:
+ f1.write("\n".join(src_sents))
+ f2.write("\n".join(tgt_sents))
+
+
+if __name__ == "__main__":
+
+ base_path = argv[1]
+ out_dir = argv[2]
+
+ language_codes = [
+ 'asm_Beng', 'ben_Beng', 'brx_Deva', 'doi_Deva', 'gom_Deva',
+ 'guj_Gujr', 'hin_Deva', 'kan_Knda', 'kas_Arab', 'kas_Deva',
+ 'mai_Deva', 'mal_Mlym', 'mar_Deva', 'mni_Beng', 'mni_Mtei',
+ 'npi_Deva', 'ory_Orya', 'pan_Guru', 'san_Deva', 'sat_Olck',
+ 'snd_Arab', 'snd_Deva', 'tam_Taml', 'tel_Telu', 'urd_Arab'
+ ]
+
+ domains = os.listdir(base_path)
+
+ with mp.Pool(mp.cpu_count()) as pool:
+ pool.map(process_language, language_codes)
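Note that the merge dedups on whole (source, target) pairs rather than on either side alone, so a single English sentence can legitimately keep two different translations; a toy check of that behaviour:

```python
# exact duplicate pairs collapse, alternative translations survive
pairs = [("a cat", "एक बिल्ली"), ("a cat", "एक बिल्ली"), ("a cat", "बिल्ली")]
print(sorted(set(pairs)))
# [('a cat', 'एक बिल्ली'), ('a cat', 'बिल्ली')]
```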
diff --git a/IndicTrans2/scripts/normalize_regex.py b/IndicTrans2/scripts/normalize_regex.py
new file mode 100644
index 0000000000000000000000000000000000000000..268ded5ec0a0d540f901632f82cae65bc49407d3
--- /dev/null
+++ b/IndicTrans2/scripts/normalize_regex.py
@@ -0,0 +1,108 @@
+from typing import Tuple, List
+import regex as re
+import sys
+from tqdm import tqdm
+from joblib import Parallel, delayed
+from indic_num_map import INDIC_NUM_MAP
+
+
+# pattern to match URLs (with or without protocol)
+URL_PATTERN = r'\b(?<![\w/.])(?:(?:https?|ftp)://)?(?:(?:[\w-]+\.)+(?!\.))(?:[\w/\-?#&=%.]+)+(?!\.\w+)\b'
+# pattern to match email addresses
+EMAIL_PATTERN = r'[\w.+-]+@[\w-]+\.[\w.-]+'
+# handles dates, times, percentages, proportions, ratios, etc.
+NUMERAL_PATTERN = r"(~?\d+\.?\d*\s?%?\s?-?\s?~?\d+\.?\d*\s?%|~?\d+%|\d+[-/.,:']\d+[-/.,:'+]\d+(?:\.\d+)?|\d+[-/.:'+]\d+(?:\.\d+)?)"
+# handles UPI IDs, social media handles and hashtags
+OTHER_PATTERN = r'[A-Za-z0-9]*[#|@]\w+'
+
+
+def normalize_indic_numerals(line: str) -> str:
+    """
+    Normalize the numerals in Indic languages from native script to Roman script (if present).
+
+    Args:
+        line (str): an input string with Indic numerals to be normalized.
+
+    Returns:
+        str: the input string with all Indic numerals normalized to Roman script.
+    """
+    return "".join([INDIC_NUM_MAP.get(c, c) for c in line])
+
+
+def wrap_with_dnt_tag(src: str, tgt: str, pattern: str) -> Tuple[str, str]:
+ """
+ Wraps all occurences of a given pattern match that are present in both source and target sentences
+ with a do not translate tags (`` {input string} ``). This will be particularly useful
+ when some span of input string needs to be forwarded as it is and not translated.
+
+ Args:
+ src (str): source sentence.
+ tgt (str): target sentence.
+ pattern (str): pattern to search for in the source and target sentence.
+
+ Returns:
+ Tuple[str, str]: A tuple containing source and target sentences where source sentences
+ are wrapped in `` and `` tags in case of pattern matches.
+ """
+
+ # find matches in src and tgt sentence
+ src_matches = set(re.findall(pattern, src))
+ tgt_matches = set(re.findall(pattern, tgt))
+
+ # find matches that are present in both src and tgt
+ common_matches = src_matches.intersection(tgt_matches)
+
+    # wrap common matches with <dnt> and </dnt> tags
+    for match in common_matches:
+        src = src.replace(match, f' <dnt> {match} </dnt> ')
+        tgt = tgt.replace(match, f' <dnt> {match} </dnt> ')
+
+    src = re.sub(r"\s+", " ", src)
+    tgt = re.sub(r"\s+", " ", tgt)
+
+ return src, tgt
+
+
+def normalize(src_line: str, tgt_line: str, patterns: List[str]) -> Tuple[str, str]:
+ """
+ Normalizes and wraps the spans of text that are present in both source and target sentence
+ with `` and `` tags. It first normalizes the Indic numerals in the input string to
+ Roman script. Later, it uses the source and target sentence with normalized Indic numerals to
+ wrap the spans of source sentence matching the pattern with `` and `` tags.
+
+ Args:
+ src_line (str): source sentence.
+ tgt_line (str): source sentence.
+ pattern (List[str]): list of patterns to search for in the input string.
+
+ Returns:
+ Tuple[str, str]: A tuple containing source and target sentences where source sentences
+ are wrapped in `` and `` tags in case of pattern matches.
+ """
+ src_line = normalize_indic_numerals(src_line.strip("\n"))
+ tgt_line = normalize_indic_numerals(tgt_line.strip("\n"))
+ for pattern in patterns:
+ src_line, tgt_line = wrap_with_dnt_tag(src_line, tgt_line, pattern)
+ return src_line, tgt_line
+
+
+if __name__ == "__main__":
+
+ src_infname = sys.argv[1]
+ tgt_infname = sys.argv[2]
+ src_outfname = sys.argv[3]
+ tgt_outfname = sys.argv[4]
+
+ num_lines = sum(1 for line in open(src_infname, "r"))
+ patterns = [EMAIL_PATTERN, URL_PATTERN, NUMERAL_PATTERN, OTHER_PATTERN]
+
+ with open(src_infname, "r", encoding="utf-8") as src_infile, \
+ open(tgt_infname, "r", encoding="utf-8") as tgt_infile, \
+ open(src_outfname, "w", encoding="utf-8") as src_outfile, \
+ open(tgt_outfname, "w", encoding="utf-8") as tgt_outfile:
+
+ out_lines = Parallel(n_jobs=-1, backend="multiprocessing")(
+ delayed(normalize)(src_line, tgt_line, patterns) for src_line, tgt_line in tqdm(zip(src_infile, tgt_infile), total=num_lines)
+ )
+
+ for src_line, tgt_line in tqdm(out_lines):
+ src_outfile.write(src_line + "\n")
+ tgt_outfile.write(tgt_line + "\n")
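Expected behaviour of the wrapping, assuming the module's own names (`wrap_with_dnt_tag`, `EMAIL_PATTERN`) are in scope; the sentences are invented:

```python
src = "Write to help@example.com today"
tgt = "आज help@example.com पर लिखें"
src_out, tgt_out = wrap_with_dnt_tag(src, tgt, EMAIL_PATTERN)
print(src_out)  # Write to <dnt> help@example.com </dnt> today
print(tgt_out)  # आज <dnt> help@example.com </dnt> पर लिखें
```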
diff --git a/IndicTrans2/scripts/normalize_regex_inference.py b/IndicTrans2/scripts/normalize_regex_inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..a8d6601c139086ee2e55c4004b0fdd74d6552e22
--- /dev/null
+++ b/IndicTrans2/scripts/normalize_regex_inference.py
@@ -0,0 +1,87 @@
+from typing import List
+import regex as re
+import sys
+from tqdm import tqdm
+from joblib import Parallel, delayed
+from indic_num_map import INDIC_NUM_MAP
+
+
+# pattern to match URLs (with or without protocol)
+URL_PATTERN = r'\b(?<![\w/.])(?:(?:https?|ftp)://)?(?:(?:[\w-]+\.)+(?!\.))(?:[\w/\-?#&=%.]+)+(?!\.\w+)\b'
+# pattern to match email addresses
+EMAIL_PATTERN = r'[\w.+-]+@[\w-]+\.[\w.-]+'
+# handles dates, times, percentages, proportions, ratios, etc.
+NUMERAL_PATTERN = r"(~?\d+\.?\d*\s?%?\s?-?\s?~?\d+\.?\d*\s?%|~?\d+%|\d+[-/.,:']\d+[-/.,:'+]\d+(?:\.\d+)?|\d+[-/.:'+]\d+(?:\.\d+)?)"
+# handles UPI IDs, social media handles and hashtags
+OTHER_PATTERN = r'[A-Za-z0-9]*[#|@]\w+'
+
+
+def normalize_indic_numerals(line: str) -> str:
+ """
+ Normalize the numerals in Indic languages from native script to Roman script (if present).
+
+ Args:
+ line (str): an input string with Indic numerals to be normalized.
+
+ Returns:
+        str: the input string with all Indic numerals normalized to Roman script.
+ """
+ return "".join([INDIC_NUM_MAP.get(c, c) for c in line])
+
+
+def wrap_with_dnt_tag(text: str, pattern: str) -> str:
+ """
+ Wraps all occurences of a given pattern match in the input string with a do not translate
+ tags (`` {input string} ``). This will be particularly useful when some span of
+ input string needs to be forwarded as it and not translated.
+
+ Args:
+ text (str): input string.
+ pattern (str): pattern to search for in the input string.
+
+ Returns:
+ str: input string with spans wrapped in `` and `` tags in case of pattern matches.
+ """
+ # find matches in input text
+ matches = set(re.findall(pattern, text))
+
+    # wrap each match with <dnt> and </dnt> tags
+    for match in matches:
+        text = text.replace(match, f' <dnt> {match} </dnt> ')
+
+    text = re.sub(r"\s+", " ", text)
+
+ return text
+
+
+def normalize(text: str, patterns: List[str]) -> str:
+ """
+ Normalizes and wraps the spans of input string with `` and `` tags. It first normalizes
+ the Indic numerals in the input string to Roman script. Later, it uses the input string with normalized
+ Indic numerals to wrap the spans of text matching the pattern with `` and `` tags.
+
+ Args:
+ text (str): input string.
+ pattern (List[str]): list of patterns to search for in the input string.
+
+ Returns:
+ str: normalized input string wrapped with `` and `` tags.
+ """
+ text = normalize_indic_numerals(text.strip("\n"))
+ for pattern in patterns:
+ text = wrap_with_dnt_tag(text, pattern)
+ return text
+
+
+if __name__ == "__main__":
+
+ src_infname = sys.argv[1]
+ src_outfname = sys.argv[2]
+
+ num_lines = sum(1 for line in open(src_infname, "r"))
+ patterns = [EMAIL_PATTERN, URL_PATTERN, NUMERAL_PATTERN, OTHER_PATTERN]
+
+ with open(src_infname, "r", encoding="utf-8") as src_infile, \
+ open(src_outfname, "w", encoding="utf-8") as src_outfile:
+
+ for src_line in tqdm(src_infile):
+ src_line = normalize(src_line, patterns)
+ src_outfile.write(src_line.strip() + "\n")
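An end-to-end sketch of the inference-side normalizer, assuming the module's `normalize` and the `patterns` list from the main block; the exact spans wrapped depend on the regexes, so the output shown is indicative:

```python
text = "मुझे ९९% छूट मिली, देखें https://example.com"
print(normalize(text, patterns))
# roughly: 'मुझे <dnt> 99% </dnt> छूट मिली, देखें <dnt> https://example.com </dnt>'
```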
diff --git a/IndicTrans2/scripts/postprocess_translate.py b/IndicTrans2/scripts/postprocess_translate.py
new file mode 100644
index 0000000000000000000000000000000000000000..abe127307c26a5ba4b2494885bfdfdc145bd6c60
--- /dev/null
+++ b/IndicTrans2/scripts/postprocess_translate.py
@@ -0,0 +1,106 @@
+INDIC_NLP_LIB_HOME = "indic_nlp_library"
+INDIC_NLP_RESOURCES = "indic_nlp_resources"
+import sys
+
+sys.path.append(r"{}".format(INDIC_NLP_LIB_HOME))
+from indicnlp import common
+
+common.set_resources_path(INDIC_NLP_RESOURCES)
+from indicnlp import loader
+
+loader.load()
+from sacremoses import MosesPunctNormalizer
+from sacremoses import MosesTokenizer
+from sacremoses import MosesDetokenizer
+from collections import defaultdict
+
+import indicnlp
+from indicnlp.tokenize import indic_tokenize
+from indicnlp.tokenize import indic_detokenize
+from indicnlp.normalize import indic_normalize
+from indicnlp.transliterate import unicode_transliterate
+
+from flores_codes_map_indic import flores_codes
+import sentencepiece as spm
+
+import re
+
+en_detok = MosesDetokenizer(lang="en")
+
+
+def postprocess(
+ infname: str,
+ outfname: str,
+ input_size: int,
+ lang: str,
+ transliterate: bool = False,
+ spm_model_path: str = None,
+):
+ """
+ Postprocess the output of a machine translation model in the following order:
+ - parse fairseq interactive output
+ - convert script back to native Indic script (in case of Indic languages)
+ - detokenize
+
+ Args:
+ infname (str): path to the input file containing the machine translation output.
+ outfname (str): path to the output file where the postprocessed output will be written.
+ input_size (int): number of sentences in the input file.
+ lang (str): language code of the output language.
+        transliterate (bool, optional): whether the model output is in Devanagari and should be transliterated back to the native script of the output language (default: False).
+ spm_model_path (str): path of the sentence piece model.
+ """
+ if spm_model_path is None:
+ raise Exception("Please provide sentence piece model path for decoding")
+
+ sp = spm.SentencePieceProcessor(model_file=spm_model_path)
+
+ iso_lang = flores_codes[lang]
+
+    # placeholder entries keep the output aligned even if a hypothesis is missing
+    consolidated_testoutput = [(x, 0.0, "") for x in range(input_size)]
+
+    with open(infname, "r", encoding="utf-8") as infile:
+ temp_testoutput = list(
+ map(
+ lambda x: x.strip().split("\t"),
+ filter(lambda x: x.startswith("H-"), infile),
+ )
+ )
+ temp_testoutput = list(
+ map(lambda x: (int(x[0].split("-")[1]), float(x[1]), x[2]), temp_testoutput)
+ )
+ for sid, score, hyp in temp_testoutput:
+ consolidated_testoutput[sid] = (sid, score, hyp)
+ consolidated_testoutput = [x[2] for x in consolidated_testoutput]
+ consolidated_testoutput = [sp.decode(x.split(" ")) for x in consolidated_testoutput]
+
+ if iso_lang == "en":
+ with open(outfname, "w", encoding="utf-8") as outfile:
+ for sent in consolidated_testoutput:
+ outfile.write(en_detok.detokenize(sent.split(" ")) + "\n")
+ else:
+ xliterator = unicode_transliterate.UnicodeIndicTransliterator()
+ with open(outfname, "w", encoding="utf-8") as outfile:
+ for sent in consolidated_testoutput:
+ if transliterate:
+ outstr = indic_detokenize.trivial_detokenize(
+ xliterator.transliterate(sent, "hi", iso_lang), iso_lang
+ )
+ else:
+ outstr = indic_detokenize.trivial_detokenize(sent, iso_lang)
+ outfile.write(outstr + "\n")
+
+
+if __name__ == "__main__":
+ infname = sys.argv[1]
+ outfname = sys.argv[2]
+ input_size = int(sys.argv[3])
+ lang = sys.argv[4]
+    transliterate = sys.argv[5].lower() == "true"
+ spm_model_path = sys.argv[6]
+
+ postprocess(infname, outfname, input_size, lang, transliterate, spm_model_path)
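For reference, the `H-` lines being parsed above look like the hypothetical fairseq-interactive output below; the index after `H-` is what restores the original sentence order:

```python
raw = [
    "S-0\tsource sentence",
    "H-1\t-0.41\t▁second ▁hypo thesis",
    "H-0\t-0.23\t▁first ▁hypo thesis",
]
hyps = [line.split("\t") for line in raw if line.startswith("H-")]
ordered = {int(tag.split("-")[1]): hyp for tag, _score, hyp in hyps}
print([ordered[i] for i in sorted(ordered)])
# ['▁first ▁hypo thesis', '▁second ▁hypo thesis']
```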
diff --git a/IndicTrans2/scripts/preprocess_translate.py b/IndicTrans2/scripts/preprocess_translate.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf7f43d8a243c5cba48c805d765b5d37a0644eb7
--- /dev/null
+++ b/IndicTrans2/scripts/preprocess_translate.py
@@ -0,0 +1,164 @@
+INDIC_NLP_LIB_HOME = "indic_nlp_library"
+INDIC_NLP_RESOURCES = "indic_nlp_resources"
+import sys
+
+sys.path.append(r"{}".format(INDIC_NLP_LIB_HOME))
+from indicnlp import common
+
+common.set_resources_path(INDIC_NLP_RESOURCES)
+from indicnlp import loader
+
+loader.load()
+from sacremoses import MosesPunctNormalizer
+from sacremoses import MosesTokenizer
+from sacremoses import MosesDetokenizer
+from collections import defaultdict
+
+from tqdm import tqdm
+from joblib import Parallel, delayed
+
+from indicnlp.tokenize import indic_tokenize
+from indicnlp.tokenize import indic_detokenize
+from indicnlp.normalize import indic_normalize
+from indicnlp.transliterate import unicode_transliterate
+
+import re
+from typing import Union
+from flores_codes_map_indic import flores_codes
+
+en_tok = MosesTokenizer(lang="en")
+en_normalizer = MosesPunctNormalizer()
+
+
+def preprocess_line(
+ line: str,
+ normalizer: Union[MosesPunctNormalizer, indic_normalize.IndicNormalizerFactory],
+ lang: str,
+ transliterate: bool = False,
+ remove_tag: bool = True
+) -> str:
+ """
+    Preprocess a line of text by normalizing, tokenizing, and possibly transliterating it.
+
+ Args:
+ line (str): the line of text to preprocess.
+ normalizer (Union[MosesPunctNormalizer, indic_normalize.IndicNormalizerFactory]): an object that performs normalization on the text.
+ lang (str): the language of the line of text
+ transliterate (bool, optional): whether to transliterate the line of text to devanagari (default: False).
+        remove_tag (bool, optional): whether to remove the do not translate tags (`<dnt>` and `</dnt>`) from the line of text (default: True).
+
+ Returns:
+ str: preprocessed line of text.
+ """
+ iso_lang = flores_codes[lang]
+
+    pattern = r'<dnt>(.*?)</dnt>'
+ raw_matches = re.findall(pattern, line)
+
+ if iso_lang == "en":
+ processed_line = " ".join(en_tok.tokenize(en_normalizer.normalize(line.strip()), escape=False))
+ elif transliterate:
+        # transliterates from the given language to Devanagari,
+        # which is why we specify lang2_code as "hi".
+ # line = indic_detokenize.trivial_detokenize(line.strip(), lang)
+ processed_line = unicode_transliterate.UnicodeIndicTransliterator.transliterate(
+ " ".join(indic_tokenize.trivial_tokenize(normalizer.normalize(line.strip()), iso_lang)),
+ iso_lang,
+ "hi",
+ ).replace(" ् ", "्")
+ else:
+ # we only need to transliterate for joint training
+ processed_line = " ".join(
+ indic_tokenize.trivial_tokenize(normalizer.normalize(line.strip()), iso_lang)
+ )
+
+    # the tokenizers split the tags into separate tokens, so stitch them back
+    processed_line = processed_line.replace("< dnt >", "<dnt>")
+    processed_line = processed_line.replace("< / dnt >", "</dnt>")
+
+ processed_line_matches = re.findall(pattern, processed_line)
+ for raw_match, processed_line_match in zip(raw_matches, processed_line_matches):
+ processed_line = processed_line.replace(processed_line_match, raw_match)
+
+    if remove_tag:
+        processed_line = re.sub(r"\s+", " ", processed_line.replace("<dnt>", " ")).strip()
+        processed_line = re.sub(r"\s+", " ", processed_line.replace("</dnt>", " ")).strip()
+
+ return processed_line
+
+
+def preprocess(
+ infname: str,
+ outfname: str,
+ lang: str,
+ transliterate: bool = False,
+    remove_tag: bool = True
+) -> int:
+ """
+    Preprocess the text in the input file by normalizing, tokenizing and performing
+    script conversion, and write the output to a new file.
+
+ Args:
+ infname (str): path of the input file.
+ outfname (str): path of the output file.
+ lang (str): language of the text in the input file.
+ transliterate (bool, optional): whether to transliterate the text in input file to devanagari (default: False).
+        remove_tag (bool, optional): whether to remove the do not translate tags (`<dnt>` and `</dnt>`) from the text in the input file (default: True).
+
+ Returns:
+ int: number of sentences in the input file
+ """
+ iso_lang = flores_codes[lang]
+
+ n = 0
+ num_lines = sum(1 for line in open(infname, "r"))
+
+ if iso_lang == "en":
+ with open(infname, "r", encoding="utf-8") as infile, open(
+ outfname, "w", encoding="utf-8"
+ ) as outfile:
+
+ out_lines = Parallel(n_jobs=-1, backend="multiprocessing")(
+ delayed(preprocess_line)(line, None, lang, transliterate, remove_tag) for line in tqdm(infile, total=num_lines)
+ )
+
+ for line in out_lines:
+ outfile.write(line + "\n")
+ n += 1
+ else:
+ normfactory = indic_normalize.IndicNormalizerFactory()
+ normalizer = normfactory.get_normalizer(iso_lang)
+ # reading
+ with open(infname, "r", encoding="utf-8") as infile, open(
+ outfname, "w", encoding="utf-8"
+ ) as outfile:
+
+ out_lines = Parallel(n_jobs=-1, backend="multiprocessing")(
+ delayed(preprocess_line)(line, normalizer, lang, transliterate, remove_tag)
+ for line in tqdm(infile, total=num_lines)
+ )
+
+ for line in out_lines:
+ outfile.write(line + "\n")
+ n += 1
+
+ return n
+
+
+if __name__ == "__main__":
+ infname = sys.argv[1]
+ outfname = sys.argv[2]
+ lang = sys.argv[3]
+    transliterate = sys.argv[4].lower() == "true"
+    remove_tag = sys.argv[5].lower() == "true"
+
+ print(preprocess(infname, outfname, lang, transliterate, remove_tag))
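The `< dnt >` stitching in `preprocess_line` exists because the tokenizers split the tag's angle brackets into separate tokens; a small demonstration of the repair:

```python
tokenized = "यह < dnt > 42 < / dnt > है"
fixed = tokenized.replace("< dnt >", "<dnt>").replace("< / dnt >", "</dnt>")
print(fixed)  # यह <dnt> 42 </dnt> है
```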
diff --git a/IndicTrans2/scripts/remove_large_sentences.py b/IndicTrans2/scripts/remove_large_sentences.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb3fdccf6c821a192fa8fe8ed065650e9d2a4858
--- /dev/null
+++ b/IndicTrans2/scripts/remove_large_sentences.py
@@ -0,0 +1,70 @@
+import sys
+from tqdm import tqdm
+from typing import List, Tuple
+
+
+def remove_large_sentences(src_path: str, tgt_path: str) -> Tuple[int, List[str], List[str]]:
+ """
+ Removes large sentences from a parallel dataset of source and target data.
+
+ Args:
+ src_path (str): path to the file containing the source language data.
+ tgt_path (str): path to the file containing the target language data.
+
+ Returns:
+ Tuple[int, List[str], List[str]]: a tuple of
+ - an integer representing the number of sentences removed
+ - a list of strings containing the source language data after removing large sentences
+ - a list of strings containing the target language data after removing large sentences
+ """
+ count = 0
+ new_src_lines, new_tgt_lines = [], []
+
+ src_num_lines = sum(1 for line in open(src_path, "r", encoding="utf-8"))
+ tgt_num_lines = sum(1 for line in open(tgt_path, "r", encoding="utf-8"))
+ assert src_num_lines == tgt_num_lines
+
+ with open(src_path, encoding="utf-8") as f1, open(tgt_path, encoding="utf-8") as f2:
+ for src_line, tgt_line in tqdm(zip(f1, f2), total=src_num_lines):
+ src_tokens = src_line.strip().split(" ")
+ tgt_tokens = tgt_line.strip().split(" ")
+
+ if len(src_tokens) > 200 or len(tgt_tokens) > 200:
+ count += 1
+ continue
+
+ new_src_lines.append(src_line)
+ new_tgt_lines.append(tgt_line)
+
+ return count, new_src_lines, new_tgt_lines
+
+
+def create_txt(out_file: str, lines: List[str]):
+ """
+ Creates a text file and writes the given list of lines to file.
+
+ Args:
+ out_file (str): path to the output file to be created.
+ lines (List[str]): a list of strings to be written to the output file.
+ """
+    add_newline = "\n" not in lines[0]
+    with open(out_file, "w", encoding="utf-8") as outfile:
+        for line in lines:
+            if add_newline:
+                outfile.write(line + "\n")
+            else:
+                outfile.write(line)
+
+
+if __name__ == "__main__":
+
+ src_path = sys.argv[1]
+ tgt_path = sys.argv[2]
+ new_src_path = sys.argv[3]
+ new_tgt_path = sys.argv[4]
+
+ count, new_src_lines, new_tgt_lines = remove_large_sentences(src_path, tgt_path)
+ print(f"{count} lines removed due to seq_len > 200")
+ create_txt(new_src_path, new_src_lines)
+ create_txt(new_tgt_path, new_tgt_lines)
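The cap is on whitespace-separated tokens, not characters, and a pair is dropped if either side exceeds it; a one-line restatement of the rule with invented inputs:

```python
keep = lambda src, tgt, cap=200: len(src.split(" ")) <= cap and len(tgt.split(" ")) <= cap
print(keep("short sentence", "छोटा वाक्य"))  # True
```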
diff --git a/IndicTrans2/scripts/remove_train_devtest_overlaps.py b/IndicTrans2/scripts/remove_train_devtest_overlaps.py
new file mode 100644
index 0000000000000000000000000000000000000000..73dc38cf4d37a9acacb284caa52b2811c1212412
--- /dev/null
+++ b/IndicTrans2/scripts/remove_train_devtest_overlaps.py
@@ -0,0 +1,265 @@
+import os
+import sys
+import string
+from tqdm import tqdm
+from collections import defaultdict
+from typing import List, Tuple, Dict
+
+
+def read_lines(fname: str) -> List[str]:
+ """
+ Reads all lines from an input file and returns them as a list of strings.
+
+ Args:
+ fname (str): path to the input file to read
+
+ Returns:
+ List[str]: a list of strings, where each string is a line from the file
+ and returns an empty list if the file does not exist.
+ """
+    # if the path doesn't exist, return an empty list
+ if not os.path.exists(fname):
+ return []
+
+ with open(fname, "r") as f:
+ lines = f.readlines()
+ return lines
+
+
+def create_txt(out_file: str, lines: List[str]):
+ """
+ Creates a text file and writes the given list of lines to file.
+
+ Args:
+ out_file (str): path to the output file to be created.
+ lines (List[str]): a list of strings to be written to the output file.
+ """
+    add_newline = "\n" not in lines[0]
+    with open(out_file, "w", encoding="utf-8") as outfile:
+        for line in lines:
+            if add_newline:
+                outfile.write(line + "\n")
+            else:
+                outfile.write(line)
+
+
+def pair_dedup_lists(src_list: List[str], tgt_list: List[str]) -> Tuple[List[str], List[str]]:
+ """
+ Removes duplicates from two lists by pairing their elements and removing duplicates from the pairs.
+
+ Args:
+ src_list (List[str]): a list of strings from source language data.
+ tgt_list (List[str]): a list of strings from target language data.
+
+ Returns:
+ Tuple[List[str], List[str]]: a tuple of deduplicated version of "`(src_list, tgt_list)`".
+ """
+    src_tgt = list(set(zip(src_list, tgt_list)))
+    src_deduped, tgt_deduped = map(list, zip(*src_tgt))
+    return src_deduped, tgt_deduped
+
+
+def pair_dedup_files(src_file: str, tgt_file: str):
+ """
+ Removes duplicates from two files by pairing their lines and removing duplicates from the pairs.
+
+ Args:
+ src_file (str): path to the source language file to deduplicate.
+ tgt_file (str): path to the target language file to deduplicate.
+ """
+ src_lines = read_lines(src_file)
+ tgt_lines = read_lines(tgt_file)
+ len_before = len(src_lines)
+
+    src_deduped, tgt_deduped = pair_dedup_lists(src_lines, tgt_lines)
+
+    len_after = len(src_deduped)
+    num_duplicates = len_before - len_after
+
+    print(f"Dropped {num_duplicates} duplicate pairs in {src_file}")
+    create_txt(src_file, src_deduped)
+    create_txt(tgt_file, tgt_deduped)
+
+
+def strip_and_normalize(line: str) -> str:
+ """
+ Strips and normalizes a string by lowercasing it, removing spaces and punctuation.
+
+ Args:
+ line (str): string to strip and normalize.
+
+ Returns:
+ str: stripped and normalized version of the input string.
+ """
+    # lowercase the line, remove spaces and strip punctuation
+
+    # one of the fastest ways to apply an exclusion list and strip those
+    # characters from a string:
+    # https://towardsdatascience.com/how-to-efficiently-remove-punctuations-from-a-string-899ad4a059fb
+ exclist = string.punctuation + "\u0964"
+ table_ = str.maketrans("", "", exclist)
+
+ line = line.replace(" ", "").lower()
+    # don't use this method, it is painfully slow
+ # line = "".join([i for i in line if i not in string.punctuation])
+ line = line.translate(table_)
+ return line
+
+
+def expand_tupled_list(list_of_tuples: List[Tuple[str, str]]) -> Tuple[List[str], List[str]]:
+ """
+ Expands a list of tuples into two lists by extracting the first and second elements of the tuples.
+
+ Args:
+ list_of_tuples (List[Tuple[str, str]]): a list of tuples, where each tuple contains two strings.
+
+ Returns:
+ Tuple[List[str], List[str]]: a tuple containing two lists, the first being the first elements of the
+ tuples in `list_of_tuples` and the second being the second elements.
+ """
+ # convert list of tuples into two lists
+ # https://stackoverflow.com/questions/8081545/how-to-convert-list-of-tuples-to-multiple-lists
+ # [(en, as), (as, bn), (bn, gu)] - > [en, as, bn], [as, bn, gu]
+ list_a, list_b = map(list, zip(*list_of_tuples))
+ return list_a, list_b
+
+
+def normalize_and_gather_all_benchmarks(devtest_dir: str) -> Dict[str, Dict[str, List[str]]]:
+ """
+ Normalizes and gathers all benchmark datasets from a directory into a dictionary.
+
+ Args:
+        devtest_dir (str): path to the directory containing one subdirectory per benchmark dataset, where \
+            each benchmark holds pair directories named "`src_lang-tgt_lang`" containing four files: `dev.src_lang`, \
+            `dev.tgt_lang`, `test.src_lang`, and `test.tgt_lang` for the development and test sets of that pair.
+
+    Returns:
+        Dict[str, Dict[str, List[str]]]: a dictionary mapping each language pair (in the format "`src_lang-tgt_lang`") \
+            to a dictionary with keys `src` and `tgt`, holding the normalized source and target lines \
+            pooled across all benchmark datasets.
+ """
+ devtest_pairs_normalized = defaultdict(lambda: defaultdict(list))
+
+ for benchmark in os.listdir(devtest_dir):
+ print(f"{devtest_dir}/{benchmark}")
+ for pair in tqdm(os.listdir(f"{devtest_dir}/{benchmark}")):
+ src_lang, tgt_lang = pair.split("-")
+
+ src_dev = read_lines(f"{devtest_dir}/{benchmark}/{pair}/dev.{src_lang}")
+ tgt_dev = read_lines(f"{devtest_dir}/{benchmark}/{pair}/dev.{tgt_lang}")
+ src_test = read_lines(f"{devtest_dir}/{benchmark}/{pair}/test.{src_lang}")
+ tgt_test = read_lines(f"{devtest_dir}/{benchmark}/{pair}/test.{tgt_lang}")
+
+            # if the tgt_pair data doesn't exist for a particular test set,
+            # it will be an empty list
+ if tgt_test == [] or tgt_dev == []:
+ print(f"{benchmark} does not have {src_lang}-{tgt_lang} data")
+ continue
+
+ # combine both dev and test sets into one
+ src_devtest = src_dev + src_test
+ tgt_devtest = tgt_dev + tgt_test
+
+ src_devtest = [strip_and_normalize(line) for line in src_devtest]
+ tgt_devtest = [strip_and_normalize(line) for line in tgt_devtest]
+
+ devtest_pairs_normalized[pair]["src"].extend(src_devtest)
+ devtest_pairs_normalized[pair]["tgt"].extend(tgt_devtest)
+
+ # dedup merged benchmark datasets
+ for pair in devtest_pairs_normalized:
+ src_devtest = devtest_pairs_normalized[pair]["src"]
+ tgt_devtest = devtest_pairs_normalized[pair]["tgt"]
+
+ src_devtest, tgt_devtest = pair_dedup_lists(src_devtest, tgt_devtest)
+ devtest_pairs_normalized[pair]["src"] = src_devtest
+ devtest_pairs_normalized[pair]["tgt"] = tgt_devtest
+
+ return devtest_pairs_normalized
+
+
+def remove_train_devtest_overlaps(train_dir: str, devtest_dir: str):
+ """
+ Removes overlapping data between the training and dev/test (benchmark)
+ datasets for all language pairs.
+
+ Args:
+ train_dir (str): path of the directory containing the training data.
+ devtest_dir (str): path of the directory containing the dev/test data.
+ """
+ devtest_pairs_normalized = normalize_and_gather_all_benchmarks(devtest_dir)
+
+ all_src_sentences_normalized = []
+ for key in devtest_pairs_normalized:
+ all_src_sentences_normalized.extend(devtest_pairs_normalized[key]["src"])
+    # remove duplicates across the test benchmarks of all language pairs;
+    # not necessarily optimal, but a tradeoff for keeping the code general
+ all_src_sentences_normalized = list(set(all_src_sentences_normalized))
+
+ src_overlaps = []
+ tgt_overlaps = []
+
+ pairs = os.listdir(train_dir)
+ for pair in pairs:
+ src_lang, tgt_lang = pair.split("-")
+
+ new_src_train, new_tgt_train = [], []
+
+ src_train = read_lines(f"{train_dir}/{pair}/train.{src_lang}")
+ tgt_train = read_lines(f"{train_dir}/{pair}/train.{tgt_lang}")
+
+ len_before = len(src_train)
+ if len_before == 0:
+ continue
+
+ src_train_normalized = [strip_and_normalize(line) for line in src_train]
+ tgt_train_normalized = [strip_and_normalize(line) for line in tgt_train]
+
+ src_devtest_normalized = all_src_sentences_normalized
+ tgt_devtest_normalized = devtest_pairs_normalized[pair]["tgt"]
+
+ # compute all src and tgt super strict overlaps for a lang pair
+ overlaps = set(src_train_normalized) & set(src_devtest_normalized)
+ src_overlaps.extend(list(overlaps))
+
+ overlaps = set(tgt_train_normalized) & set(tgt_devtest_normalized)
+ tgt_overlaps.extend(list(overlaps))
+
+ # dictionaries offer O(1) lookup
+ src_overlaps_dict, tgt_overlaps_dict = {}, {}
+ for line in src_overlaps:
+ src_overlaps_dict[line] = 1
+ for line in tgt_overlaps:
+ tgt_overlaps_dict[line] = 1
+
+        # loop to remove the overlapping data, tracking the original index
+        # so that skipped pairs do not shift the alignment
+        for idx, (src_line_norm, tgt_line_norm) in tqdm(
+            enumerate(zip(src_train_normalized, tgt_train_normalized)), total=len_before
+        ):
+            if src_overlaps_dict.get(src_line_norm, None):
+                continue
+            if tgt_overlaps_dict.get(tgt_line_norm, None):
+                continue
+
+            new_src_train.append(src_train[idx])
+            new_tgt_train.append(tgt_train[idx])
+
+        len_after = len(new_src_train)
+        print(
+            f"Removed {len_before - len_after} overlapping pairs between train and devtest for {pair}"
+        )
+        print(f"Saving new files at {train_dir}/{pair}/")
+ create_txt(f"{train_dir}/{pair}/train.{src_lang}", new_src_train)
+ create_txt(f"{train_dir}/{pair}/train.{tgt_lang}", new_tgt_train)
+
+
+if __name__ == "__main__":
+
+ train_data_dir = sys.argv[1]
+    # the benchmarks directory should contain all the test sets
+ devtest_data_dir = sys.argv[2]
+
+ remove_train_devtest_overlaps(train_data_dir, devtest_data_dir)
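The policy implemented above is deliberately asymmetric: a training pair is dropped if its source side appears among the dev/test sources of any language pair, but only if its target side appears among the same pair's dev/test targets. A toy restatement with invented sentences:

```python
src_block = {"a test sentence"}   # union of benchmark sources, all pairs
tgt_block = {"एक परीक्षण वाक्य"}    # benchmark targets for this pair only

pair = ("a test sentence", "कुछ और")
drop = pair[0] in src_block or pair[1] in tgt_block
print(drop)  # True -- the source side leaks, so the pair is removed
```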
diff --git a/IndicTrans2/train.sh b/IndicTrans2/train.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ee60d0551eacce5bb6cdd65f99675c844c82dd0f
--- /dev/null
+++ b/IndicTrans2/train.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# This script trains the translation model on the binarized data using fairseq.
+
+
+echo `date`
+exp_dir=$1 # path of the experiment directory
+model_arch=${2:-"transformer_18_18"} # model architecture (defaults to `transformer_18_18`)
+
+fairseq-train $exp_dir/final_bin \
+--max-source-positions=256 \
+--max-target-positions=256 \
+--source-lang=SRC \
+--target-lang=TGT \
+--max-update=1000000 \
+--save-interval-updates=2500 \
+--arch=$model_arch \
+--activation-fn gelu \
+--criterion=label_smoothed_cross_entropy \
+--label-smoothing=0.1 \
+--optimizer adam \
+--adam-betas "(0.9, 0.98)" \
+--lr-scheduler=inverse_sqrt \
+--clip-norm 1.0 \
+--warmup-init-lr 1e-07 \
+--lr 5e-4 \
+--warmup-updates 4000 \
+--dropout 0.2 \
+--save-dir $exp_dir/model \
+--keep-last-epochs 5 \
+--keep-interval-updates 3 \
+--patience 10 \
+--skip-invalid-size-inputs-valid-test \
+--fp16 \
+--user-dir model_configs \
+--update-freq=32 \
+--distributed-world-size 8 \
+--num-workers 24 \
+--max-tokens 1024 \
+--eval-bleu \
+--eval-bleu-args "{\"beam\": 1, \"lenpen\": 1.0, \"max_len_a\": 1.2, \"max_len_b\": 10}" \
+--eval-bleu-detok moses \
+--eval-bleu-remove-bpe sentencepiece \
+--eval-bleu-print-samples \
+--best-checkpoint-metric bleu \
+--maximize-best-checkpoint-metric \
+--task translation
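One number worth deriving from these flags: the effective batch size per optimizer step is max-tokens × update-freq × distributed-world-size:

```python
max_tokens, update_freq, world_size = 1024, 32, 8
print(max_tokens * update_freq * world_size)  # 262144 tokens per update
```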
diff --git a/IndicTrans2/translation_guidelines.pdf b/IndicTrans2/translation_guidelines.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..efc8fa6a65825bb35d9dbf797b860362393795b2
--- /dev/null
+++ b/IndicTrans2/translation_guidelines.pdf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20d6b1f2afa23013a5169403ee73ba4042a8ffad2e7bf14f2af790b1c7d9b3a9
+size 105266
diff --git a/IndicTrans2/utils.map_token_lang.tsv b/IndicTrans2/utils.map_token_lang.tsv
new file mode 100644
index 0000000000000000000000000000000000000000..e5db3f34ce57efd79dca9b7e5a6d58273179df0b
--- /dev/null
+++ b/IndicTrans2/utils.map_token_lang.tsv
@@ -0,0 +1,26 @@
+asm_Beng hi
+ben_Beng hi
+brx_Deva hi
+doi_Deva hi
+gom_Deva hi
+eng_Latn en
+guj_Gujr hi
+hin_Deva hi
+kan_Knda hi
+kas_Arab ar
+kas_Deva hi
+mai_Deva hi
+mar_Deva hi
+mal_Mlym hi
+mni_Beng hi
+mni_Mtei en
+npi_Deva hi
+ory_Orya hi
+pan_Guru hi
+san_Deva hi
+sat_Olck hi
+snd_Arab ar
+snd_Deva hi
+tam_Taml hi
+tel_Telu hi
+urd_Arab ar