#/bin/bash root_dir=$(pwd) echo "Setting up the environment in the $root_dir" # -------------------------------------------------------------- # create and activate the virtual environment # -------------------------------------------------------------- echo "Creating a virtual environment with python3" conda create -n itv2_hf python=3.9 -y conda activate itv2_hf echo "Installing all the dependencies" conda install pip python3 -m pip install --upgrade pip # -------------------------------------------------------------- # PyTorch Installation # -------------------------------------------------------------- python3 -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu118 # -------------------------------------------------------------- # Install additional utility packages # -------------------------------------------------------------- python3 -m pip install nltk sacremoses pandas regex mock transformers>=4.33.2 mosestokenizer python3 -c "import nltk; nltk.download('punkt')" python3 -m pip install bitsandbytes scipy accelerate datasets flash-attn>=2.1 # -------------------------------------------------------------- # Sentencepiece for tokenization # -------------------------------------------------------------- # build the cpp binaries from the source repo in order to use the command line utility # source repo: https://github.com/google/sentencepiece python3 -m pip install sentencepiece # ----------------------------------------------------------------- # Install IndicTrans2 tokenizer and its dependencies # ----------------------------------------------------------------- git clone https://github.com/VarunGumma/IndicTransToolkit cd IndicTransToolkit python3 -m pip install --editable ./ cd $root_dir echo "Setup completed!"