Tzktz's picture
Upload 7664 files
6fc683c verified
raw
history blame contribute delete
831 Bytes
#!/bin/bash
#
# Fetch the FUNSD dataset archive, run preprocess.py over its training and
# testing annotations, then collect the distinct labels into data/labels.txt.
#
# Run from the directory that contains preprocess.py; the archive is
# downloaded into, and ./data is created in, the current directory.

# Abort on the first failing command, on use of an unset variable, and when
# any stage of a pipeline fails, so later steps never run on missing or
# partial data.
set -euo pipefail

wget https://guillaumejaume.github.io/FUNSD/dataset.zip

# Unpack the archive, rename the extracted "dataset" directory to "data",
# then remove the archive and any __MACOSX directory left in the current
# directory.
unzip dataset.zip
mv dataset data
rm -rf dataset.zip __MACOSX

# Training split.
python preprocess.py --data_dir data/training_data/annotations \
  --data_split train \
  --output_dir data \
  --model_name_or_path bert-base-uncased \
  --max_len 510

# Testing split.
python preprocess.py --data_dir data/testing_data/annotations \
  --data_split test \
  --output_dir data \
  --model_name_or_path bert-base-uncased \
  --max_len 510

# Build the label list: take the second tab-separated field of every line in
# data/train.txt (presumably written by the train preprocessing run above --
# confirm against preprocess.py), drop empty lines, and keep one sorted copy
# of each remaining value.
# With pipefail, a missing train.txt or an empty label set (grep -v exits 1
# when it selects no lines) now makes the script fail instead of silently
# writing an empty labels.txt.
cut -d$'\t' -f 2 data/train.txt \
  | grep -v "^$" \
  | sort \
  | uniq > data/labels.txt