diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..24b1d03d8e2e8347d43e19e75abdcf6c85fc70a7 Binary files /dev/null and b/.DS_Store differ diff --git a/CustomBERTModel.py b/CustomBERTModel.py new file mode 100644 index 0000000000000000000000000000000000000000..8724eb72ddefe5da7c5dc20fa846b429149fabc2 --- /dev/null +++ b/CustomBERTModel.py @@ -0,0 +1,33 @@ +import torch +import torch.nn as nn +from src.bert import BERT + +class CustomBERTModel(nn.Module): + def __init__(self, vocab_size, output_dim, pre_trained_model_path): + super(CustomBERTModel, self).__init__() + hidden_size = 768 + self.bert = BERT(vocab_size=vocab_size, hidden=hidden_size, n_layers=4, attn_heads=8, dropout=0.1) + + # Load the pre-trained model's state_dict + checkpoint = torch.load(pre_trained_model_path, map_location=torch.device('cpu')) + if isinstance(checkpoint, dict): + self.bert.load_state_dict(checkpoint) + else: + raise TypeError(f"Expected state_dict, got {type(checkpoint)} instead.") + + # Fully connected layer with input size 768 (matching BERT hidden size) + self.fc = nn.Linear(hidden_size, output_dim) + + def forward(self, sequence, segment_info): + sequence = sequence.to(next(self.parameters()).device) + segment_info = segment_info.to(sequence.device) + + x = self.bert(sequence, segment_info) + print(f"BERT output shape: {x.shape}") + + cls_embeddings = x[:, 0] # Extract CLS token embeddings + print(f"CLS Embeddings shape: {cls_embeddings.shape}") + + logits = self.fc(cls_embeddings) # Pass tensor of size (batch_size, 768) to the fully connected layer + + return logits diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b704b0e2d6a4435d194e2faa4cd12b86f00697ae --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,645 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 15, + "id": "9b88da56-1f8f-4f9f-88f6-16d327cc89a0", + "metadata": { + "tags": [] + }, + 
"outputs": [ + { + "data": { + "text/plain": [ + "(['EP20_train', 'EP20_train', 'EP20_train', 'EP20_train', 'EP20_train'],\n", + " ['[[542 53]\\n [ 0 0]]',\n", + " '[[569 26]\\n [ 0 0]]',\n", + " '[[234 361]\\n [ 0 0]]',\n", + " '[[ 76 519]\\n [ 0 0]]',\n", + " '[[ 78 517]\\n [ 0 0]]'],\n", + " ['[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]',\n", + " '[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]',\n", + " '[1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]',\n", + " '[1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1]',\n", + " '[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1]'])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "log_file_path = \"/home/jupyter/bert/ratio_proportion_change3_1920/logs/oct_logs/log_train_finetuned_info.txt\"\n", + "\n", + "iterations = []\n", + "loss_values = []\n", + "accuracy_values = []\n", + "\n", + "with open(log_file_path, 'r') as file:\n", + " for line in file:\n", + " if line.strip().startswith(\"{\"): \n", + " line_dict = eval(line.strip()) \n", + " \n", + " iterations.append(line_dict.get('epoch')) \n", + " loss = line_dict.get('confusion_matrix') \n", + " accuracy = line_dict.get('predicted_labels')\n", + "\n", + " loss_values.append(loss)\n", + " 
accuracy_values.append(accuracy)\n", + "\n", + "iterations[:5], loss_values[:5], accuracy_values[:5]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "315c3b42-0038-42d4-bc73-0e1b33d47d87", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "accuracy_values = np.linspace(0, 1, 20)\n", + "\n", + "\n", + "plt.plot(iterations, accuracy_values, marker='o', label='Accuracy', color='green', linestyle='-', linewidth=2)\n", + "plt.xlabel('Training Iterations', fontsize=14)\n", + "plt.ylabel('Accuracy', fontsize=14)\n", + "plt.title('Model Performance over Training Iterations', fontsize=16)\n", + "plt.xticks(iterations) \n", + "plt.ylim(0, 1) \n", + "plt.grid(True)\n", + "\n", + "plt.twinx() \n", + "plt.plot(iterations, loss_values, marker='x', label='Loss', color='red', linestyle='--', linewidth=2)\n", + "plt.ylabel('Loss', fontsize=14)\n", + "plt.ylim(0, 1.6)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4598114c-3da3-4337-b650-6eed42cedc00", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "\n", + "log_file_path = \"/home/jupyter/bert/ratio_proportion_change3_1920/logs/oct_logs/log_train_finetuned_info.txt\"\n", + "\n", + "df = pd.read_csv(log_file_path, sep=\",\", names=[\"Epoch\", \"ConfusionMatrix\", \"TrueLabels\", \"PredictedLabels\", \"Probabilities\"])\n", + "\n", + "print(df)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d93e8a14-82ae-4a76-9629-03f44f8a5d03", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0470f233-4cc1-4933-9d10-17549e7375a2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import json\n", + "\n", + "log_file_path = 
\"/home/jupyter/bert/ratio_proportion_change3_1920/logs/oct_logs/log_train_finetuned_info.txt\"\n", + "\n", + "iterations = []\n", + "confusion_matrices = []\n", + "predicted_labels = []\n", + "\n", + "with open(log_file_path, 'r') as file:\n", + " for line in file:\n", + " try:\n", + " line_dict = json.loads(line.strip()) # Use json.loads to parse the JSON string\n", + " iterations.append(line_dict.get('epoch'))\n", + " confusion_matrices.append(line_dict.get('confusion_matrix'))\n", + " predicted_labels.append(line_dict.get('predicted_labels'))\n", + " except json.JSONDecodeError as e:\n", + " print(f\"Error decoding JSON on line: {line.strip()} | Error: {str(e)}\")\n", + "\n", + "print(iterations[:5], confusion_matrices[:5], predicted_labels[:5])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "e2bc96e3-2e3d-428c-9f51-7d92562244d6", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA04AAAIjCAYAAAA0vUuxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/OQEPoAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3gUVReHf5tCSKf3QOiCdJAi0jsCKiBVmhSRXqR90hUQVEQQUKkCIqBSVCK9Sm+hdwIBpUOAJCTZZOf7Y9jNlpndmdmZnZnd8z5PnuzM3Ln3zMwt59xyroFhGAYEQRAEQRAEQRAEL35qC0AQBEEQBEEQBKF1yHAiCIIgCIIgCIJwARlOBEEQBEEQBEEQLiDDiSAIgiAIgiAIwgVkOBEEQRAEQRAEQbiADCeCIAiCIAiCIAgXkOFEEARBEARBEAThAjKcCIIgCIIgCIIgXECGE0EQBEEQBEEQhAvIcCIIgrAjOjoaPXv2VFsM1TAYDBg0aJBs8S1fvhwGgwHHjx93GbZ+/fqoX7++5fjmzZswGAxYvny55dzkyZNhMBhEpX3z5k2RUhN6wtfLLEEQnoEMJ4IgfIbr16/jo48+QrFixZA1a1ZERESgdu3a+Pbbb/Hy5Uu1xXOK2QAw/2XNmhWlSpXCoEGDcP/+fbXFU53p06dj48aNaothw4EDB/Dee+8hb968CAoKQnR0ND766CPEx8erLRonN2/eRK9evVC8eHFkzZoV+fLlQ926dTFp0iS1RSMIgtAEAWoLQBAE4Qk2b96M999/H0FBQejevTvKlSuHtLQ0/PPPPxg1ahTOnz+PH3/8UW0xXTJ16lQULVoUKSkp+Oeff7Bw4ULExMTg3LlzCAkJUVs8t9m2bZvLMOPHj8fYsWNtzk2fPh3t27fHu+++a3O+W7du6NSpE4KCguQU0yXz5s3D0KFDUaxYMQwePBj58+fHxYsXsXjxYqxduxYxMTF48803PSqTM65du4Y33ngDwcHB+PDDDxEdHY27d+/i5MmTmDlzJqZMmaK2iARBEKpDhhNBEF5
PXFwcOnXqhCJFimDXrl3Inz+/5drAgQNx7do1bN68WUUJhdOiRQtUq1YNANCnTx/kzJkTs2fPxqZNm9C5c2fOe5KSkhAaGupJMSWTJUsWl2ECAgIQECCs+fL394e/v7+7YoniwIEDGDZsGN566y1s2bLFxqD9+OOPUbt2bbRv3x7nz59H9uzZPSaXs3zwzTffIDExEbGxsShSpIjNtQcPHnhCPIIgCM1DU/UIgvB6Zs2ahcTERCxZssTGaDJTokQJDB06lPf+J0+e4JNPPkH58uURFhaGiIgItGjRAqdPn3YIO2/ePLz++usICQlB9uzZUa1aNaxevdpy/cWLFxg2bBiio6MRFBSEPHnyoEmTJjh58qSkZ2vYsCEA1jgEgJ49eyIsLAzXr19Hy5YtER4ejq5duwJgFeeRI0ciKioKQUFBKF26NL766iswDMMZ988//4zSpUsja9asqFq1Kvbt22dz/datWxgwYABKly6N4OBg5MyZE++//z7veqLk5GR89NFHyJkzJyIiItC9e3c8ffrUJoz9Gicu7Nc4GQwGJCUl4aeffrJMZTSvd+Fb4/T333+jTp06CA0NRXh4ON5++22cP3/eJsy9e/fQq1cvFCpUCEFBQcifPz/eeecdl+ulPvvsMxgMBvz0008Oo4DFixfHrFmzcPfuXfzwww8AgK+++goGgwG3bt1yiGvcuHHIkiWLzXs6cuQImjdvjsjISISEhKBevXo4cOAA5zu6cOECunTpguzZs+Ott97ilfn69esoVKiQg9EEAHny5LE53rRpE95++20UKFAAQUFBKF68OD777DNkZGTYhKtfvz7KlSuHM2fOoF69eggJCUGJEiXw22+/AQD27t2LGjVqIDg4GKVLl8aOHTs4n+HSpUvo0KEDIiIikDNnTgwdOhQpKSm8z2ImISEBw4YNs+T3EiVKYObMmTCZTDbh1qxZg6pVqyI8PBwREREoX748vv32W5fxEwThe5DhRBCE1/Pnn3+iWLFikqdG3bhxAxs3bkSrVq0we/ZsjBo1CmfPnkW9evXw33//WcItWrQIQ4YMQdmyZTFnzhxMmTIFlSpVwpEjRyxh+vfvj4ULF6Jdu3ZYsGABPvnkEwQHB+PixYuSZLt+/ToAIGfOnJZz6enpaNasGfLkyYOvvvoK7dq1A8MwaNOmDb755hs0b94cs2fPRunSpTFq1CiMGDHCId69e/di2LBh+OCDDzB16lQ8fvwYzZs3x7lz5yxhjh07hoMHD6JTp06YO3cu+vfvj507d6J+/fpITk52iHPQoEG4ePEiJk+ejO7du+Pnn3/Gu+++y2u4CWXlypUICgpCnTp1sHLlSqxcuRIfffSR0/Bvv/02wsLCMHPmTEyYMAEXLlzAW2+9ZWMUtWvXDhs2bECvXr2wYMECDBkyBC9evHC6Rik5ORk7d+5EnTp1ULRoUc4wHTt2RFBQEP766y8AQIcOHWAwGLBu3TqHsOvWrUPTpk0tI1O7du1C3bp18fz5c0yaNAnTp09HQkICGjZsiKNHjzrc//777yM5ORnTp09H3759eeUuUqQIbt++jV27dvGGMbN8+XKEhYVhxIgR+Pbbb1G1alVMnDjRYfokADx9+hStWrVCjRo1MGvWLAQFBaFTp05Yu3YtOnXqhJYtW+KLL75AUlIS2rdvjxcvXjjE0aFDB6SkpGDGjBlo2bIl5s6di379+jmVMTk5GfXq1cOqVavQvXt3zJ07F7Vr18a4ceNs8vv27dvRuXNnZM+eHTNnzsQXX3yB+vXrOxiiBEEQAACGIAjCi3n27BkDgHnnnXcE31OkSBGmR48eluOUlBQmIyPDJkxcXBwTFBTETJ061XLunXfeYV5//XWncUdGRjIDBw4ULIuZZcuWMQCYHTt2MA8fPmRu377NrFmzhsmZMycTHBzM3Llzh2EYhunRowcDgBk7dqzN/Rs3bmQAMJ9//rnN+fbt2zMGg4G5du2a5RwABgBz/Phxy7lbt24
xWbNmZd577z3LueTkZAc5Dx06xABgVqxY4SB71apVmbS0NMv5WbNmMQCYTZs2Wc7Vq1ePqVevnuU4Li6OAcAsW7bMcm7SpEmMffMVGhpq883s046Li2MYhmFevHjBZMuWjenbt69NuHv37jGRkZGW80+fPmUAMF9++aVDnM6IjY1lADBDhw51Gq5ChQpMjhw5LMe1atViqlatahPm6NGjNu/SZDIxJUuWZJo1a8aYTCZLuOTkZKZo0aJMkyZNLOfM76hz586C5D537hwTHBzMAGAqVarEDB06lNm4cSOTlJTkEJbru3/00UdMSEgIk5KSYjlXr149BgCzevVqy7lLly4xABg/Pz/m8OHDlvNbt27l/c5t2rSxSWvAgAEMAOb06dOWc/Zl9rPPPmNCQ0OZK1eu2Nw7duxYxt/fn4mPj2cYhmGGDh3KREREMOnp6S7eEEEQBMPQiBNBEF7N8+fPAQDh4eGS4wgKCoKfH1tdZmRk4PHjxwgLC0Pp0qVtpthly5YNd+7cwbFjx3jjypYtG44cOWIzUiWGxo0bI3fu3IiKikKnTp0QFhaGDRs2oGDBgjbhPv74Y5vjmJgY+Pv7Y8iQITbnR44cCYZh8Pfff9ucr1WrFqpWrWo5Lly4MN555x1s3brVMiUrODjYct1oNOLx48coUaIEsmXLxjn1sF+/fggMDLSRMSAgADExMSLfgnS2b9+OhIQEdO7cGY8ePbL8+fv7o0aNGti9ezcA9tmyZMmCPXv2OEwndIZ5xMRVfgsPD7fkTYAdhTpx4oRlBBEA1q5di6CgILzzzjsAgNjYWFy9ehVdunTB48ePLbInJSWhUaNG2Ldvn8M0tP79+wuS+/XXX0dsbCw++OAD3Lx5E99++y3effdd5M2bF4sWLbIJa/3dX7x4gUePHqFOnTpITk7GpUuXbMKGhYWhU6dOluPSpUsjW7ZsKFOmDGrUqGE5b/5948YNB9kGDhxoczx48GAAcJpvfv31V9SpUwfZs2e3+c6NGzdGRkaGZdpptmzZkJSUhO3btzt9PwRBEABN1SMIwsuJiIgAAM4pQEIxmUz45ptvULJkSQQFBSFXrlzInTs3zpw5g2fPnlnCjRkzBmFhYahevTpKliyJgQMHOkz5mTVrFs6dO4eoqChUr14dkydP5lQW+Zg/fz62b9+O3bt348KFC7hx4waaNWtmEyYgIACFChWyOXfr1i0UKFDAQaEvU6aM5bo1JUuWdEi7VKlSSE5OxsOHDwEAL1++xMSJEy1rSMzvJSEhwea98MUZFhaG/Pnze3SPpatXrwJg14blzp3b5m/btm0WRwhBQUGYOXMm/v77b+TNmxd169bFrFmzcO/ePafxm9+vq/z24sULm2/x/vvvw8/PD2vXrgUAMAyDX3/9FS1atLDkYbPsPXr0cJB98eLFSE1NdXjvfNMFuShVqhRWrlyJR48e4cyZM5g+fToCAgLQr18/m/VH58+fx3vvvYfIyEhEREQgd+7c+OCDDwDAIf1ChQo57LkVGRmJqKgoh3MAOI1U+3xTvHhx+Pn5Oc03V69exZYtWxzeU+PGjQFkOrwYMGAASpUqhRYtWqBQoUL48MMPsWXLFmeviSAIH4a86hEE4dVERESgQIECNmtzxDJ9+nRMmDABH374IT777DPkyJEDfn5+GDZsmE0Pf5kyZXD58mX89ddf2LJlC37//XcsWLAAEydOtLhz7tChA+rUqYMNGzZg27Zt+PLLLzFz5kysX78eLVq0cClL9erVLV71+LAeIVOSwYMHY9myZRg2bBhq1aqFyMhIGAwGdOrUyWHkQyuY5Vq5ciXy5cvncN3aW9+wYcPQunVrbNy4EVu3bsWECRMwY8YM7Nq1C5UrV+aMv0SJEggICMCZM2d4ZUhNTcXly5dtvmOBAgVQp04drFu3Dv/73/9w+PBhxMfHY+bMmQ6yf/nll6hUqRJn3GFhYTbH1qNDQvH390f58uVRvnx51KpVCw0aNMDPP/+
Mxo0bIyEhAfXq1UNERASmTp1q2fPp5MmTGDNmjMN35/NoyHeeEbDeTcjmxyaTCU2aNMHo0aM5r5cqVQoA6/giNjYWW7duxd9//42///4by5YtQ/fu3fHTTz+5TIcgCN+CDCeCILyeVq1a4ccff8ShQ4dQq1Yt0ff/9ttvaNCgAZYsWWJzPiEhAbly5bI5Fxoaio4dO6Jjx45IS0tD27ZtMW3aNIwbNw5Zs2YFAOTPnx8DBgzAgAED8ODBA1SpUgXTpk0TZDhJpUiRItixY4fDSId5apW9NzXz6IY1V65cQUhICHLnzg2AfS89evTA119/bQmTkpKChIQEThmuXr2KBg0aWI4TExNx9+5dtGzZUvJzmRGiTAPsaAXAKszm0QdX4UeOHImRI0fi6tWrqFSpEr7++musWrWKM3xoaCgaNGiAXbt24datW5xe6tatW4fU1FS0atXK5nzHjh0xYMAAXL58GWvXrkVISAhat27tIHtERIQg2eXAbNzdvXsXALBnzx48fvwY69evR926dS3hzF4dleDq1as2I2fXrl2DyWRCdHQ07z3FixdHYmKioPeUJUsWtG7dGq1bt4bJZMKAAQPwww8/YMKECShRooQcj0AQhJdAU/UIgvB6Ro8ejdDQUPTp0wf37993uH79+nWn7of9/f0desJ//fVX/PvvvzbnHj9+bHOcJUsWlC1bFgzDwGg0IiMjw2EqU548eVCgQAGkpqaKfSxRtGzZEhkZGfjuu+9szn/zzTcwGAwORtuhQ4ds1indvn0bmzZtQtOmTS2jBVzvZd68eQ5uqc38+OOPMBqNluOFCxciPT1dFoMxNDSU12CzplmzZoiIiMD06dNtZDFjnoaYnJzs4PK6ePHiCA8Pd/mtxo8fD4Zh0LNnT7x8+dLmWlxcHEaPHo38+fM7eP5r164d/P398csvv+DXX39Fq1atbPZdqlq1KooXL46vvvoKiYmJvLJLYf/+/Zzvw7yOqHTp0gAyR4qsv3taWhoWLFggOW1XzJ8/3+Z43rx5AOA033To0AGHDh3C1q1bHa4lJCQgPT0dgGOZ9fPzQ4UKFQBA8TJJEIT+oBEngiC8nuLFi2P16tXo2LEjypQpg+7du6NcuXJIS0vDwYMH8euvv1r2/eGiVatWmDp1Knr16oU333wTZ8+exc8//4xixYrZhGvatCny5cuH2rVrI2/evLh48SK+++47vP322wgPD0dCQgIKFSqE9u3bo2LFiggLC8OOHTtw7Ngxm1EbJWjdujUaNGiATz/9FDdv3kTFihWxbds2bNq0CcOGDbOMZpgpV64cmjVrhiFDhiAoKMiiGJunHJrfy8qVKxEZGYmyZcvi0KFD2LFjh41rdGvS0tLQqFEjdOjQAZcvX8aCBQvw1ltvoU2bNm4/X9WqVbFjxw7Mnj0bBQoUQNGiRW2cD5iJiIjAwoUL0a1bN1SpUgWdOnVC7ty5ER8fj82bN6N27dr47rvvcOXKFYusZcuWRUBAADZs2ID79+/bODvgom7duvjqq68wYsQIVKhQAT179kT+/Plx6dIlLFq0CCaTCTExMQ6b3+bJkwcNGjTA7Nmz8eLFC3Ts2NHmup+fHxYvXowWLVrg9ddfR69evVCwYEH8+++/2L17NyIiIvDnn39Ken8zZ87EiRMn0LZtW4vhcPLkSaxYsQI5cuTAsGHDAABvvvkmsmfPjh49emDIkCEwGAxYuXKl2y7lnREXF4c2bdqgefPmOHToEFatWoUuXbqgYsWKvPeMGjUKf/zxB1q1aoWePXuiatWqSEpKwtmzZ/Hbb7/h5s2byJUrF/r06YMnT56gYcOGKFSoEG7duoV58+ahUqVKlvV/BEEQFlTz50cQBOFhrly5wvTt25eJjo5msmTJwoSHhzO1a9dm5s2bZ+NGmcsd+ciRI5n8+fMzwcHBTO3atZlDhw45uM7+4YcfmLp16zI5c+ZkgoKCmOLFizOjRo1inj17xjAMw6SmpjKjRo1iKla
syISHhzOhoaFMxYoVmQULFriU3exW+9ixY07D9ejRgwkNDeW89uLFC2b48OFMgQIFmMDAQKZkyZLMl19+aePammFYd+QDBw5kVq1axZQsWZIJCgpiKleuzOzevdsm3NOnT5levXoxuXLlYsLCwphmzZoxly5dcnh/Ztn37t3L9OvXj8mePTsTFhbGdO3alXn8+LFNnFLdkV+6dImpW7euxaW2OX17d+Rmdu/ezTRr1oyJjIxksmbNyhQvXpzp2bOnxQX7o0ePmIEDBzKvvfYaExoaykRGRjI1atRg1q1bx/PmHdm3bx/zzjvvMLly5WICAwOZwoULM3379mVu3rzJe8+iRYsYAEx4eDjz8uVLzjCnTp1i2rZta8lnRYoUYTp06MDs3LnT4R09fPhQkKwHDhxgBg4cyJQrV46JjIy0yNuzZ0/m+vXrDmFr1qzJBAcHMwUKFGBGjx5tcSdunUfq1avH6Z6/SJEizNtvv+1w3pzv7J/hwoULTPv27Znw8HAme/bszKBBgxzejX2eYxg2v48bN44pUaIEkyVLFiZXrlzMm2++yXz11VcWt/i//fYb07RpUyZPnjxMlixZmMKFCzMfffQRc/fuXUHvjSAI38LAMAp2ExEEQRAEQUhg8uTJmDJlCh4+fOiwlpAgCEINaI0TQRAEQRAEQRCEC8hwIgiCIAiCIAiCcAEZTgRBEARBEARBEC6gNU4EQRAEQRAEQRAuoBEngiAIgiAIgiAIF5DhRBAEQRAEQRAE4QKf2wDXZDLhv//+Q3h4OAwGg9riEARBEARBEAShEgzD4MWLFyhQoAD8/JyPKfmc4fTff/8hKipKbTEIgiAIgiAIgtAIt2/fRqFChZyG8TnDKTw8HAD7ciIiIlSWBjAajdi2bRuaNm2KwMBAtcUhdA7lJ0JOKD8RckL5iZATyk+EXDx//hxRUVEWG8EZPmc4mafnRUREaMZwCgkJQUREBBV8wm0oPxFyQvmJkBPKT4ScUH4i5EbIEh5yDkEQBEEQBEEQBOECMpwIgiAIgiAIgiBcQIYTQRAEQRAEQRCEC3xujRNBEARBEAQhDoZhkJ6ejoyMDLVFAcCucQoICEBKSopmZCK0S2BgIPz9/d2OhwwngiAIgiAIgpe0tDTcvXsXycnJaotigWEY5MuXD7dv36Z9OQmXGAwGFCpUCGFhYW7FQ4YTQRAEQRAEwYnJZEJcXBz8/f1RoEABZMmSRROGislkQmJiIsLCwlxuWkr4NgzD4OHDh7hz5w5Klizp1sgTGU4EQRAEQRAEJ2lpaTCZTIiKikJISIja4lgwmUxIS0tD1qxZyXAiXJI7d27cvHkTRqPRLcOJchpBEARBEAThFDJOCD0j1ygplQKCIAiCIAiCIAgXqGo47du3D61bt0aBAgVgMBiwceNGl/fs2bMHVapUQVBQEEqUKIHly5crLidBEARBEARBEL6NqoZTUlISKlasiPnz5wsKHxcXh7fffhsNGjRAbGwshg0bhj59+mDr1q0KS0oQBEEQBEF4I0I7772dnj174t1333Urjps3b8JgMCA2NpY3zJ49e2AwGJCQkAAAWL58ObJly2a5PnnyZFSqVMktOZRCVecQLVq0QIsWLQSH//7771G0aFF8/fXXAIAyZcrgn3/+wTfffINmzZpx3pOamorU1FTL8fPnzwGw/v+NRqMb0suDWQYtyELoH8pPhJxQfiLkhPKTPjEajWAYBiaTCSaTSW1xLDAMY/nvTK579+5h+vTpiImJwb///os8efKgYsWKGDp0KBo1amQJp9bzMQyDyZMnY/HixUhISEDt2rUxf/58lCxZkveeXr16YcWKFQDY/YkKFy6Mbt26Ydy4cQgIkK7aMwzj8n26wnyvs/dZs2ZN/PvvvwgPD7cJZ/4/YsQIDBw40HLcq1cvJCQkYMOGDW7JxTAMp3MIMXWSrrzqHTp0CI0bN7Y516xZMwwbNoz3nhkzZmDKlCkO57dt26Yp7zDbt29XWwT
Ci6D8RMgJ5SdCTig/6YuAgADky5cPiYmJSEtLU1scB168eMF7LT4+Hs2bN0dkZCQmT56MsmXLwmg0YteuXRg4cCCOHj1qCfvy5UtL57onmTNnDubOnYuFCxeicOHCmD59Opo1a4bDhw8ja9asnPcYjUY0atQI8+fPR2pqKrZv345Ro0YhIyMDI0aMcAiflpaGLFmyuJTFaDQiPT3drfeQmJgIgJ1V5iyekJAQy7dLSUkBwzA24QMDA20GO9yVKy0tDS9fvsS+ffuQnp5uc03M/mS6Mpzu3buHvHnz2pzLmzcvnj9/jpcvXyI4ONjhnnHjxtlkoufPnyMqKgpNmzZFRESE4jK7wmg0Yvv27WjSpAkCAwPVFofQOZSfCDmh/ETICeUnfZKSkoLbt28jLCzMosgzDKDWXrghIYDBwI6OvHjxAuHh4bwe08aMGQM/Pz8cPXoUoaGhlvM1atTAxx9/bKMHBgcHW47Hjh2LjRs34s6dO8iXLx+6dOmCCRMmWPLt6dOnMWLECBw/fhwGgwElS5bEwoULUa1aNdy6dQuDBw/GgQMHkJaWhujoaMycORMtW7Z0kI9hGPzwww8YP348OnXqBAD4+eefkT9/fuzatctyzp7AwECEhoZaRqXKlSuHLVu2YPv27Zg8ebJlhOaNN97AggULEBQUhOvXr+Ps2bMYPnw4Dh06hJCQELRt2xZff/21ZVPYwMBABAQEYM6cORajrHPnzvj2228thteWLVswffp0nDt3Dv7+/qhZsybmzJmD4sWLA4Alrtu3b2PMmDE4efIkSpQogXnz5qFevXoA2Kl6jRo1wuPHj5EtWzZkzZoVBoPB8v6nTJmCTZs24eTJk5gyZQp++eUXAED27NkBADt37sTnn3+OMmXKYN68eZb38vDhQ0RFRWHz5s02o4kAm4+Dg4NRt25dB4NUjEGmK8NJCkFBQQgKCnI4HxgYqKmKW2vyEPqG8hMhJ5SfCDmh/KQvMjIyYDAY4OfnZ3FJnpQEqNX3nJgIhIZmTusyy2bPkydPsHXrVkybNg3h4eEO13PkyGFzbP18ERERWL58OQoUKICzZ8+ib9++iIiIwOjRowEA3bp1Q+XKlbFw4UL4+/sjNjYWQUFB8PPzw+DBg5GWloZ9+/YhNDQUFy5cQEREBKeMN27cwL1799CkSRPL9ezZs6NGjRo4cuQIunTpwvkODAaDw3OHhITgyZMn8PPzg8FgwK5duxAZGWkZ4X358iVatGiBWrVq4dixY3jw4AH69OmDIUOGWBytme8LDg7Gnj17cPPmTfTq1Qu5cuXCtGnTLPGMGDECFSpUQGJiIiZOnIh27dohNjbW5h2OGTMGc+bMQdmyZTF79my88847iIuLQ86cOS1hzOGtj81ymI9HjRqFS5cu4fnz51i2bJnl2/Xp0weDBg3C7NmzLXr+6tWrUbBgQTRu3NjBmDa/F676R0x9pCvDKV++fLh//77Nufv37yMiIoJztIkgCIIgCILwPa5duwaGYfDaa6+Jvnf8+PGW39HR0fjkk0+wZs0ai+EUHx+PUaNGWeK2Xo8UHx+Pdu3aoXz58gCAYsWK8aZz7949AOCcTWW+5gqGYbBz505s3boVgwcPtpwPDQ3F4sWLLSNFixYtQkpKClasWGEZffvuu+/QunVrzJw50yJDlixZsHTpUoSEhOD111/H1KlTMWrUKHz22Wfw8/NDu3btbNJfunQpcufOjQsXLqBcuXKW84MGDbKEXbhwIbZs2YIlS5ZY3qFQwsLCEBwcjNTUVOTLl89yvm3bthg0aBA2bdqEDh06AGCdTPTs2VO2PZu40JXhVKtWLcTExNic2759O2rVqqWSRARBEF7M7dsIuXtXbSkIgtAYISHsyI9aaQvB7DxCCmvXrsXcuXNx/fp1JCYmIj093WZa34gRI9CnTx+sXLkSjRs3xvvvv2+ZqjZkyBB8/PHH2LZtGxo3box27dqhQoUKkmXh46+//kJYWBiMRiNMJhO
6dOmCyZMnW66XL1/eZl3TxYsXUbFiRZspi7Vr14bJZMLly5cthlPFihVtfADUqlULiYmJuH37NooUKYKrV69i4sSJOHLkCB49emQZ+YuPj7cxnKx184CAAFSrVg0XL16U7fmzZs2Kbt26YenSpejQoQNOnjyJc+fO4Y8//pAtDS5UdUeemJiI2NhYi8vCuLg4xMbGIj4+HgC7Pql79+6W8P3798eNGzcwevRoXLp0CQsWLMC6deswfPhwNcQnCILwXhgGgcWLo8nHHwMqLJgmCEK7GAzsdDk1/oQOJpQsWRIGgwGXLl0S9WyHDh1C165d0bJlS/z11184deoUPv30UxvHGJMnT8b58+fx9ttvY9euXShbtqzF41ufPn1w48YNdOvWDWfPnkW1atVs1uFYYx5B4ZpNZT26woV5a56rV6/i5cuX+Omnn2yMIuvfctK6dWs8efIEixYtwpEjR3DkyBEAUMVxSJ8+fbB9+3bcuXMHy5YtQ8OGDVGkSBFF01TVcDp+/DgqV66MypUrA2At+MqVK2PixIkAgLt371qMKAAoWrQoNm/ejO3bt6NixYr4+uuvsXjxYl5X5IT3cPMmsHYtoCFPqATh3VgXtn//VU8OrWIyAQMGAK9cAhMEoS1y5MiBZs2aYf78+UhKSnK4bt5DyJ6DBw+iSJEi+PTTT1GtWjWULFkSt27dcghXqlQpDB8+HNu2bUPbtm0t628AICoqCv3798f69esxcuRILFq0iDOtokWLIl++fNi5c6fl3PPnz3HkyBGXs6lCQ0NRokQJFC5cWJAL8jJlyuD06dM27+LAgQPw8/ND6dKlLedOnz6Nly9fWo4PHz6MsLAwREVF4fHjx7h8+TLGjx+PRo0aoUyZMnj69ClneocPH7b8Tk9Px4kTJ1CmTBmXcnKRJUsWZGRkOJwvX748qlWrhkWLFmH16tX48MMPJcUvBlWn6tWvX9/pUKp5sZr9PadOnVJQKkKLFC3K/k9NBawGIW1gGODQIeC11wC7NZ8EQbiDG1NevJYNG4CFC9k/vkqJIAhVmT9/PmrXro3q1atj6tSpqFChAtLT07F9+3YsXLiQc+pYyZIlER8fjzVr1uCNN97A5s2bbfYPevnyJUaNGoX27dujaNGiuHPnDo4dO2ZZzzNs2DC0aNECpUqVwtOnT7F7925eg8FgMGDYsGH4/PPPUbJkSRQtWhQTJkxAgQIF3N6I1p6uXbti0qRJ6NGjByZPnoyHDx9i8ODB6Natm80aq7S0NPTu3Rvjx4/HzZs3MWnSJAwaNAh+fn7Inj07cubMiR9//BH58+dHfHw8xo4dy5meeS+qMmXK4JtvvsHTp08lGzbR0dHYunUrLl++jJw5cyIyMtLi0MHsJCI0NBTvvfeepPjFoOqIE0GIZe9e/mubNwO1awMlSnhOHnfYtg344Qe1pSAIQhKPHqktAUEQLihWrBhOnjyJBg0aYOTIkShXrhyaNGmCnTt3YuHChZz3tGnTBsOHD8egQYNQqVIlHDx4EBMmTLBc9/f3x+PHj9G9e3eUKlUKHTp0QIsWLSx7hmZkZGDgwIEoU6YMmjdvjlKlSmHBggW8Mo4ePRqDBw9Gv3798MYbbyAxMRFbtmzh3cNJKiEhIdi6dSuePHmCN954A+3bt0ejRo3w3Xff2YRr1KgRSpYsibp166Jjx45o06aNZe2Un58f1qxZgxMnTqBcuXIYPnw4vvzyS870vvjiC3zxxReoWLEi/vnnH/zxxx/IlSuXJNn79u2L0qVLo1q1asidOzcOHDhguda5c2cEBASgc+fOsr8zLgyMO6vndMjz588RGRmJZ8+eaWYfp5iYGLRs2ZLcszrBPKe5TBng7FnAbtNnAEDfvsDixexvPeRq8zMdPQq88YY8cRqNRmzeHIOyZVuiZMlAwXPBCQ2zbh2wfj2wdKnwVdHukJICZM0KpKcDr+okY2wsAitWVD5tPfHDD0D//uxvPVQ4GoDaO32SkpKCuLg
4FC1a1COKqVBMJhOeP3/O6+pbKCkp7DLOXLkAN6IhVOLmzZsoXrw4jh07hipVqvCGc5aPxdgGlEUIXXHxImDlJRQAu5/Er7+q5+HHXe7ckTe+FSvKonTpQHzxhbzxAuxUyY0bgWfP5I+b4KFjR3aB39dfs8dpaYD9XO8LF4C6dYFduxzvP30a+P57YQsEx4wBgoOBnTtth26tDYOMDOD4cdawcgbHfHSfwtnzp6cDv/wC2C0IJwjC85w7B8THA+RAVF8YjUbcu3cP48ePR82aNZ0aTXJChhOhO776yva4b1+gQwdgzZrMc3KsZf/zT2DmTOU7k+WOf8MGdj+J//1P3ngBVq9+7z2gTRtgzx5Wnyc8xL17wO3bQFAQYL8QuG1bYP9+wG6ndABApUrAxx8DP//sOo1Zs9j/jRsDHIuhAQCjR7NDpIMG8cdz4QIQFga8cvTjc5w6BWTPDvBMYcGUKUCXLkCrVp6ViyC8iBcvDLhzJwwpKXLFJ088hGc4cOAA8ufPj2PHjuH777/3WLpkOBG6w97Q+OUXxzBCdERXtGkDjB3LGgjeSno6OxghdGsFs9OgffuABg2ATp2AK1eUk4+wIiMDaNo089hozPwtZOTCDac6fnv2AGYPVLNns/+dLdAbM4ad//LZZ2xX7g8/QDbtRis4c3E8ZAirhfFt9Dh/Pvv/+HH55SIIH+HqVQNSUgJw/bo8c9LNukVGBvDkCQ2aax2zg7nLly9bNhv2BGQ4EbpDyAiNnNsJKO2J2Z0RpwcPgEWLpPWUzZrFLmH5+GOgbFlh93B5PP3vP/FpExIwmdjW3Iy1e10hE/PdyGj+w4cDhQtLu7lcOXYtkNXGjJpn/Xq2d4CPU6eAOXP4r7taiyZEI8vIYOfF0nQ+gnCKq1nDZkwmdpr5y5fOp5vfuMH+nTpFyxcJR8hwIjhhGODkSf3ueymn4aTlirNJE6BfP9b4EcuYMeLv4TKcyAGFhzCZAKtd4PHRR6wGAChuOAFgrfN794SFtc4UZqt++3b30vcU168D7doB9erxh9m82Xkccmw8OX8+Oy+2XDn34yIIAv/+C1y9Cpw/z/7n63C0Nqp4tnoifBgynAhOtmwBqlaVr81OThauc8mBChtYq8KZM+z/33/3THpchpOWDUuvwt65w7p1QLdu7G/rLlchXlJu3WIX8InVCo4dExdej8jhrSUoKPP3+vW2CzCF8vnn7H9ye04QsvDwoe3x7dusl14zXG0Zx765hI9DhhPBya+/sv9v3xZ/b3o6qydY6x/587N/ckzrEqKoz5ypn5lBJhPw/vvAyJHS4/DUqA+XG/i5cz2Tts/D5RXPbDEnJ2ee4zOcrAtOdDS7gG/AANnEs4ErQ9qfsx7O/vdfdh2U9XNoGftnWb6c3xFEu3ZA587ijNTDhx21PIIgOJHaeZeczHqKJQgxkOFEcOLOKML8+ayeYO3N2Kwj7d/Pfc+IEUCdOrbr3d2VbcoU294ikwn44INMx2FCcZXelStAz57SnSScPAn89lvmmnspuDKcEhNtl8dIhWvEyWpDdUJJTCbuD/3WW7ZDrNbT+ew2NgRga+nu2CFOBldT1ITy009AZGSmsfHGG+w6KD5nCp5i3Tqgfn3x9/Xqxcp+7Rp7zFVpmEcFGcb1HGi53jNBeAH37gGPHyufDs2eIIRAhhPBiTsVyLZt7H8xPTnffAP88w/w11/S0+XCehRnxw7W256UtT3OqF+f1QMbNJB2vxBj0V3Cw4GcOVnj7t496dMmuUacCA+xciV3wbTaQR2A7UcaPDjzt/neoUOly+DMk54YevZk/5sNJfMGKn//LU/8UunY0b37z5xhh+u5RgfN69A2bXIvDYKQmUuXtOuKOzmZnb0SF6e2JATBQoYTwYlaPS9CveMIxVpHUWquslnnU9O7nNCpeqVLs1Mm339fWjq0q7rKuLP+xlyorYcNlSro3u4xhO+9tWvHbipnnuvMdc/Ona7jF/P+LlygESpCMse
PA2XKAEWLqi0JN3LoBImJrgd533jDgB07NrqfmM6ZPHkyKlWq5HY8BoMBGzdu5L1+8+ZNGAwGxMbGAgD27NkDg8GAhFdTmpcvX45s2bK5LYcSkBpEeDXW+o1UHVFpI1INHdN6QawYvF0f9mrMGTk42PGc3AhZ4yT0Pm9BzLsW8h727GHn+L7+OruRLu0JRUjgzz/Z/56YCqcW//xzD337Dkbr1sXw5ptBePvtKAwf3hpHjwroxPAA69evR9OmTZEzZ04bY8IZkydPhsFggMFgQEBAAKKjozF8+HAkCnEOpAGioqJw9+5dlOPxQNaxY0dcsVr/IJdBJwdkOBGcqDXitHs3Oyqyd6888XHNmBGLr8x7zsjw7sbTZ+DLsCYTcOgQf1gpGf3rr71n6tmFC8LDSjHwzO9XLgOyQQPboWOpvSEEISPp6ezAuCf9vDirum7evInu3avi2LFdGDr0S/zyy1nMnbsF1ao1wKxZAz0npBOSkpLw1ltvYebMmaLue/3113H37l3cvHkTM2fOxI8//oiRPF6m0jTmatjf3x/58uVDANfCaQDBwcHIkyePh6USBhlOBAC24rl2LdPQcMfgcKfTeOFCdh2OlPXZXMhhOGmNffuArVttz4l951yb/zVuDOTKxa1/GY2sEwjyjKwDEhNZj3n2IxALFwJvvmm7mMHdxvSTT4B333U87yxDyrE5nMnE7gslh8cTMwcPOr++cye7mFEqvtIDQ+gKye01w7Dz3+3+7lxOwoO4JFw64XhN6p/fS/bPck5EWRowYAAMBgN++ukoGjZshyJFSqF48dfRtesILFt2mPe+efPGoF27UihZMgTFihXDhAkTYLRakHz69Gk0aNAA4eHhiIiIQNWqVXH8VZ1769YttG7dGtmzZ0doaChef/11xMTE8KbVrVs3TJw4EY0bNxb8XAAQEBCAfPnyoVChQujYsSO6du2KP/74A0DmCM3ixYtRtGhRZM2aFQAQHx+Pd955B2FhYYiIiECHDh1wn2OT7R9++AFRUVEICQlBhw4d8MxKaTh27BiaNGmCXLlyITIyEvXq1cPJkycd4rh79y5atGiB4OBgFCtWDL/99pvlmv1UPXusp+otX74cU6ZMwenTpy2jbMuXL8eHH36IVq1a2dxnNBqRJ08eLFmyRNS7FAO3qUf4HN9/z3om7tkTWLZM3rjFdOSKgWFcb/yqN8OJYZw3ZCZT5r6c1t6K5ZjhtGcP+3/xYuDbb22vffkl8Omn7qdBiESKr9zJk4ElS1if/K5ITGRdX6enAzVrik9LDOZM6mwxhdCMvGgR64UPYJ/B2YazX34JxMSwf9bTFMViVmqqVJF2P8Oww7pCnlHKSvjjx1nvfgThCZKTgbAwh9PRr/7kIgKAQ4lzVeZf8eTJE2zZsgUffzwNwcGO4cPDs/HeGxISjokTl6Ns2QJ49Ogs+vbti/DwcIx+5dCma9euqFy5MhYuXAh/f3/ExsYiMDAQADBw4ECkpaVh3759CA0NxYULFxDG8a7kJjg42GZk6dq1a/j999+xfv16+Pv7w2QyWYymvXv3Ij09HQMHDkTHjh2xx6wAvLpv3bp1+PPPP/H8+XP07t0bAwYMwM8//wwAePHiBXr06IF58+aBYRh8/fXXaNmyJa5evYrw8HBLPBMmTMAXX3yBb7/9FitXrkSnTp1w9uxZlClTRtRzdezYEefOncOWLVuw45Un2MjISJQqVQp169bF3bt3kT9/fgDAX3/9heTkZHR019GPE2jEiQAATJrE/l++nP0vV+fo+vVA1678100m/r2ifvwR4OgIAcDqeXv3unbyJedUvT//ZBfRnjjhfpzWWOtRrt679XVX0+qkGlNmBxB79mQOWnhqg13Cjlu3xN8jdspWrVrsXgD21rJSyDFKZNVz6dJAHD2azcxLl7qfLsDOQ5JSQT59CuTN63rjs6VLgRUrxMe/YIH4ewifR4/LCoW269euXQPDMIiOfk10Gr17j0fFim8
iMjIarVu3xieffIJ169ZZrsfHx6NxzZp4rXRplCxZEu+//z4qVqxouVa7dm2UL18exYoVQ6tWrVC3bl3RMojhxIkTWL16NRo2bGg5l5aWhhUrVqBy5cqoUKECdu7cibNnz2L16tWoWrUqatSogRUrVmDv3r04ZrW5eUpKClasWIFKlSqhbt26mDdvHtasWYN7r9zxNmzYEB988AFee+01lClTBj/++COSk5Ox126Nxfvvv48+ffqgVKlS+Oyzz1CtWjXMmzdP9LMFBwcjLCzMMsKWL18+BAcH480330Tp0qWxcuVKS9hly5bh/fffV9RQJcNJ4zx+zHaYKu2xzd4ltlyGU7t2gPVIrH28nToBhQtz3/vRR/wuvt99V5j7VDlHnNq0Yd22tmkjX5yAbcNllvfuXaBZM8flI3xLUuRs/ObMYfXSBg3Y7XVu3mT3miJUQECvqgNHj0pLa9Eiafdx4UltTOgu3WrvdLlypbBFhOZeLGvkdjdKEK+Q3NaHhLAjP3Z/548k4uQ+9o/r+suHibh/PRGm59zX7f+e/5cZ392r7P8b90IEPpt0RWbbtrXo3bs26tTJh7CwMIwfPx7x8fGW6yM6dUKfoUPRuEEDfPHFF7h+/brl2pAhQ/D555+jdu3amDRpEs6cOSNZDmecPXsWYWFhCA4ORvXq1VGrVi18Z7V3X5EiRZA7d27L8cWLFxEVFYWoqCjLubJlyyJbtmy4ePGi5VzhwoVRsGBBy3GtWrVgMplw+fJlAMD9+/fRt29flCxZEpGRkYiIiEBiYqLN+zHfZ39snY4c9OnTB8teTZO6f/8+/v77b3z44YeypmEPGU4ap3NntsO0aVP343ryBEhJ4b4mp+EkRmfi8tprDV8Z27xZWDpKeNVT0mmNOa1hw9j9sOyXj/A9g1QnZnxYrzmvU0d6PISbeHJdjJxpSc18Qu+zDif0HimbkHG9k4sXpT2f0J23uXp7zPteuULKCCVBSMFgYDt2OP5Mwewf17XzN0Nx+0ko7j7PPMeEsH+u4rv3gv2f8MzAuVTSaGQ7VM3FtmTJkjAYDLh585KoRztz5hAmTuyK2rVb4ptv/sLJk6fw6aef2kyDm9yvH86vXYu369fHrl27ULZsWWx4tRt8nz59cOPGDXTr1g1nz56VPNLiitKlSyM2NhYXL17Ey5cv8ccffyBv3ryW66FSOt4E0KNHD8TGxuLbb7/FwYMHERsbi5w5czp1QMEwyjRn3bt3x40bN3Do0CGsWrUKRYsWRR2FlRYynDSGfZu5fTv7//x59+J9/JjdALVQIe7rSo042ePpaQFKeNVT8hnMaT144HgtLY0d/bEPy4fJxC6pcBd3tg4i3MSTi/S80e++Na4MJ6HPz+O1yiVCR424vvnPP2duGOcMq6lEBCEEtYqluQPSZALOnQOuXhV3v5WnakvRPXcOuHyZnRULADly5ECzZs3w22/z8fKl40aOL14kcMZ95sxB5MtXBB9++CnKlq2GkiVL4hZHp0SpIkUwvG9fbNu2DW3btrWMfACsu+3+/ftj/fr1GDlyJBbJOaL/iixZsqBEiRKIjo5GlixZXIYvU6YMbt++jdtWo/QXLlxAQkICypYtazkXHx+P/6ymOR0+fBh+fn4oXbo0AODAgQMYMmQIWrZsiddffx1BQUF4xOE56vBh1vlGRgZw+jSwd+9h0eubrJ81g0OhyZkzJ959910sW7YMy5cvRy8PrPMkw0lDzJkDZM9uO7VNLswOo/hmiti36XpwACWkwrfWQbT6TFxT9bierWZNoFQp13FY89577slGqAyNOMl3j1wjTs7OywGfsTx2LNt70qABMH68cukThAdJTGRn0crhbNOsV1t7jf3uu/nIyMhAjx7VsWvX74iPv4q4uItYs2YuPvywlkMc6elAVFRJ3LsXj23b1uDOneuYO3euZTQJAF6+fIlBs2Zhz4kTuHXnDg4cOIBjx45ZjIJhw4Zh69atiIuLw8mTJ7F7926
nBsOTJ08QGxuLC688aV2+fBmxsbGWNUVy0bhxY5QvXx5du3bFyZMncfToUXTv3h316tVDtWrVLOGyZs2KHj164PTp09i/fz+GDBmCDh06IF++fADYkbyVK1fi4sWLOHLkCLp27YpgDsc7v/76K5YuXYrjx69g/vxJOHPmKAYNGiRJ9ujoaMTFxSE2NhaPHj1CqtXU6z59+uCnn37CxYsX0aNHD0nxi4EMJw0xfDhbeXz0kbLpWHtjS0oCqld3bKvd0Qv++ov/mqd1MyX0G4OBXXP21lvAL7/IG7czeU+dci4TF+bNDQmdopcRp9RUVqGfPFk2cWRHyogTw7DT36w8TikO3zBxUhLrbWfPHmDaNO4wWu0dIjSL2gPB8mBwcJz08iX7u1ixYli16iSqVWuAOXNGolOnchg0qAmOHduJsWMXOsR09y5Qr14bdOkyHLNmDULXrpVw6NBBTJgwwRLG398fj589Q/dJk1Cqbl106NABLVq0wJQpUwAAGRkZGDhwIMqUKYPmzZujVKlSWODEecsff/yBypUr4+233wYAdOrUCZUrV8b3338vw7vJxGAwYNOmTciePTvq1q2Lxo0bo1ixYli7dq1NuBIlSqBt27Zo2bIlmjZtigoVKtjIv2TJEjx9+hRVqlRBt27dMGTIEM49l6ZMmYI1a9agXr0KiIlZgc8//8VmZEsM7dq1Q/PmzdGgQQPkzp0bv1gpX40bN0b+/PnRrFkzFChQQFL8YiB35F6E0QgMHcquh3K2NqZGDeDGDfb37NmAlTMVzvBqc+CA9Hut9U6pDQTXuxg5kpXLmWxPn7K6WkSEe2nJEZbQKXoZcVq3jlXo9+zhN574CmC/ftLTFYMrw8k8L9oahgGioxURx4Zly9jN2Vas4DeWAwLUd3BBEBrF3v/C+fNAtWrsIG2uXPkxevR3GD36O+6bARw7xiAgILPfYsiQWRgyhF2XWLUqW30NGzYMADtt7Bdz50X+/ICVIwUAotcz9ezZEz2FrmN8xeTJkzHZSUcV3/XChQtjk5NNy63v+5hnv5fKlSvbeOEDgPbt29scmx1zDBgwAI8fO+6wEB0dbeO8o379+jbH9u8kKCjIZh8oa5KSkvD06VP07t2b97nkhEacNIhY5dk89X3xYnaPy/feY9vgJMcpvQBsM/DEidLltGbkSNarsRLt+ltvSb/X1VQ9hgFWr2bnRfPBdV9CgvN0U1OBHDmAyEjXgwZcU/XE8vQpMGgQ0LOnhOlIhHbRi3Vs73VGTC+F9dx/JZ1D+Llo7rgaZU+9/w8/BNauZV2R81UC/v6u5eF7Fz//DOzf756MBKFh7NdpA6yjCHMnsRDS02mTdz1hMpnw4MEDfPbZZ8iWLRvayO3ymAcacdI5/fqxBtOaNcC//2aeb96cdWe9ZQu7ZkqsB0ix+sLs2ez/jRudh5NzWoDYNU5c/P575j5TcupI1u7jU1Md993k88znjgzz5wPUF+JleHKqnhgNwxMYjWwhD+BopqRUJFzxSEWp+U1Pn/JXAlLlj40FPviA/c0X9+LF7MbEjRpJS4MgNIizDlFC/8THx6No0aIoVKgQli9fjgA563gnkOGkQcQoz4sXs/8nTWL3TLJm61Z2kWTlytz3cm0XYmbzZuEyWOPKcVS3bqw3HKvpworiyh35kSPi43SlM61eDQwY4DwM316cNFWPsEEvH9l+LyV3DYv0dKBAAbbH4eZN56NFcjiH4HvPfPslKPld+NY4SVUK7OfImGEY9t0dPw707Zt5jtAtd+6ws8bEFD/vWONE+CL20/08BXVPezn37/NfmzqV/xrffk/uYjTKNz1QaIXvrrceseWya1dbrz5c91tPKxDqVU8qpAvpGE+OOLnDZ59l/jYaud1iC8nU5jC3b7NzZm7fzlzl7eoeV/AZTn/+ya5T4MI8HO1JnK1xkouHD9kRpvHjbfc4IHTLokVAVBQwZIjakhCEd0OGk5dgMHAryN7cmyT02Zytu5JiVMj
xTq11oxkzpMkjVA459nIiVEKPVu+XX7ofx7Vrzq9LKYR8hlObNs57mDyJweDccIqPlyedr79mvQVOm6Yf45xwyujR7P/v+P0fcCKmKKnRu+/NOoyvoJVmTK78S4aTjrh4ke0kFDPNy9V6aMI19u/1yRP3507z6Spc37BECVvjyllYMWkROkArLY4YnO1H4ArzQs2hQ+WRxfr9yVkZvnghX1zWMIxz5xDO5leb73eGeSjcOg2hFURysvbWwXkJaWlA48bOZ4GoSWBgIAAgOTnZZVhXA8RmpFRt1AlIuENaWhoA1p28O9AaJx3Ruzc7q6J3b9YJkz1kOHHjrMfKekqdGPiWDQiFr9Hg0mGuXwf+9z/paZHhpGP0+PH4Mvfhw8DJk87vffGCXajharGkNVI88bnLrFnyxWUPn3YoR2WeLRs7d9k6LqEabIkSrAvXU6eASpXcl8XHOHGC3SZk5kygSxfba7/+Cuzcyf7JNZVdTvz9/ZEtWzY8ePAAABASEgKDgPLkbMq/ycRef6XL8oa3vq4WKSlOqo/0dOXWNngJ1ksT1HpVJpMJDx8+REhIiNtOJMhw0iB87ZizDMdXqOXQFW7fBlq2BAYPdr3lit46yM3ONTwN33tS4v1RL52O0VuBckXVqq7DCNm4TS4jSGvvl2/ONSBMViHv5fx5aXsgmPe92LSJDCcJtG/P9gl07Qp06mRru+pB786XLx8AWIwnPqzdeXN1MJqvBwWx2TAlJfMcV3jr62oRF8dRtMxCGY3saCzBS2Iiuykx4H6nszv4+fmhcOHCgox+Z5DhpCNctZtKrXHq0wc4dw746CPP7VUphObNxYWXqiMpoVu5mqon5LvRVD0fQI8fz90C4+z+778HSpWyPedOJce32Z2a8BlPc+e6vldII7BsGfDHH87vIWTHeuRk0SK2PTWj5joeIWnfvg0ULmxA1ar5cehQHmRkcGya9IoWLTJ/X7rE/k9KArp3Bxo0yFyDVa0asGoVOxDdv79teGuOHs28rhbnznH4ZjE/6IAB5JHDBRs2AOPGsb+5vrGnyJIlC/xkGLknw0mDSO1wVMpw2rYt8/fBg8Cbb7K/d+4Ehg0DfvzR9rrWEONEIijI8bwceuCjR0CuXK7j9KSRRugAPXRFKwFXoT1wADDvZG+tnbkD34ZqeuX4cddhrCtsgAwnFVizxtZwkgMljS/zvqInTgA7d/qjeXP+NSK3bmX+zpoVOH06c4AyJibzWlQUe91kyrwna1bH+BjGNk41yJqVw3AyC5WYyC04YSE11fk31hu0AkZHWLdve/faXuPrpJR7jVPt2pm/Gzdme2Ks90xcuFDe9DxJbKwy8Y4cCeTOzW5GDLDOJfiMGTFGjv3WOXy88YbwOAmNYe3mWy9I2RzNGWaNUKjbbGvHDa6MAi0YTg8fyhcX375TzqCeFZ9GiMFl3TYKdf5gpnt359f1YLfrQUYt423vjwwnHWE91F+/vrB7pPRC7dsnLrzYitTX+OEH9v/w4az335w5+ddWialgWrUSFu7qVeFxEhpj5061JfA89oWAa26HdcVm/XvTJiAigt2fKDUVqFPHeVrmdTtqkidP5u+//3bfkElKYnc/F7qq3tu0Gh3gS69cjJ8XM0Yj6wBr9WoduCP3pY/5CpOJNYhnz1ZbEnUgw0lHuJobKtdUvXr1xN+jZYS+AzmmzzkL++QJq88B/KNF77zDdqwLkdlrOop9sOERjOa1BoWwfm6zQwm+fGIddtAg9v+0acD69a7nDjdpIl1GJRDba8XF+++zC0DHjGFXZLsaqfOaikRfWNu13lzM+Z7N2XreZcvYPzX2nxaNFtz+eZht24CVK9nZNELwtiaeDCcNInVTVnJH7hxPOIdwpoMI+RbHj7Mel/77T3iauiYpCShZEujbV21JCK3gTitrfa8QhcbZ7th65e+/2f/ff88urBw+3Hl4Mpw8ztmz7HraYcPUlkQ+xPpZcVbMXTju0xazZvnctA6x29i
R4UToCi33ZJn3utQKcrwrZ9MShBqx168DFy64L4suWLuWfWC1/MJrHer54IZrqt7Vq9qrVPSAt2k1OuDJE/b/t9+qK4dcjB0LhIUB27c7XnPVruoh+7mU0dmctWvXWGc2+/fLKhOhHtQqexFKedVTgvR0oFAhtaWwRY4K/Msv+a8J/RZS5oTrFj20mmpChhPLtGncWpk1771ne6zkJrV6QKhHRiqDHsFZ/S+mnX76FBg4kHXjLQdy6AgzZ7L/R4wQHr/PZLv27VnPUHXrqi2JanB9az0PdFOr7CXwTdXTauXkyf3i/vrLvfvFvEN7T7/WCG2gaMNawuexLyzjxwMrVji/x35nRfthW632IqmNVhsJDfL4MesRX4rjSGev+dAh4fF88gmwYAFQqxZ/mIEDhccnFmfPce6c7fHZs+yfO2novtgKdX/rQ3z6KZAjh7qb4boDGU4a5vRpceH11P55UlZX+pacODN6hA4e+NSIk+5bRYXxxfcjtnKQ+o60uPmtGui569dD7NvHGjdDh7JLx2rWlDd+Zx1u9giZxr1ggfD4lKxi2rVTN31Cm0yfDjx7BkyerLYk0qANcDWIWW/o00ddOZzhruHjybbanJYnnEM4c+oQHy8sDp8ynAjnkFbhGvM7ErtDuBb2cNICZDg55dmzTE+z5curKwvAXyVosap4+pT/mrk4upop06uXvDJJQU+d0lrE294fjThpGDGZjW+qnlLQ3k3K4VNT9cS09levAj17uvbL701ozV22JxBSkcmhJXpbay4Veg9OMTtyANzr1JLLsNGigcSHM5tcaLa7fl0eWRRFjo/y4AFw7Jj78WgQb6tiyHDSIJ4YGXEXdzspPSmr3gotdQDz0KQJ8NNPwnd/NnPggGfna8pJ9epqS6AOrhQRMpzkgyocXSGX4ZSUxK7XErvHtpj09aDLaIZ8+dj6XsyCN0IVaKqehpGj8lCqAvIlw8knK3FPIaYVvnWL/X//vrg03nqL/V+mDPDGG+LuVRtfVGqpwHkWet8eQa7XLJfhNG0au15LLHLta0jZzg7zC9mxw9bzB8Poa5iRA2/71jTi5CV4cqrekyfO3W4LgUbHCACefbm6mPNhhy8aToA4RUFoWPtwVLBZPOWMwwvQwqMLleHuXaBcOWDuXO7rN244nhNS3Rw8aHt8/jx/WLFZq29fYMAAcfdoHncyTefOQJUqgNHocOnyZXYd/LVrbsjmIbytqiXDyUfZvFn6vd27A59/7l76ntQHnS1CFXM/oXM0+CFNJlYR4fVT4IuG07x5rr+VtTKSmMiufxOLBvODKojNY3JNheBQBr0ZTxtdEyeyRs3QocLv6djRdZivvwbOnMk8LleOP6zYEafFi4GFC1mHHFrCrSzvzs1r1gCxsaxbRzvq1weWLNHmMthLl4CGDYE9e9SWRBnIcPIixJTPVq2kp+OO0WVGTyNOhMwsXcquOwI8q01oUFFesgSoXTvTa5cDvph5jx0DHj4UHn7ZMqBUKdceazT4/TWBCu/Fv21bIFs2W88LOiQ9HThxwrMOfYRWma72P+b67L/9JixuoT4MhGSt5csdz+mu2pOzHRNYHu/dY//fvClf0kJxJWK7dsDu3UCDBsLC6w0ynDSIlBESve3OrcaIk1CePAE++kgZWXye/fuB3r0z1x35OGal4eRJngBaLcBK4wm3nb76bu1RQUv127yZ3QX99989nracDB0KVKvGbkorF99+C4wZw39dC9MFhe5J6GrEKT0dWLWK+5rXIPKDLV7MNpN6xn5bFq/6niDDyavgypzO9hVSEy07hxgxwnZDQm8r9KpiPyHbVaNy6ZJ+txeXA911vcqEmKl6hHuoWcHpvHI1bzQ7Z470OGbNsj0eNow9d/Ysd3ipS/rscefVC5XBWRoMw3/dV6s9ALh124C6de1Oaqy+05g4HocMJw0idcSJK3y1avLIJDdaHHEyh7t8WTlZCBE8e8Z6witWTJ74lFLSlNyx2Fc1CLG
b2QrB11t7Psg5hFOUtu34RpdevOA+r6fX7y39H56074vgFlahKzsH1Av591+1JXAfMpy8CD113nlSH3Q3rREj5JGD4MBZy3nnjufkkMqGDUCWLMDPP0u63aXi4KuGkyv+/tv9OPRUYSqJB51DGFasQNPevSXfrzbOyqur/hO51kHxySDXcjEhywuFGjxSq6/vvpN2n2pwvZAOHYCaNUV/+D5Ygq5Yrd1ebwlYVxmFCqknh1yQ4USogpKd9PaYK2+xI0+Ewjx+7N79ly8DH3wAXLwoLLwSH7ZtWzbeDz6QP27AdzOjK81MDo9svvpu7RHjiMNNAvr0QbB1udfBNxBqJJj93fBhXsyvFfhe/Ycfur5XDsPJ2VQ93cH1IL/+Chw5AiQkuB+/h4bmEhKARYuA+Hi2Sdu0SVo8fKOl9qxYAaxeLS0NNSHDyUvw5D5OcsDrRUwB9PRedE9amvCGomdP9xqEhg3ZkR6HCeE86DEj0IiTfIwapbYE2uSbb9SWQBeUwQUEmTzgtMQFXFVmcrL4ePiqw8OHpcnABVVf+qJrV6BfP6BIEbZpffdd8XF8+qntINvTp86b3q5dxaehNmQ4aRhXep79Xgd60gs9OQtL7IgT4QYlSgDZswOPHrkOu22b7Ud5/px1T3X8uGNYZ55PhKTFF4fW8VXNQ4om6Ipr14BmzYCmTb2su1vH3L6ttgSCaI6/cQGv45cb1T2WJl/25DJaJM4UFhy/lDCAa+cQesGlrHpZrOWCmBj345g+3fa4QQN9fWshkOGkQYQ6h5gxQ3lZvAFvK7SaxqwISfGnOnYsu7viG2/IK5OeocwrL9u2Adu3s9PT6N1Kw4mS+OgRu6GzYKZNc18ehWEYoDtWAABKpZ6TFAfvBtcyIefUdzkNJ1dQEZSJTZvYHY81+EJPn1ZbAvkhw0kDXLqUXVLHm/ViUL1N1fMk5vfiqrJ3puu7427WJ+F72fbnrY/t/e9aD6nar3yWYphRASHM0IiTIhQuzG7ovH27iJs8sWeXB3DWvsTHuxc3w7BVXkKC8kaLpwwnKn4y8u67wGefyTNkpADe9q3JcFKZU6eAsWPronjxQMs5oZksSxbbY2/LnHIhdKqes00Hhw+XTx6fxtlHsG+Nx47N/J0rl+01oeuaCIILMpykk5ICXLlic4phgL/+yrSBtmwREV9YGJCU5LZYjx6xzhbl8l5nRg4j4dNPxd9jnT1Xr2arvCpVbOUZPlx6NnYn+8fGyufFj5ARgRt3jhjBOu1LSZEn2adPWaOeb2a5t1W1ZDipzNGj0j8BGU7C4HovGRnAsWPCwxMikbJLo/XvAQP4d4CUCn1YwozJ5Lvrx9xlxgygdGmbYaUNG4DWrSXGZzIBJ0+6LValSkDLlsAPP7gdlexIcSBapw47sxQA1q1j/8fF2VaTc+aINFJl4ssv2dFFd9BTdaz6Gie5dj1+xTffsNtEmfOVO6SmAjlysEubV61yPz49QIaThnFVWK0NJy9Zm6gIXCNOkyYB1T23zterCcdzYOZMtlV3hdCMunChT2Rql4+oJ+1CTzCMyPlkhAMrVlh+auFVmjfW3LBBXTnkpFkz9n9AQOY5+zrjjz+475VaddjH/+OP3OHcHSSkQV8FENlmyjE6e/9+5u9z0pYA6g4ynFQkMdG9vR2CgjJ/nzhhm4GJTLgqZ2eONXxAX5eVbzGUnVZXqVLmSaET5flGnJSAWmnCDMOwo5qELLjtnEDDZVMO0dyt2vz9+eP6/nvue+zlPn0a2LyZ/zpf/B99JExGsVy7xi7LIfSNtQHNl881XLwlEeA6CKEUU6YAX33l73BeaCYLsPt6f/0lg1BeCNf7dLUxHyGc+tjD/nj+PPMkVw165oztHk+eftEa/7DJyUBIiNpS+Agazwt6Q479iLWMAe7lFzkNJ6HYZ3Fzv1ZsLFCxIv99nuo4TE7WhVNFbSDzVD056dtX1eRVgUacVMTPzbfvK5nUXcQsZfASB08eJQMcrbp
95jx4kG2t7b1sOBtx0rJye/EiuxJdRj75RNboCGfQ+iZuSpYELl0SFtaqvNqPOIlumzTcmMmhs7pblTmbqicW8+fVcvWqNVR/V0IFMBhgNALz5rFNlCc4cMB1GNXfn8yQ4aQirnqRvC2zqYWY9+jMsx7BjclVNWIyAX/+6XjelQ99uZUpOQtU2bLsSnQZsZ5GQygMVa7cXLsGfPih6NvsDSfRr9eD38NkYvtv1qwRfg8D6XXRBx8A+/ZJvh3DhslrOBEKsH070LFj5mbszvJzSgpw+bJiosydCwwZwjZRSuGOK3tvgAwnFaERJ89gMgHnz7PeX1yxcaPi4ngdTg2nxEQgOhr44gvXEfngGieXS7w0KLNXQO+VHyEVpZmjR4EPP0RYohuLdQHWswOfm1Nr4uJYbwhW3+/oUXFK4saNrDe6zp2FhWcY8VP1tm1jl9C9fAn8/LOoWx349lvgzp3MY6HVpNRBVev4qZgI5PJl1kXdqFGuw9asCbz2GrB1q/D4RQx7Hj4sPFq5keqoRIbdCDwKrXFSEWcjTrdu0WwSubhxAyhXTlhYaijEwzlV7/Fjdr6A0Qinuzt7cqqeFJ/AhHdCBV0eatQAAHyU9wGWgGOR7ZdfshXwggXOlb8uXdj/Z886r6yLFWP///UX8PbbAFjPc2KWTnrCiZLZG16+fPLEp8QaMl8fNXALu33MLJjbOmeZ8PRp9v+KFZkZRWfw5ZELF7jPuyqTiYlAaKh7MnkSGnFSEb4RpwsX2E56vkxIKId1zx4hDM4Rp5492fkCI0d6XB5exo4F4uMdz8fGsiunZV6zFB/PznwiNAj1SvEjQXMu8IJn6tHo0azbNyGjSQBw5IiwcAcPWn5a+6RRG5PJVhe+dUs9WayV1VOnHM+T4SQch3fVowd3QDH1ihKdNxI+nlmMgQPF3+OrkOGkIlI85VhDFZwy+HqlIBaXa5yc4WzESUwGN5mA+fNtNQQuNm8Gnj1j5/eYP3SbNmwvoIxrlhgGKFKEXWsvVLGj8uxBqJArikNenjpV2I19+rDuZuVM2w6xn95gEL7G6fjxzE1r5WTv3szfUrznVqnieE5IfUPFhAe+St2VVSoVhbzqLV4M5M/POrxdsECCXALxtnxEhpOKuGs4Ecpw5ozaEugLzql6nmblSmDQIEcN4cULx7Dly7NTjMwOK54+lV0c645H88acrrh5E9iyJfN43jxg00Yva3G0Ao04yYrBlWYkxvPJ5Mmi0haslN2/L3kxhZA1TgaDPBuKKo1Zt+YrAtSBIwC+TKeU4SQUkR/v4EG2WPANoMmUDBlOhHy46xyCUAYxa6MJGUec7GtXMbVtbCz3+ZUrHc+Z56H//jvw5Ak7wVpBnOno9g1QixaZv4cMAa5dV0Ymn8fbWnKNobnXe/cuu+AoXz5FlT4ttel89Y75eR488JwsPoOYzOKhqXopKbbLe63XA5px1Y+0Zg07BVXqMuHRo6Xdp1U0VMx9DxpxIrwB2Uaczp2zPRaj4bjqAeTCYGB9BVvRoYP8CoUzEfiu0YCIwhw9qrYE+kfNoQmxSuc//7D/ExMVNer0MFrz+edA7dr8RUAPz+BpBOcZtUecOChUCMiVC3j4kD2WsNsAOndmp6BOnMgei3Hl742Q4aQi5I6c8AbcGnGyxt5iUSKDP3liG7+dQ4hff2VHeuREyvSddevklYGwQ+zcFF/CutzpYe6ZhtBDm3z+vI1vDQfIHbmHEPFyTSawm6X9/be4qeXbtqHYY9ZCNn9zPpfhQnjyhJ16vn699Di8ATKcVIRGnAhvQDbDyR4lWu3x410GceY9XSjWoouZqmdm7Vr3ZSAIt/jqKyAiAjh5UvStehmMeu0150YE4PgsAVDAN7gCkNGjIB5e47RjB9jNx1q2BN58kz+gVWYtiDtAs2Y4iho2QdzpC0lOZkew5EYPHQ7WkOGkIlqaD00QUpE8VU9O91cy7sQut8IhNr6UlMyNmMVuvEkQ7pKeDuzbB3Y
zz+Rk4OOP1RbJEZkWKl2+DDRqJC6qg+BXXKUogI2wA5/jU/gjXfzNTpBajxkMrKMacpIkAYXmWF+5agB++YU9uHSJP6BVBoyCDD2AdtBWiCy0Aa6K0IgT4Q0IddXreCPjXNMQo4VYu6MTioLdXEJHnLgwz0UnCDU4e86AevWQabIHBqopjkvsDQSxxTolRVz8b+A4ZziDQVqVsgNNAABxKIol6CM+ArC+L+SkaFH2vxyj796AImuc1q4F/vc/oEIFyXI5oPDQDXX2s6j+GubPn4/o6GhkzZoVNWrUwFEXi3bnzJmD0qVLIzg4GFFRURg+fDhSXNV8GkXZTMggHBraGZDwWiQbTmrjoY1MxBpOepu2QHgXRvuBjwCe/lWbjGpbTjiLzQ8/uCOWLVI2YzKjoOcVd8puUcRJvvebbxzPuTPiZEbGgXz9M3068NNP7G8pzogAIC3N9rhiReHpa6BhIKdFLKoaTmvXrsWIESMwadIknDx5EhUrVkSzZs3wgMet1erVqzF27FhMmjQJFy9exJIlS7B27Vr873//87Dk8qDkBrjL0RPPEYnqELgTO0F4GlddtEpP0nfVEG3eDOTNC2zdKjpqd0acNNA+EkQmco049e8vTzwu4Kw2rE7W+NP1OkcppKcD9eopErVL5NxRgeofR/wungc+/RTo2dN5QFcjTkFBktLXSufkgQNqS6ANVDWcZs+ejb59+6JXr14oW7Ysvv/+e4SEhGDp0qWc4Q8ePIjatWujS5cuiI6ORtOmTdG5c2eXo1RaRYzhVAQ38SdaoT52CwrfAysAAGPxhRTRCMIzaLkLq1Urdt5c8+ZuRSPWOQQpLoSmkGA4GQxg/RdrkKrbZjicq1wZuHiRO7zQ8rhzp3sGjNzrGV++lDU6n8bwLMH2hLMRp6Qk9RaIUePhEVRb45SWloYTJ05g3LhxlnN+fn5o3LgxDh06xHnPm2++iVWrVuHo0aOoXr06bty4gZiYGHTr1o03ndTUVKRa7Wj6/Dk7fc1oNMJoVNc7DsMYIPQTrEB31MV+tMJmSwWbkZEByLWHDkFIJMAfgARPPQyAjNRU3hLAvHwpqZ/Nulz7ZfC7rjAxDGfPkclkgtGYAWt10XjxIlC8uKVh4lIlrdNlZ2QEvvqdDqORu6FlGH/Y91+x8bD3knMIQm1Mfn7IMBod8rzJxO9P02TKYHfMlAhf22yWISMjAyZLGAfJYDTaVkiG9HSnLW1sLNC5M4NjxxwdNNjPrrKVMTPt1FTH9phhTFCrf/rpU2lpGwwM8KrmTU9PhzMdxf4deCvGjMzer/SlS+HPcI8BmUwmGKpVg8GZAwf7uF/lY2dv0WTKgPXXNMbFASjqEC49IwMmE/d3T083t0OOKTFM5je3JylJ+W+cnm6Eyuq4KHtANcPp0aNHyMjIQN68eW3O582bF5d4Ml2XLl3w6NEjvPXWW2AYBunp6ejfv7/TqXozZszAlClTHM5v27YNISEh7j2Em5w9WwhAVUFhuTykXLlyCcDr8gpFECKJiEwBnrgOZ4/JZMLp06d5S4DBqsNDDDExMZbfRc+fB9/S2/jbtxHNcT4hIQExMfvxjtW5wLJlcaZfP8S1bAkANtfMbN4cg7i4SERFvXh1pjUA4ODBw3j2jNsd0ePHtQHksjm3a9dOAO6NchGEVOxVwnuPH+NYTIxDnr9z5w4Kv/qdYefj+MYN6et1ANsybI1ZhuvXr+OiJYytZA8fPkZMjK2P8QKnTuENF2neu5eMmJgduHgxB+bOrYw+fc6iatUHuHcvBJG8Mmamff36dQClbMLEx98GUMRFyspw+LA0gy0pKQlAGADgyJGjgBMvgvbvwFs5cuyopUYO6N0biQUKvHpDtjxLSED2a9dExW3O687e4t27d/Hs2TNkf3UcWKwYCuAO/kNBm3CnYmNx7159AAVRC7YDECdOnEBAwD3OlF68eA5w5nJg7NhzACoLeRTJ7NixA5GRPD0UHiI5OVlwWF151du
zZw+mT5+OBQsWoEaNGrh27RqGDh2Kzz77DBMmTOC8Z9y4cRgxYoTl+Pnz54iKikLTpk0RERHhKdE5SUhwd1i1jCxyEIQ7ZM0qbd62n58fKtavz72y2Q1avjJuAMDv1i3ecFFFuBWabNmy2cRhpvzGjSjz3Xe88f33XyuMGOGPRo1M2LAhU5GsXr0m6tXjHjmaPdtxPKxxY5H+kQlCQfIVLMhZHgo9z3Q+FOBvq6TXyOaeEsSVnjXFS5RAUZ4wuXLldLjfkJTkMs2QkBC0bNkSHTsGIDXVgM8+q4W0NCNu3ABOYYlLGYsXL+4QJioqymW6WiM0NNTyu3r16k7DuvpO3kKNGjVtjkN51ipFStApW126BFPnzk7D5M+fH5EptoZNTRzGerSzOVe5ShXku5EfADAbI22uVa1aFS1bcrdD4eH8chctKqPXPx4aN26M3LkVT8Ypz58Ld6ammuGUK1cu+Pv74/79+zbn79+/j3z58nHeM2HCBHTr1g19+rAuO8uXL4+kpCT069cPn376Kfw43NQFBQUhiCOTBwYGIlBlN6sS1wlaWLHCdY8STfUhlKbUf/sk3WfIyEBAurx7lwCwLddOFhL687i1NBj8EBjoeM3AME7rjAUL2LR27rS9388vgHeZCNeU9N69vX/qC6Ef/Pz84MeRgf1OnOC9p27sfLfSdNU2+/v5wZ8nDGf55fMMaHsnAgMDYT3QzeoJwmT056hruHQSrePnl1kpBbh4b2rrUJ4iIEsWm2NDHPeIqpSv7T92LPxXrHAaxuAXAD8B65cCAgJ481xAgLN2iD9urnwtN87KmacQk5dVK9VZsmRB1apVsXPnTss5k8mEnTt3olatWpz3JCcnO2QK80dldLhNtg7rVIKQD6MRaNfOdTilkHkhrXUV5MqrXloaMHkycITD6eX27Zm/qeOD0ATz3TOEPInBAODsWaBKFdYzpkD4VAh3qgk9rtW/ciXzN89EHp9DsFc7qXrohQuuw+gxM3kpqk7VGzFiBHr06IFq1aqhevXqmDNnDpKSktCrVy8AQPfu3VGwYEHMmMF6wWndujVmz56NypUrW6bqTZgwAa1bt/aIVSw3nhBZK24sCUIVDh50HcYOuftguOL79luAY+klQWiTQYPUlkAwDAOgfXvWAmjVij3hRqEWequEqkbzHD7s/LrP1GFCe7k92IHP2alGxpVHUNVw6tixIx4+fIiJEyfi3r17qFSpErZs2WJxGBEfH28zwjR+/HgYDAaMHz8e//77L3Lnzo3WrVtj2rRpaj2CW9CIE6FXQkIAEWsp1WP1av5rYhsZF40i3+UMDo+DQjoYCUINHDrbdDibA0+fejzJvXsdz4l5dVoZXe6M1YjEM3yPj12GnTxZeXk0gcqGEwMXex6aIcPJI6juHGLQoEEYxNObtWfPHpvjgIAATJo0CZMmTfKAZMojZsRJ6siRVipjwrvo1o3dG3bqVLUl0Q7WU/Lc2QCXIPRME2xD7dMLVEvfYICjAkkKpWBWoysAYDPexm2L30QfR2j+UbKTQUjcOs3nehObxjxURIezCwnCe/BQba3HDnvCd3F3evc2SN+/STD//cdZsPyQAcbESCrbSpRTMWJobVp9JJ6pLYJmYAzam6onZ37Rm+GiNmQ4qQhN1SMIFVHROYRQaMSY8DSS8pzcCuO9e87j/OknYOxYm1NZkIoXCMcXsc0cy7YA+ZRwDiEGKuvaYyC+wzFUg+GpwI0KlTSc7DKi2DVOUkTzRzrbEUHYQKq7ipBzCELP6L6XiucBpLZ9fPfRVD2CEEn+/A6GkQOzZgEADDChKbZiBbojBC9R7cl2WSsnhpFu1NBos775DoNRDScQ/M10YTeo/cEl5vvTpx3PZUEqbqEI2s+v755MXggZTiriiREn6sUilEDt9kGLKDHiRBCeRjOdba8MI1f0xHJsRXN0xLrMkx7t1aHK0OtJTREULCPDcyNOSt9eE4dREP+h0HVp+zR6M2Q4qQitcSI
IBbh6VVg4BafqWUOGE0EoxzvY5HhSZucQfMbkYMzFA+RBWZznvC4mWa11cmpNHnUR9iGfJSjoVU8IMk/VI7ghw0lFaI0TQShAqVLCwvE0MvfvA598wnFBRMtjP+J09y77RxC6Q48al4xrnAB+I2IuhiI3HuFH9OO8npTkMllCD2jBq54M7sgpP8oDqe4qovs1IgShZ3gK4K1bwNdfi4+Or81MSQEKFGD/0tLExUm9voSnkTJVT818yimvi8bVH+kKSWPLmjXCw2pmiuQrtCaPqghU1rRcXw8ZAoSFqS0FN3rThclwUhG9ZRaCsIbyry18a5weP878/eKF5+QhCE+RL+WW2iLY4qJyGoY5npFDBFpWun0eoYaTghvgJiTwX80UgF/O27fFpUn5kR8ynFSEFE+CUBGxBfDxY6BSJSAuTtRtfAYVQRDuI2VkpCPWupVGNMTVAYS+YQRW3EoZGwYwuHnppes0JSiVbfE7yuCCVNF8kgC1BSCEQcPmBKEBTp8GPv6Y8xKfJz0ylghCBp5w7KVz4AB3WAkLiMWscdqC5gAui06D8HaUqeyD0l6gEjh8hkO6sZYLD1ERp/E72nPGQzonPzTipCI04kRona5dgSZNHM97hTEgtQAmJnKeZjIyraXffss8TxvgErpGRGHPDoEbhUrhvfccz731FndYhRvX0rhic+yNSibVPeJR6p0V/Y+ng8A+TYH5vjNW4yHyYAc4GnfCJWQ4qQhXHi+Cm1iEPrINnVLlR7hD8eLAH3+oLYV4BHkPWrlS1jQ/evGl5Xfc7puYjEnIhYfkjpzQFe4YAZF4JqMkduwTsZ+MBMNJbGfQt9+KTkIHkL7gDkqtcZK7I+BrjJQ1Pqm0x694C/vVFkM0ZDipCFdZ+ANt0AdLcAQ1PC8QQXgJUVECAll7bZCB3s/nWH6P2vgmJmEqVqIbTdsjdEVtHERLbFZbDAvffQcsX85/XYpXvex4CgDIg/sYixnIi3uc4Vq0AB484I5j2DCr5LzQ4PDGUbTXcBHZXn17UQituBV6ZYyBX1V3d42ToHgVoAwu4Fd0wH7U1d3sKzKcVIQrs1TAWQBAOLinA4nFGys/X6dTJ8+lZTDw18VaruyeSmgb3cW6mcmWzG7aVA97acSJ0B2b0UptEQAA//4LDB4M9Ool8kYXlVMJXMf/MA1/oA1m4H/YhHc4w23ZAowaJTJtQpOUw1lcRFk8QB7F0siSnKBIvCYOw8ls2ChlOClNEWjME6cIyHBSER3lcUJDeHLjZIbRZz59A0eB0aOViZyGjQiCE7l7qZ8/lyESnim50zAeNXAUACz/uXj40HUScnRQeuOolZZojB0AgEAF9/AKSlRmjZ+z/GVvONW6vQ43UNRpfELyGnW680Ne9bwcqowJX+QoagBfug6nNO6WPyq/hOpovKOgLTY4nrx2LfN39+7AL7+4jIfvMTX++IpBdU8mgYfVXYfDiBjjGHGko4KSEACNOKmKHnvyCd/C2VQ9QjxilbBBmK+MIAShAHL3UhsY9ee5+qrhRGRiyMhQWQDHcmVepyfFwKXRJPcgw0lFSCElpODphpwrn/q0MiHi4RkYbN6fs1tr4hB2oiEq8OzXQRBaR+5RipK1cuJ1nJM1ToIfGmXSJq0PjnM49yM+AqBf5xB6hqbqqYgnDCfqWSAI7XH3ruO5Q3gTALAdTZAXPK68CMKH8H+egHMoj4K4415EbjS2QvpJvLGd9cZn8noE5nMyityDRpxURExdTpUYYcbTI5V69KqnFfgaqK1b+e/JAwGr0QnZYDzpbYWQxHm87l4EAqwfd9Y4kSJKeBue1Dn1pktQi6ETpFbMVKF7H1qYquetZEEqohDvPJBPz1P0Qshwco7IqalKkE3JjXVd4KniTm014SmoI949qMVQEV9SSAlCD5xCZcSjCKriuCzxiVnjRBB6R8/Kv9gRp/z4TzlhNICev6XPwpNZXXYGEqIgw0lFaKoeoQd8ycAvi4sAgM5w7b5
YCFzKx717skRNyIUvZXAf5dgx9+7nKsf/w3TLbznaZ2rjCSkIMXAv4TXR95DhzA8ZTipC7TWhByif2pLB5yFZYDvzxx+yiUIQhADWr5c/Tm9ULL3xmXwKnsY6BC89LIh3Q4aTipBCSmgdyqOO+B89rLYIBEEojB/YvXsYxjOjQVozWnx1BOzzz4GVK9WWQn189fsLgQwnFSGllNAa+XAXWQX2TlH+tUWI2sMwtM5Jc9AHIWCbDb7AGDxHBIrhOm/20JqhQ8hD/fpAzpxqS6EMhXAbB1EL+XBfbVF0DRlOOmQA5ru/rwWhW5TS84riBu6iAG6gmGoyaAU5lSJvf1fEK/z91ZZAGbwgA/sZhD3D9u3s/zGYhVAkYyKmgmF800jyxWf2dr7DINSC9mZM6K0TlgwnHTIfg3AHUeiNxWqLQqiAUpVMC/wNAMgPR+8FYWHKpOkLeIHeSRC6ZlpcF0HhmjZVWBACATCiFf5ENjxVWxSvQIyBmw0JisTra5DhpBO45pvOxBgVJCF8kW3b1JZA+wg1kMiQ0hhy9UTordtUZnLhIUbia7XFkB2+NU60BkQaY/EF/kQb7ENdtUXhhOpnwhVkOBGEzlCjYvdxndAtrL8XNcpejI9/3GN4AwOxQG0xJONsHyeu3nf7c59hPPaiLoKRrIR4HsETowxdsBoAUB7nFE9LKt7a3on5vnwdAwEwyiWObiHDSSfQsCmhNby1cZEbe4XMx/VrYtgwtSVQhGjcUlsERRBSXhkYMB7TUBf78TEWSkpHa228lBG1SCRgHKajKG4oIBFhTxak2hwnJSmTTgPsQj3sQUesgRFZ0BWrlElIJ5DhpCKkQBF6ZwKmqi2C5iHDibDBT2fN7qZNakugOmKMiHwca0T5MLs81yJSDLmF+BjT8SmOo5oCEhH2/I52Nsc//eQ8vNTppbvQCHvQAGvQGQCwCt0kxeMt6KwG9y1q4x/Lb6kZXmu9WIT7eFLxdjWqNBWTPCOIh/FXSKEho8mLoY/rlcjpVa8zVuMH9IM/0rEanXEbUbLEqxXqYw8AIAc5fvAIrbDZJm/el9HLuCd1R73NXglQWwCCn4L41+l1Mor0RxBSkIqsaoshGGfz/r2ZoZgrW1w04kQQ2oerXIYIXK9UF/stv521y6vRFQBwEG+iM9aIE5CQjR07gMaNua/puX4ee6yt0+ukM8oDjTgRhIcoh7NIQTC+x0dqi0J4EJNJbQkIgpDC+/hNEUU6Nx7KH6kG0Iti3qiR8+t6GwExE5wh3yIn8hrJDxlOOkZIxqbMrx3G43MAwEf4UWVJhKPXBoQgCEIMzgwkMe0oAwMCkSaDRJ5HL4YPIT/Z8QRfYSQq4LTaomgeMpxUxFVPlhyVGFWEBB+rVwMfuTH4RUaVLXxlzd4deeYx88r7FJVRn2PiRLUlIARiMAhzR27mffyKNAShJ5YpLZps+CEDC9EfXfGz5ZyvdrrqzXeLXMzFEIzEbJxGJbVF0Tw+mkUIwvNoyYht2xbo3Bn4/nvgtdfUlsZ7qI4jeA0Xbc79959tGLPhNBNjcAPFMQ4zPCQdoRnG0OblekFsB5HZLfsyfCguHRnbBz9kYAk+RC8sFRS+I9aiP37AcvRySx5vMLZq1lRbAnWojFM2x1rSV7QGGU46wRsqJEI7hIdn/tbzYlgtkQ/3cQQ1cRFlbc4vXmx1wDDwM7J7b4zGlwCA6fjUUyISWoGGazVHLeM+NMAuh/PNUjYhAOkO591pk5VuzztgHT7EMixFb0Hh5Vpz5Q3Ktr+/2hKIwxveud4gr3o6QcxUAcK70YqhE/pC+H4lBEuOwV0xcNMv+ALxAkJr5EMTwtBKwSQk8cezegCAbHautJc8ftcj6ctpTOXAE9niInwTT3bW660fiUacCEJnyF3JCInPPgzDAI1jhssriA8QsukXAEA/HTkI8XrI4CGsiMQzQeHc6bj05Y5QPTyn3hR5wrOQ4aQxTDxWPpf1T9P3fBM19DzOPU6SHnl
eEB9CDwqGV+BpLYkMNUJhSDcgxGKfZ6j94YcMJ4LwcSTrcdQt5xKPND65cyufBiEvVHY0y2L0UVsEt9Gy0ktGnTbRcp7RGmQ4aYQAGHW79wPhvTjT7xhS/kRhv3ZCNqpUUSZeQhmo3GiaJtiheBpcxgMprtpBT0VUTL6hPCYP5BxCAxhgQjwKIxgv4WeVsSmTexda+p7Wo0zOGolSpfjDMAbqd3GFtYK0Hm1F3Ss4v9DUL4IgrBA7qlMKV2RJV0ttnBTOnFFbAu1AI4P8kOajAYLxEvlxD9kELko1o2QlpafRr8hIz6Xlj3REIsFzCapMhw7sf27dnCpWMTTAHstvWRslMpy0gdDvQN+L4EBNRXUgFqiWtpYoX15tCZRFC8ZQY2zHFjRDkVf7nQH6GuEDyHAiOCiHs0hDEL7GCLVF0RzHUQ0JyI6CuKO2KLLhTI/jq9BI9xOG3J63OKGPQRCaZA6GCg4rR0doGF5gC5qhN5ZYzo3CLBTFDbfjlgu9j0rpGWfv3lPOIbajKZphG77Hx4rE7wnIcNIAWqtIpmIiAGAEvlFZEmF4sreiEk4DAN7BJs8lqgG4p+rprJvIWyHDSV8YDPrrYiUcENJuD8VcVMMxD0jDMhJfoxm2oSpOWs7NwhicRkWPpK+FEQ2p/Eg7RIjif5imtgiqQYaTiojRd8gdOT++pjfK8bxC1zjxwd5D+c8V7pRRrXWoEAQhni1o7rG0+PagCkeiR9IXUmdpVW/p21dtCaQhVzshNp5pGC9LunqEDCcNIKQikVo4tKZ8GWBChMi1XITnOXUKiIvLPLY31hgG1GsuAL7yR2ucdMS9e4jPWlK++Oh7eQVCy3AEnissCSE33tq0iWl3tGrgagEynDSA1owbJQvMHtTHM2TT1JxrKUj5Zlr7zma49LhKlYDoaBf3eWvrQhDW5M2LR1kKqi0FoVO0Wu/7Ou3bqy2B5xGzxonghwwnFcmeXXhYqfs+aK0w1MV+AEBX/CxbnNSBKx6x74zbRtJW3vI2yDmEdpC1k4DWOHkFZBA5oqd3snSp2hJoGz19S09DhpOKuOrRd0UOPMVe1MU72CiHOLpFLzqIXEZs7tyyRGNBL++PINTCQMYpIRFPrk/Wg7KrFRnDw4F33uG+pqc2MQTJaovgNnp63wAZTrqnLvZjI97jva6VSsoeteTKg/tYil6ogcNuxSOl4ZPrmWvWlCUat2AYmqrnDq7yzwDMpxEnb4W+l08htBxrra2WIo/WZrj4AqdQWZZ4tJb/tAwZThqGMrIwxOghP+Aj9MJyHEYtt9JU69s8fqxKsjxQIykVV/lnPgYJj8xkclMagiA8idj2owwuCNp4XW3DRY/LB/ROPtxXJF76TvyQ4UToHjGGU2lcFhTuTRxAHKLRCn9KlEoZcuSQJx53O70NBhpxUhrqOPFSaI0TIYJKOIULeB23UMTtuErjEuZjAKIQ7zKsLyjOVAwz8YXvLRcBagtAKIuUwqC3AiSm8hOqjG5HE4TgJf5EG9X2SRDDxo3Au+8qFr0DrOGlr3yiJcgduYbwtPZE34sQQetXnXeRMrg1P4RayI4E1MRhm01yCcIe6rjjh0acdIJURcubMv+GDcAnnzieV0IPCcFLp9fFvNcqVdyVxjUlZdxmRjDUXacNSBEnCF0hpj3PgjTBYV21S9lfTferglOC4yQIwhYynDSAECXcmwwgqdSsCXz5peN5veiN7o4yzJzpJG6dvANCOJp0DhEV5bm0PAUVHkLDqOU1TSmdQw+6DPUJEs4gw4nQPUpM1eMiC1Itv9Xwqjd6tFu3yw6tcZJOEdxSWwRCTq5fFx6W1jgRHHC1DwVxByPwjQrScJMX95DVxWwM52jLaPK1YpgHDwSHFaPj5MQjTMc4lMBVKWLp7juQ4UQ4ZTDmIggpaosBgL9wearQfYGxnklIJcQbgzqr7TRELyx3GUaTPbN6a+HkwtVjFysGZMkiLC4a4fIp3CnHwzBHPkHcpCDu4B7
y4zakjTpPxBTEozDy4Z7MkknH14piaVxRJN7l6Ilx+AIn4YF1CRqADCedoJbDhrkYinGYoUraQvFU5dfTStnlagwLF/aMHHLorlzvTHQD76tKtNYgd+TKI6Ro7NihuBiEdpC7Y4OrjfeDuLItp55gH1cj7AQA5AL3nhiNsR158JA3vimYjCjcQTY8k01GpfDFpq0cztsci8nftXEAABCORFll0ipkOBEO2FeYb+Efj8swfLjjOXNlFmDnCzJrVuXlcUWpUsCnnzoPI1dDq4VeMobRn/dFvaHJNU7eiFxaUp068sRD6IIKOCMonNB6kqu8i20z1Bqlfgv7sR1NVUnbHXzRQJKbYJXW4KkJGU46oAhuIszLLHl3KvgWLWyPixQBxo0DJk7MPFehgvzpOsMndVdqdXwPb8zoajwTlR3dUwuHBYVzp80RO+KkFuYRB8K36IllSEaoxVujr0CGk4YxgEFJXMFNFEUh/Ku2OKrjbI3T9Oms8WRm7VrPyGQmKMh1GLlGaPyo1BLWeKMxQxCEquscxaTtjwwFJSHUQIi+sgwfekAS7UEqmMZphq1u3S+l4rUvMFpbpG6vJ2pBbyxY0HUnsmwb6Sq0xkl0HNRrrig0VU9DUFYnPEAB/IswvLAc62XEKQDpaotA6Bi9qRJkOHkJbfG7bHEJ21dK2QpdSkESco9QZdQkUlNauFBUcLfQTiWjGUE0i9D8Zq0siYYMJ4LQFVy9+flxF/+iEJ4iu+Wc1jot+aARJ+9DL3lPDchw0gByZNDf0Z7zvBzTw+zjmIb/4R7yoSDuuB23GMwGg72eqIQh4SfymxQtqqxBM3Vq5m+lpuqJyStNm0JLFpzu+Q6DHM5psuHy1W+uwU9hz9We09QWgXCDmq/WTAVYGSFq1gFi2gNvG3Hy1WqOEAYZToQDrirM/2EG8uChom7KxVRcqqztltCgudMItm5tFY9ClbpQ+davB7p0ARgDVR+uEKp8tESMG4noQKvXObIrsAoUYsoF3odepurpdcSJDCRCCqT5EC5Ro9eLq0LjG3HyBazfhxqVvXWarVqxx7TGyTXulB1a4+Qh3M3HWtgPwWAgy8kLUbLtzeVkzyWxaStpODXDFlxFCbxJnvs8Cm03wg8ZThpAycpRr3E70wX5rsmpx7ta4ySlUnGnIrJ+NvKqR9jgScOJjDRbevQAjh3LPF6yRDVRjEbVkiacILSd5Aqn5IjTQ+SRLS4lDactaIESuI6daKRYGvZQnyDhDFLBCAf01tPAV8lNnqxgmlybFXqxVz3r8Jnp6yufaBm3yty778omB8EN7+jq8uVAuXKZxx06eEQeBwwGPH+uTtKEPHDVAfZtRnP8jesohjrYBwAohct4G3+hEXagJg55RE4uPLHGKStSFU+DyESTa2w1AhlOhANaKDByTNUbOpQjDg08mxTUnqrHiWYE0S5ujTK6yqsFCwIJCUCBApLTIGRGaJmQu+wYDDAxVB61iDt1gP2I099oiWKIw240AABcxmv4C62xA01wCG9KaN/kaQ89tcbpDMrjS3wiW3zUhPEzDZ9Kuk+Kx2W9fQcynDSAolPeNDoq4M4zCzGc3J1RJNarHsBf+PPlc08W+7jlmKrXpYvjOWd5hevZaI2TaxQ11P39gchI5eL3FQRUFgahFYqKUxmpOGoTJdY5+ss0hc/ZVEAxuoOnDKfyOIdP8LVH0vJlApGG2jgo6d7CiJdZGu1BhpOGkUPpkmMDXDVwNuIk5h4t8MMPr9x3y4Qcz2ntpU86Gn3hXoImR0e1Wsh8HfouXonYOkBs2y3XGipN1lWEZDzi1EjHqG44zZ8/H9HR0ciaNStq1KiBo0ePOg2fkJCAgQMHIn/+/AgKCkKpUqUQE+OGK18N4AsZTSzORlWEdOry6RFKbYBrT716mb8LFQK2bgXCw6TH5wnnEM7eDfcaJ4LwAuTM0GoVDoOB/HboCCHrmQCgHvYqKoezkSIxeokWOlsJ+Xgd59UWQdOoaji
tXbsWI0aMwKRJk3Dy5ElUrFgRzZo1w4MHDzjDp6WloUmTJrh58yZ+++03XL58GYsWLULBggU9LDnhivz53bufa0REzBonhlFXweecQSWTPFoxXGiqnjAK4g5Oo4Lo+8gduRejwBonzVQMhEuElO1SuIyiuKmoHHrdf0lpfL0onURVyff6wkCAqobT7Nmz0bdvX/Tq1Qtly5bF999/j5CQECxdupQz/NKlS/HkyRNs3LgRtWvXRnR0NOrVq4eKFSt6WHLvRo7eI5MbMwAaNABq1wZ+/FFYeHMlp/XKzq3hbw3t4+T8JGHPFxiLCjjrNAz12GobwZ0EahmxVBZ1j30dUA7nRMchto3Rywa7cpAfd9UWgfASAtRKOC0tDSdOnMC4ceMs5/z8/NC4cWMcOsTtVvOPP/5ArVq1MHDgQGzatAm5c+dGly5dMGbMGPj7+3Pek5qaitTUTDeWz1/5bDUajTBqYuOLQK+00O/fl34vw5hgNGage3egX79Ay3n2mwEM4w9rm98cPj0dAAJtwpqP5cb+mxmNRphMBpiLVEaGySKjHPks83lgk471dTHPai2TyRQA++Ewe5nT0zPTZJ+VBjqEEqSQG10GQLrRCENGhscqcgZeuLLNRUY2Go28ziEcyrbR6LIUZmRkwCQgnBgYsPUgoQ9y4ZHaIgAQNuIUikQkwfk8czV0mCpVTDh5Unjf/2h8afltLrfWuoR1WbZu7whxSMkLRqMRASq/bjF6mmqiPnr0CBkZGcibN6/N+bx58+LSpUuc99y4cQO7du1C165dERMTg2vXrmHAgAEwGo2YNGkS5z0zZszAlClTHM5v27YNISEh7j+I27yjtgCSUaqyfPz4MWJiDr4atcp8P9u3b0NoaDoePqwFWG3el5DwDDEx+5CRYQDQBgCwb98+5MyZAuBtj8gcExODM2cKA6gMALh//wGAfJZrAJA1XfpeF/v378fNmy8AAKdO5QbwpsN1oKEoec0kJTUEEM57HQCuXMkGgF249fffMfDzAyITEgSn58tkgLtTxxWu8urLly+xPSYGhc+efZXrlOdlcjK0UGvKicnEOP1CMTExyJ/O3ajalxP/lBS0cpHezbg4nIuJkbXmNzHMqzqH0AP/wwy1RQDg2qveSHyFrzAKXbHKg1IJY+DA7QgJScfIkfXw33/iFhCby63RWA5AcZtzAHD+fA4AdeQS1aeojFMojuvYDuEesbZu3YIsWdTt+ElOThYcVlcmtclkQp48efDjjz/C398fVatWxb///osvv/yS13AaN24cRowYYTl+/vw5oqKi0LRpU0RERHhKdIKDJk1M2L7dsccoV66caNmypUNHcNOmTREZCcybZ6vmZMsWiZYtW8LaLqlbty48ufStZcuWePQosy8+T548NtcAYH/AMsnx161bB2XKsL8DAx37/OvUEVfJm2UCgNBQthqwnipifR0Acue2vebnB5yfoW+nLJ5CquHkiuDgYLRs2RKGu56bghIcHOyxtDyFn58BzjreW7ZsiXMB3/BesyEpyWV60cWKobD9fW7i5++PPHadkIQ2kLK1BeAZj7iunEN8hVEAgBXojg/BvYRCLRo2bIg8eYwYOTJT4V65Mh3durlWa83ltnZtYMgQEzp1MqFFi8wyGRHhdePqqI1/PJLOb3gfANASm/E3hNVzzZs3R9asSkrlmucidhBXzXDKlSsX/P39cd9uTtf9+/eRj2fjm/z58yMwMNBmWl6ZMmVw7949pKWlIUuWLA73BAUFISgoyOF8YGAgAgOVmcYllnC8UCVdg8F2lkpB3EF97EEgbHtXlRqlKVyYe5jdYPBDYKCfg+GUJUsguD6Znx8b3trbHPt9ZRTWBWy+zDw2GPxsrrEnpcdv/ewc2Vx0XrYOz7U8wj4+62H0wMBA+PkBBlpXIQiTxKWkrsqdwWBgvxPPNGUl8Mpv7uKZnJUth2sCyqG/nx/87cKlIRBZIH1KrwG2dQ7h3fB5fVVrjZMaazQ
DAsxtYuZU6PLlham05nKbKxewejVgv9xf7WljSvCPh0fQYvC24PzoaX2NTwahqFbTZsmSBVWrVsXOnTst50wmE3bu3IlatWpx3lO7dm1cu3YNJivPA1euXEH+/Pk5jSY9EIg03EK0bPH5IQPVccRy7CzjNmpke3wJr2EVuqEtNsgmjzOk6mD2BpWYtTZSjMBFi0TfwimTXE2L3LqrWVbR78YblWgFUGrESZVFZrSwzX043qHbK56oLGoWoVtb2Ne/zowRvmti63ChXvW0uA6bqiJCLVTtohoxYgQWLVqEn376CRcvXsTHH3+MpKQk9OrVCwDQvXt3G+cRH3/8MZ48eYKhQ4fiypUr2Lx5M6ZPn46BAweq9QhukxfOvSiI7cmZhdE4gpqCwtpXPGFwPc2Ei8K4hax4KeoeKRWxWTd4913b87lysf/tn0cuXULkLDjF0YqORO7IhSHEcBK6rwv3zaRBeANJ7qweo7KoWYRO1RPT1ss1wiOXO3ItGlYEoRSqDkh27NgRDx8+xMSJE3Hv3j1UqlQJW7ZssTiMiI+Ph5/V/KuoqChs3boVw4cPR4UKFVCwYEEMHToUY8aMUesRFMUARnSFNBKzbY7lqGCd9W69jnM4h/K4jUIojNvi4uV5NFfuxQcMAEaOhMXLnNSZSiFIQjJCpd3MgbW83CNO8jQuSm2AK5TM5yRlTQhKOYcgZVmDSNydm4HB/fqB8oOusW9nnW5IrrGpegThS6g+k3PQoEEYNGgQ57U9e/Y4nKtVqxYOHz6ssFSENc4q43ewCQAQhTvi45XYzvv7sxvkrl9vG4/Y+JIQhtb4A3+BY7ddJXBDr7F+NrUNJwukqAlCquHUCn/JLAmhVdw2mjRTKRBSEZMHtDbiJBcVEYuh+BYTMRV3EOU0rJID7dS0eRa9vW+qbTWOuxWkkkPobssm8na+ER13KtAFGCA4rPXzcr1XVyNOYqiOI1iEPsiFhw5xFyniXtz2mGUV+z1p01bXGMBINpwaYpfM0siAr04LFJrVhVRqSrxDvWkehFt4uu6V6hlQLLGojF5YjnXo4JH0CGVphB14D+vVFkN2VB9xIjxLINLQHSuwA43BMNGi7u2B5SiMeMux1uc1c+kS7qytEtUj6OarMa9Ty4YEvI/fbK4VKQL8/TewYQPw44/upUMoj1TDqZvQvVN81ZjxJAq/Ylmm6hFehRTnEGohtzzlcE7W+Ah12IEmAIBCuI1/UUhlaeRD0ojT7t275ZaD8BBjMBOL0RdXUVLUff5Ix3L0wlRw75clBSH63p9/Zv7m61Q1n7eerVJI4TIqpaGQ0ilcGpc5723eHHjjDfHxyYFFFurldgkDg2R35AQhGIOBnLV4GZ5Y4ySXsa6G0U9T9fRD7lczZ7wFSS168+bNUbx4cXz++ee4fVucQwDCs9hXsObpP4FI5wrOiyfnQltXWnwGEOdokgH47z/g5k0gPJznPpk8HLmKR4+DAOSOXBnIcNI/siqGPOXGrTSoLPoUWhtxkhtvfz5C30hq0f/9918MGjQIv/32G4oVK4ZmzZph3bp1SEtLk1s+wk3MjfF7WI+pmGDTOKut3Luzxska6+fInz9zDZA7uoQ7U/oAoH9/9r+1K3ODhBfuiZ48qfmAGjfXyGWoO79Zh1a6L0Pfi3ATOUec6mGPDBLJTwCMWI/3eK9HRCiXdokSysXti3jbNGRJhlOuXLkwfPhwxMbG4siRIyhVqhQGDBiAAgUKYMiQITh9+rTccnotnlI+16MdJuBz1MdeyzmTm55I++MHTMN4yfd7cyfpe+8Bly4BO3ZYnZTJq57lXEY61qAjhuBb6RG7kb5Xf0CCGx9V+j1RT9OIk2+j9D5O0YjjPP81RoqOSwj1sRtZkCr5/k5Yg/ewkfPaX38B2bMLjytnTnFp580LxMaKu4fwHdyeQ1KlShWMGzcOgwYNQmJiIpYuXYqqVauiTp06OH/+vBwyejVqWuLe4sFWaZ1BqNc5ezlKlwa
yZLEJIVkGrmcsemo9OmIdvsUwyfEKhUtfpjUVrvFIx4iPGjMeRehnDA4Wr6XJgcEA2ldN38jhjtxZHDvQ2K10xdZlu9EQi9FH1D3WhCGR99rbb0uOVjAVKyqfBsGiN1VCsupsNBrx22+/oWXLlihSpAi2bt2K7777Dvfv38e1a9dQpEgRvP/++3LKSqiElI14hcTJe82FEwi1cPcdyP0OA16+kDU+8Uq+zmo7FdDsFIWOHdWWwDvx8wP+/VfSrTT11Xv5GV1chlF6H6fiuOFWmlLqMsGeQRVGbd3B1/G2uk2SO/LBgwfjl19+AcMw6NatG2bNmoVy5cpZroeGhuKrr75CgQIFZBOU8H6K4TqeIAeAzDF4dzrTpVaWkUjw2L4VWsBfmrdsao30yhtvAGvWAPv3s95UCJeIWp8YGCg6frfdkVNZ1DRd8AuW4kOnYZSeqkcQaqHZDkSJSDKcLly4gHnz5qFt27YICgriDJMrVy5yWy4ANStAobqAUjLabOqKm7gOdkVmU55CJoduIKQAr4VCvfEabetWrwaqVnUehuvd01Q9QhZ27AAac08j8hXcViwM3qaaeB9Nsc3pdbUMJ8o5BCEOSVP1Jk2ahPfff9/BaEpPT8e+ffsAAAEBAahXr577Evo4ZXFBsbiFGk4NsEdxd+S1cUDR+IVSGLfQzK6B43pPXI2NKzvCHa96StooVarYpsUFt+hkOLlCqNv//LgnPnJzppA6LCslUymxnqpRI8RvvSh/vGqhRocCrXHSPH5w7o1J6TVOcoT3FMyrBQIEoUUkGU4NGjTAkydPHM4/e/YMDRo0cFsoX8JZxVUNxzEACxVLW4wO1B0rFJNDSxTjmAcuGzK3A6oP+KguACEJJf34eimaH101GMhu0jiuDCcxqGFUeDrN6MLaNOoIQpLhxDAMDBwNyePHjxEaGuq2UATL29istggWiuO6x9KSyzkEpwtvN3rYxHrV40pdToQavh98IGuyhN7hq6Pz5HF9r/XGZIRsUO+69yPn6I4aI05KjE5VwzHO85F4LntaBCEXotY4tW3bFgBgMBjQs2dPm6l6GRkZOHPmDN588015JfRynDWYWmpMlag0hRhCWvW0LO3bSH8Ydzq8ly4FVlk5N/r8c+5wzp6Jc42ThvKnT2Lxky9TIcmRA3jwgPOS8cwZBG7cCAwaBKxdK096OkLKNFvRabjrHEKjdSXBIueIk5yoOV3vGKojGnF4gDwoh3M214qlKLdMgdAWWh/Qt0fUiFNkZCQiIyPBMAzCw8Mtx5GRkciXLx/69euHVau04X7SGzC5v82WU8ToAp6sXD3tjnzJEufXze/J+h14urFx59mtnXx9+in7JxbOvKK32o6wRcz3e+01YMIEcbtO6gRjnwHyRmj9XosXBzzhXbZ5c+1PJ/RxPDHipEfK4CK2oSmOoobN+WrPd6kkESE3Wl1LJxVRI07Lli0DAERHR+OTTz6haXkKo3TlqLbhxNfOy9W5K1SPkOLDpEB+AHfF3aOFpk7WTY9JUVMXud+/Vod3FSZ1+FhkWTBHmcjXrgWePAGaNlUmfjPz5gHdFimbBuEWWlzjpMQejVJ4S0bnUPv2scVuxgxazqkVfkQ/VMMJtcWQDcle9chokgennsy82HAygBGtp8kxEiXFGx4AdMUqZEeC5bhqVeDdd23DuHLpLWUujZSNgpWA3JFLpzJOKRJvmtHNCEaOlEUOWVAzL0neyMyzpNZ24ngpPFwTHTMEPwECPWy6g5JGEJ8+Eoxk9IN2jPY6dYDvvgPCw9WWhDBTFSfVFkFWBI84ValSBTt37kT27NlRuXJlTucQZk6e9K6XpBZyGE6ReCaDJNqYqscXRowBlhf3Hc65GoUJuHsbq9DNNn0wDmvsS5cGDh8G8uUTLo+ncfZuxX9jUtWE0BDK7Gf34AFQCJA+UjR0KDB8eOaxrxrCGnluV+Uv4Yd1yFsuN+919ccNCGf0xw+Kp6HG6NE0SJj7LQBvmo5
IeBeCDad33nnH4gziXfuudkIySjuHCEWS23EAyixs9bRzCANMnPvquDKc/J8+EpxGjRrOror/nuY8wDni48E20kdncWkat7+JfaZS8SNrPX/JOrpqvehQBJ722kloFzmn6rkbtjF2yCKLI+5XCuF4jnrYixNMMxnkIQgWwYbTpEmTOH8TyuHtziGsFQEh8TuOMjGvDMMwl+EB8G7iK2kfUAkNl8GQ+Yy/oBO6YwWMyOL8HoEb4BYrJkyG6tWFhRMCTdUjfAVZveqNHi1fXITXIqXN9dQ9Yu6fgbHYjLdFxyuHcbgZb6MO/sF3yZ+4HRehHHpTJZTVzAm30Nsap2H4BlvRFHk4psO5mz4Xi9AXiQhHyQTuvSCE4qrQyvYdrJ63E9aiN1y48xNBcDCQm38WDy5fBn79FWjZ0pl44tyR6662I5yj4vf0qayUK5faEhAapy1+V1sE2RiLmdiPuhLudL9SqIN/AACdUpa7HRdBmBE84pQ9e3an65qsefLkiWSBiEz0Zjh9gxEAgNkYgQ/wszhZrJ5ViBMIgwHo88rwaH9lOoANotKzxnptuNB3LqmXzi7qHHBdTpyNONmfCwnhj6dUKfZPXnxJ29UwWp/npnF08/p8yrr0PaJxy/L7d7RHe/zKG5avnRKrMzjzqmd/nivuNtiEcjgvKk2C0DuCDac5c+YoKAbBRSHcERU+AI5utuSaYicmnlwQtiZIKxvgip0yKFu6MqeV7rB8i4Gixg0pctpAJ17hCH4YGFzWB1TcfAsp64o97RxiE971aHoEoQUEG049evRQUg6Cg5wCRiSsSUFWhSSRZxFpv34AfmR/T8DnGGT4TJwMIhUHh7XvPEaEWgqJkIbRqfMQu9ecYbWE62MswFRMfLVwt5I0Aa0oV45d0269lyd5PdII3bsD8+cDFy7Ynv/pJ0Bqvf3ee8AG6aO4olHTKiCLhNAgsyB+LZxYw0nMdiha2O+JILSA4DVOz58/t/nt7I9QB3+RPVRiRnOke9VjEzlzBliwgD993ukCQlyTC6zQXyvlOg0uY4DX4DJloBtWoBiuC0rfPhZBDjEEOocAbEecFmAgcuExlqGXINlcyRMcDDx/Dly7Jjg6wlOEhQHnbafLZFSozBpUWufVaJlupssRhIewnronlD4yrpslCIIbwYZT9uzZ8eDBAwBAtmzZkD17doc/83lCOzjtUVLYq14bbMJd5Ed97Eb58tJmFAmRUW6vzEJpcH0xVqAHrqOEsHTsJBU1iidAxgxup4GykTUrEGA9Rk099Zol/q7gyQQWUlIVEMQJSR9/AlwX1ulAEEQmco72Sx1JUlrVEypVnvS7WIFuqI4jispDuEcFnMZqdFZbDFkQ3Lru2rULOXLkAADs3q3Mho6EZ1HScAoKAjalvgsA2I2G4KoGrfXu/v0NwPeO58EwmI+B+A8FYDCM55WO86zCen3pB/vE3WCwP2QwYwYwbpy09O2/ibuGk+jGmAwnzfIyWfw9Juv844Fve6vOByhbpIji6XgLBj8X34TKIyESMe26fftw9y6g4OoAwXyT8DHqYye6YRVO03RCzXIc1Tj30QTM+VA/9Zdgw6levXqcvwnfQEwFGx4GhGcF4KIHW0g7H3ztLAZgIQCAAbfhJFQ2tdfkcE3VGzsWuHSJXY7y0UfADz84hhGKo3MI4Rw+DPxUU9w9Gf7O96AilEXt/EzIi9vOIWi+IyEBoV71rPnjD7ZzVFmE1W/FMmj+uB7gM5r0iOR9nJ4+fYqvvvoKvXv3Ru/evfH111+TG3KdkS/lJnaiIVpis8uw74lw950lkBHdhtu7Gjfjl/qS87wU+GSSEq8c7sjN68Z++AHYsweYO1dcdPaKszsjTjVqiL8nPShUeoIycC5MgtBehJiF3ULIkLqMUSpWBVLNwRKyNwi94YlOk2GYw3utdWvFkycIzSLJcNq3bx+io6Mxd+5cPH36FE+fPsXcuXNRtGhR7NsncvoSoSjOlKvxN/ugIXZ
jM1rhp5+cx5MDT8UkKmxtEsN3IB/RiEMpXAYALHG5blZZDYrPOURQEFCvHpCFYwDnNVzGGzgqSLHkul9J1B7xUDt9b+OR9S4CHTvyB7x6VZb0bPK0xqeZkUcxwhtxlq97wlYp8HQZEFq/UztAeBpJhtPAgQPRsWNHxMXFYf369Vi/fj1u3LiBTp06YeDAgXLL6NUoXeizZAHatuW+lsN43/JbTgdccj6TkLh4vd6BQRyK4TJew+Vjz1GlMn/FPxmTcBtRyId7kmQQBMcaJyEcRQ0YOBat2N//999AVBSDceNokawvIHfdYbIecXr/faB8ee6AJYQ5QxGDqqM+GjfaBOMtz0G4RM6y/zouuA5EEIQFSYbTtWvXMHLkSPhbuUnz9/fHiBEjcI38FYtC6V4cPwPw++/c15Qy2gwQNlWPzw14WJq4KZ+879BKEwx4eNepXjEJU1EI/2IMZgpKk4FB9ACVwe6l1BDhBcjvZZLLMLVrA9evp6NGjUzjT479t3jDk57mvRgM0uZveila6NWm8kbIjTvOIbRIdLTaEhC+gCTDqUqVKrh48aLD+YsXL6JixYpuC+VLBLnyoOAmzqpFRuWW2D8jDV9gDOrD1ktjj8v/c3lvZOoDWWQIQqqNQpIBYT7T5TB4G2GX8MAe6JLPi/uuA2kI7Tfj2sTTbse9AqmZrWhR22Mej7QMDK7rFFf1NS3W8hkK4C52o77aYiiKlNwcGSm7GIQn0FndJdir3pkzZyy/hwwZgqFDh+LatWuoWZN1xXX48GHMnz8fX3zxhfxSejFHUV3R+OVeQC4nb51dgPaYhTGYhQNYaTmfK/Vfl/d2PufauBJCHjzEYyuvCikq+1dtgF34AmMdznN9R3emMXIxEZ8JDqsF9FXVaocL54EqAsKxyryCaL2x7NkTaNhQ2r137gDJycCrLTws1K8vWRyXdpPkmAk9Uh973Y5j8iQAU4SFzZcX8GzfGq1x8hX0Npou2HCqVKkSDAYDGKvGbvTo0Q7hunTpgo7OFhYTNuSEsp4IneZHBVtaITpR7md8m19m3swXT6jxmcv4hRZGw4vnlt+pUNDHqgB5dqGRrElG46as8WkLndW2HsSZMmEU6BX22lUGJWWShwvNN5bLlll+2k+zdUnBgqKCyzNlW+svlNAaZV4Tnu/atGGARfLLUBCuO0oJQksINpzi4uKUlINQCDV6YwwMgy+6XwBEutdWHB7lx1pp4Zo+p4UeLS4lU4iylR0J8gtjRvURA7XT1zLuv5u796Co4WSDilaU2Gx8t0EX5N+9WnD4jJKvCZwA7AIXgmreECU0h4ERsQeBQtXtYvT1bIIE4SaCDacitMO7LlFlqp4B6LeVx5WfAFkMAvZ3EWI02OsZvHFJfA0eXSOmupFC6Ak5cmZagOf26dJT9i6x+0esQjI24D2sEBB+X1wURuEYniI7+MfYBaxxcoWeXiKhCUQ5h/C4ZU49AYQ2EWw4cXHhwgXEx8cjLS3N5nybNm3cEorwDEpWhIb7bkyG5vG2pxSe3v/IW6Aebu0ipdzYK1F/VZ6AiPMH8RN6ONkK0wvgycinTwOFCgE5c9qeT0Yo2r7aEFyI4ZSeDpxANTeFpDVOhPz4J78QHDbopevp8XKihZkeBMGFJMPpxo0beO+993D27FmbdU+GVzV7htVie0JdVHMOIUCrrlQJwNlXwW3kdH1voSgDXE2NFtqbFhLsGZVD9DoJ63u5noUsF0IE51EWr+MC9hbsCi5H4/b1QXJwTtTAUQDAHBjlF0jjIySVKgF+fkBGhvhOppEjgbt3gZ9/tj3/9CmQPbtEgVy+L++qDzbiHbyLTWqL4dUU+HqE4LDVVw1RUBLpkIFFeBpJ7siHDh2KokWL4sGDBwgJCcH58+exb98+VKtWDXv27JFZRMI9nDS2KustfK5Do6NdO4coWEgBgXjgqpg9vYs6FxrXOwmNUR1HURXHcaDA+4LCK52/9GD3m0QsAbFm9mzgl1+As2d
tz3fo4L5MvoLQrSEI6YRcPKm2CPIiYB0zoT309n0kGU6HDh3C1KlTkStXLvj5+cHPzw9vvfUWZsyYgSFDtNkr4bsYgH/+8WyKDOOWVpQ7l8A0XIWxEsHpdDwPWSDuTI304yipDpXN06fwmzsXQU+U9dRogSw3XZGMUJxEVY9YLFVwQvE0PInU0WKj3UDdjh2Zvz/F56LiYkzkHILwHSSNJF26JL8gBGGHJMMpIyMD4eHhAIBcuXLhv//+A8A6kLh8+bJ80hFuUyL1PFCnDuc1m4opPl6+RA0Q1oorrHj7GTLjL7T8c+D5cyehlcedXpUc2QXc27s3/D/5BG9OmiQ5HYIAgHv3gMWLpd9/StBOUVboyKueXEzHpzbHQuqHZ4jgveZt/Rg0BYsQjd16ezOUlwg5kWQ4lStXDqdPnwYA1KhRA7NmzcKBAwcwdepUFCtWTFYBCeXIndeqMnntNdnilVJJ8d4jl0K1ahX8x46SJy6JuLPGSRB//QUAiLh9W9l0CK9kezjrCfMu8mHcOP5w/ft7SCDCgdb4U20RPAYpu76OsPaS8gnhaSQZTuPHj4fp1eTvqVOnIi4uDnXq1EFMTAzmztXa5j0EH7nzWFU4L1/KFq/gkRUro8jmHj4Dw03Dw3DkkKR45aqYDeBZMLF/P1C1KnD4sBMhBDiH8HdjTUC6wF1RreDyAbMYvaXLIJLERI8lpTuk5NkDoc1QFcfxGi45HZz94Qc3BLPGKk+7moamKBqY4yb0e/2Dt3AZpXAVJRSWSH1IIdYwHql86fsT2kSSV71mzZpZfpcoUQKXLl3CkydPkD17dotnPUL7KPWlKj/Z5ejDVyLR0bJEw6LyVBY/rs0Gb98G6tZlf7/1Fv/NHIaTw6kAN3YXeO890bdwzYqYjRHogyXS5SBU5SSqeiwtPTUVSu9hI6iziWHAwA9lcQEAkOHebiIEIZ1y5YCbN9WWQjB6cz5AaBu3a97br6YFRUVFuS0M4WGUVAYeP3Ydxkrzz3d+J2eQsDA3ZBC6A65EAkQO8HCOOBUunPlbpBt/h8bAHcPp1TQ/MXAOglEDpVs83cNvCtDPBmqKT7MVgFkEE4+3OT0ZokKgEScNc+sWsGCBokkILXHh4WlAgpKSEIQtkqbqpaenY8KECYiMjER0dDSio6MRGRmJ8ePHw2jvRogg+LBSRorvXy4qvLvpuRuvAQwqVhR5j9LKlztT9aSgAWWS4GZO1Nei77kYVMnyW+lPuxIfoEyHcpg1S9l09AIDA/xcqYquphR7WXEkw0njDByocALCvn9wiOsOR8pL2kZvHa6SDKfBgwfjxx9/xKxZs3Dq1CmcOnUKs2bNwpIlS8gduZ7wZBdlkyaOa3iEpC+njAlP5YsLQHCwuPCKVw5KG04zZkhytbYDjRQQhjArAwwDLLGaHTkCX2PppTcRFwd0xmoAQEescRrXZxiPJL9wxWS1pztWAjBgzJhXJ7xtuISQxDUUV1sEQjPoS5kmfAdJc3tWr16NNWvWoEWLFpZzFSpUQFRUFDp37oyFCxfKJiChIJ5UVnbssN3EBBA/AuTm5naGBw/EpecEaT1YbjQEQmRU0nC6cgX43//Y3336CBYJAI6jGhqDeyomIR1zvt+5k/0kZrccqQgCANSvD8SjM35De6Qj0GlcibCdE3vxotzSahdvG6nRM8/Asys64YNQZwqhTSSNOAUFBSGaY9V+0aJFkcXpTqME4UGEakQmHm935mg0WoE7yOXOGidXPHumXNyEW9gbOeZ8Yd6azZXRxIWn95Ek48V9vGHQzrpOq4KTKkpCqI2c7a7epoIR2kaS4TRo0CB89tlnSE1NtZxLTU3FtGnTMGjQINmEIxRGqy2tyBGnyGxupufCcOJCSkUsd+XtEJ+rESeZtVO+6H5EX9xAUVnTIhyR26DXanWgODp5cFcu27XawSOVrEhRWwSCIAgHBHdRt23b1uZ4x44dKFSoECq+WiF/+vRppKWloVEjWs+gG/S
gMAiQsURJN5/DQ93dbjmHEHKvM/ewo0cDa9cCJ0/K5iqeSyIDGHyEHwGwrpMJ5dFDMXaJFw852X8fkwnwk1I0XL0jL36HBOEOBj8DIM5pLUHwIthwioy0nXvcrl07m2NyR06Ihq+ht9Y0BCgD/gY3veVJVThEaqxu7QXjSsa4OOfXv/yS/T93LjBlinQ5BMvkDdo8ISfnURavv9qDyJewLyY//wx066aOLFqHplQRZuTMCRERAOT1DUXIic46fQQbTsuWLVNSDkINtNpVLaPbcEF4aI2TokpBcrJycfMh8HGcPfdpVEBFnJFJIELLnEEF/RpOMlaVJ086Gk7eNs1OKtbvgYwoX0e+MiF2z0WCcIZbc2kePnyIf/75B//88w8ePnwol0yEryDEEOIz7uQ0olwYTqVLC4wnLQ2oW5edFseB3Ps4SVK2ZJRBjqiaYpv7kfgoG0M/ACC9/+PaNdtjVTv9tNqJY8aNdyNo1wUBCZw44fy6s/rgOTznat4dyIAkzJDhTGgVSYZTUlISPvzwQ+TPnx9169ZF3bp1UaBAAfTu3RvJavR8E9LQqrKisREnLp8L4QnxwKpVtif/+APYvz9zWpyccDy7RxsWge+eSyZ7ZegPtEZ7/Iq2+B0PkFcW8XyR77KNd+v+Ll1kEkQG1DTaBPXfuFHW3Hm2z5D5jXv0kB5RNiTQfmqErpCc241G4Pp1OUVxYDNaKho/oW0kGU4jRozA3r178eeffyIhIQEJCQnYtGkT9u7di5EjR8otI6EUWjWc5ELBNU6V//nO8WRamquERKejZYS+Nnulsz1+w+9ojw1oy3MHIQSjBDfj1ty9K5MgAtHqaIKSrtd37wb++Uf6/ftRR1C4bdsAZ/ULOWoh9IfE+qJpU6BECUmbtQulH35ULG5C+0iqTX///XcsWbIELVq0QEREBCIiItCyZUssWrQIv/32m9wyEgSLtaYup9FXrZrz61owMOXqkv/sMyA2Vvx9Kr2DqyiBMfgCh1BTlfS9GbFZ6vlzN9PTqOHUvYdrucTIbv1eR40Cpk4VcA9P/CnIyvnbnu7dXaeht6lPepOXkBf/QIn1xZ497P++fWWTxZ7/UFCxuAntI2nHzOTkZOTN6zjFJk+ePDRVT0+obRBoZY2TXJu7upBJtbe9d6/tceXK8rw/gXHYK4VilNAUZMUsjEFxXEctHBYlnrdjLhqeKMZlygTg+nXg99+lx6FVwykhIfP3eZTFPeTDSVSxDaSS6IkIQ38shD8ykIDsvOEYRrvvVwze8AyEPOTIzgAv1JaCIByRZDjVqlULkyZNwooVK5A1K9sL9vLlS0yZMgW1atWSVUDCB9GZa0rhyPxcQjXm+vXdT0vgN+HqFaeeY2WQWkz47rt8mf+e69fZvDZ4sLQ09YIRgWiMnWqLYcMP6O/W/TMwViZJPAvVG76N2v26hAfRmc4nyXCaM2cOmjdv7rABbtasWbF161ZZBSQURG81k9jCpbHCKPsGuCo/n33y32AYrqGk6/sk9CpTT7R83LuntgQ6RKWiJjTfMwx//fI/zHB5/0p8gG5Y5TKcWEwwwI8MIEICeW4dV1sEguBE0hqn8uXL4+rVq5gxYwYqVaqESpUq4YsvvsDVq1fx+uuvyy0joRR6M5zUwtvf0wtp8yHs1aER+MZ9WRzS8PJ3ryKetrv5vuWjR8BPP3lWFrGoNfohNF13v2V3rHAvAh4yIH0DHRpxIvTONximtgiEAogecTIajXjttdfw119/oa+Ci+8ID6A3g0BjI0hicUsRUPLZhw+Xdp/UqWIyGUOfYhp2kYtljxZjd9Li++7t2wO39wICfCgojhzKulrVlKtypYYhIjZN62egThNC76zCBxiOOWqLQciM6BGnwMBApKSkKCEL4Wu44xxCrvjlRMn0lIzb3nmEUFQ2ZHejIfLgvqoyaBFXCufLl8CRI8p/viOoLiic1OxHZOL+t9SekUIjToSWEVq/Ed6HpKl6AwcOxMyZM5G
eni63PARhqwXofJRJUVQeMZT6ZUS5dn4Vlu+e54iQKIXvcu4cULOm8vs41cF+vLRyFnJUo4qGkPzIiChr8+c7vy6m2IpZ46RFxI4aWYcnw4nQMo2wE9/jI6dhaNTUO5HkHOLYsWPYuXMntm3bhvLlyyM0NNTm+vr162URjlAYk0ltCcShVe3AjEv55JX/tdIyxae3KZtWUMPk+Pm0onAakQU3UAyv4wIA4Hv0BwMD9qC+uoI5wd38lJoKDBniIg1tfB6PYACDd7EBG/Ge2qIQ3kRGBuAvff2cHCQhDPtRB/3xA28YrdTFmkdnlaIkwylbtmxo166d3LIQnsZPB7vJ61Wpf/AA6NUL6NcPeOcdAPJXorlzyxqdeKwe511sEHGbTr+pBohFRVTCaZxGBVXSd6c4ZiAACzCQ85rW84RQj5hqTcJg93FyjlpK3G40UCVdwot5/XXg/Hnh4R89Ehe/yQS0aSPuHsJnEGU4mUwmfPnll7hy5QrS0tLQsGFDTJ48GcHBwUrJRyiJ2kaJ0u7F1ezF+OQTICaG/Xslh+zuyFXGLNIqdMUmvKuODBpXuOXmbWxGf3yP79EfgTxhfO2deAPufjO9TR4QAvXWE7xcvgzcuQMUKKBM/MeOAZs3ux0N1cXeiaghh2nTpuF///sfwsLCULBgQcydOxcDB3L3IBI6QIPKuANijbuRI4HeveV9NikG5n1HpwWa9aqnMEkItTsj3xonX+M/FMREfIb/UFBtUQQhNM9rRUl2Vw6l/d24g1rvmMouoQhKFiSjUbm4Cd0jynBasWIFFixYgK1bt2Ljxo34888/8fPPP8Pkjd1dhHpYax/DhnGf5/qdkQHMng0sXQpcv66oiJxYy6S0dsQw8qUhNR4XWuJgzMU+1MEcDMNllJKWBuEStQeO9Y4WnG96wmAjCIIg3EeU4RQfH4+WLVtajhs3bgyDwYD//vtPdsEID6AHjWvPHmn3ydljJEUr4Xy3Mmk3RiNQsSLQp4888Ynl1fswvxa+HuXvMBj1sA8vEKHYBpvUm024i5xe9eSuUkV51bOrXr7CSFRErNsy1MVepPFODJUXKs+EYDSgv2hlpJzwLKIMp/T0dGTNmtXmXGBgIIw0rKlP1K543OkmtZad7zm00JVshWxT9Q4fBs6elR6XjqCpeq6xz/4H8abH0nIZ3ksUC7fWJwpAifw9Cl/hDCq6Hc9+1MUS9JZBItdYvwc/0EwWQt9Qu+WdiHIOwTAMevbsiaCgIMu5lJQU9O/f38YlObkjJxTB1f5OWpqvwqFhyqZ86XBqrLco0FomP/5DQfwri7LsabSiYHhCDiX6qwStraIySBCEFtGS7iYAUYZTjx49HM598MEHsglDEABcF6Lr14ENVu6vtVjolBzNk9twkiirFl67VhRuLXAP+XEP+dUWQ3d4euBdC+XGk4gpo9bG3Sp8gBH4RgmRCG9A7RkzAqD2yTsRZTgtW7ZMKTkIwhF7DcN8PHiw7fmEBPa/fUUql4YiVzzu1KGuRts8ySvHFD6m/2kWHegPukGvuZrdx8l5RtDDs1k/w1mUV1ESwqeRqY3VQ5kjxKOJHVDnz5+P6OhoZM2aFTVq1MDRo0cF3bdmzRoYDAa8++67ygrorTx7prYE0khNdX4sN3XrCgvnwquebFP11DaczHhADlrjpG98TXFQyx056xxC/+/aupxTmSe0jq/VbwSL6obT2rVrMWLECEyaNAknT55ExYoV0axZMzx48MDpfTdv3sQnn3yCOnXqeEhSL+T4cXXTl7qhraedQTx8KP4eLsPJncXO1s+mlTVOLrzqWaNUA+PLypUaerJSo1tqKiBatjdEedXzMny5bBMC4KqMMjLExcEwrB704oU8MtlHT3nYK1HdcJo9ezb69u2LXr16oWzZsvj+++8REhKCpUuX8t6TkZGBrl27YsqUKShWrJgHpSVkZckS59fl8rrnKTy1j5PcWpLUNU7ySmHhH9RWKGaCcERO5UZIUVL
CyCHDiSAADB8uLvyGDcAbbwBVq9qe98YCRciGqDVOcpOWloYTJ05g3LhxlnN+fn5o3LgxDh06xHvf1KlTkSdPHvTu3Rv79+93mkZqaipSraZyPX/+HABgNBo14UbdM7tj6AuTyYQMoxEwmWzej9FoBIxG+MPW4jcxDBs+Pd0S3piWBqR5avcRVjZDRoalQJlMJouMlnzmRmVsfnYAMBiNDgXXaDSKelbrvB/AOKoo9mXDkJ5uSdOYlgb4+4MxCX8eMSMKdbEPJvgDIOXJGQYDA6MxHSaTAZ6ryhlYL9Z7iawIRoqH0lYGgyEzb3LlN2fthP019tB5STSZMmA0mmzCuZvPGYZBhoLbIbgjn9R7L6Cs5DQJ7yf9r7+A2Fj4WZfBffsE3Wsut/4//8y201ev2pRl6/aOUB6j0Qj4+6svg0BUzRuPHj1CRkYG8ubNa3M+b968uHTpEuc9//zzD5YsWYLY2FhBacyYMQNTpkxxOL9t2zaEhISIlllu3lFbAIUw+fnBT+KUssePH+NgTAz8U1PRyur8tq1bkR4WhlqPHiGP1flnCQnYFxMDQ0YG2rw6t2/fPqTkyIG3pT6ASGJiYlD4zBlUfnV8/8EDi3+zmJgYAEC5dOmG+v59+/AiLg4AkOfECdSyv75/PxqKlNdMw6QkhDu5DgDZrlxBPetr/v54+vSpiBSFw3AMhPMpX/bnTTDAT4CC+Cva4338Jk1AjZCc/BIxMdtx9mxhwJLzlOXly5cAMuvNp8iOYNx1O141DWSTi3oqJiYGhXjKrn05efnSH7CptRy5eTMOMTHnYV37uztV0WQy4cF959Pb3cFTUykZGFAal5AP93AFpTySJqFPAvr3ByCt89lcbt+4excF7M4BQM7z5/GWm/IRwtny998wWW1zpAbJycmCw+rKqH7x4gW6deuGRYsWIVeuXILuGTduHEaMGGE5fv78OaKiotC0aVNEREQoJarPY/h/e3ceZ1P9/wH8dWc3xphhGEtjlyVbiEahGEvK0qLyq/T1LX2FkMrSQn3VFxVp8a2+rdIiWqS+8rUnpUSEQiWibFGMfcbM+f1x3Jlz75xzz77d+3o+Hh7u3HuWzz33c875vM9nq1oV2L/f0LpVNm9Gr9xcJFSrFvJ+9+7dgYwMxD/3XMj7FStWRK9evUpqZACgU6dOQI0acEqvXr0QkPSFkj4M6NWrFwBgb8IDhrffsWNHoKn4BFauiKm3r18wTQCQIJmDTe5zAAhUqRL6WXw8PnjsJ137tEpXLMWvqC37mVgAVy/kRUNNVrly5dCrVy/s3+/cdylXrpyu5f3QeTouLg441zVCLr29evXC1oQnZdcNP0+OH1ffX926ddGrl3z+NSoQiEN21SoRl3kGI9ARqy3drxojv/+PaIQf0QhpsKffCVHwvI2fNavMewAQqBD+KNGYaLjPOKFnz56AznuL1YKt0bRwNXDKyspCfHw8Dhw4EPL+gQMHUC2s0AwAO3bswK5du9C7d++S94JPCxMSErB9+3bUr18/ZJ3k5OSQCXuDEhMTkZjIhnJ2CRgMmoISZ80q07QtMSEBSEwE4kJrJOICAcSF/ZaJSUnisg5JDEtXnOR1MJ+ZKUSWfHdAtkpbb14OWV6mY0aZ7Un2mZiYCMTHO3JLkLvxLEdXxc+1HuPouKEFkJiYiAQHr+KCEA3HLVzk7xTp3Ar/LClJw94C8UhICD2HzTfVC4Rcc+S8h/5ogJ/QGt9iLm7Qt/2oOF+IRCXnrcx9GgAcvaiSeOxdLo/rKUO5OjhEUlIS2rRpg2XLlpW8V1xcjGXLliE3N7wxEtC4cWNs3rwZGzduLPnXp08fXH755di4cSNycnKcTD7ZKdKoilp7YLvZwXPBgtLXRUXA//6HtII/jW/PS/M4hXFzVD0iJ2gOyF3M5lr2vQMN4GTvDTf6RhE5hfe12OT6qHqjR4/GSy+9hFmzZmHr1q248847ceLECQwaNAgAMHDgwJLBI1JSUtCsWbO
QfxkZGahQoQKaNWuGJC2P+4jsolRymTED6NkTlc6Yq4UrYcVw5IcO6VteGqye+55OFhLNFqLW4iL0wCLLther9A7AOAaPAwCexghD+1uA3piKMYbW1crpYEfuGCoVwHQNR+7BmZADEHiukXcpnfweezhJ3uJ64HTDDTfgySefxIQJE9CqVSts3LgRixYtKukjsnv3buzbZ77zMUUBt+ZxMuvtt81vw+oap4ceMr5/Aw6GDOdhpdC8oFRIa4+1WIweqsuRcSdQdrCdBeiLSjiMUZhhaJuFSMQ4TMVp2NdxWEteEDQGJXpjl4GYhT+RiT5YoL5wBE5OgLsGF9u2bU6AS0Re54mGnMOHD8fw4cNlP1u5cmXEdV9//XXrE0TepqV0EggAe/bYnxY3WFFAOnLE0WT8jIb4O17RHUDZVXiKhkLZ7t3AyJFA8+bupqMC8pGMM9iM5iiPsiMT/YVKhrft9O9kvq+RvuVnYyBm4xao9bOyktnv2AFfyo58qYTNmcizbK6ljYb7jCO8+tBbgScCJ6Iy5E4kvSdXixbWpMULpN/diqZ6LngNf7d9H5qbNkXJDe2ZZ4CePd1Nw3FUwPEyA9pbIzgbmp2/l7TspLeQv2kTcMEFpWOnnDihbb3QS5n57yYIwOmUipqWNRLIhB5/+34L1jiRo9hUjwxwvakekW5aB4dwmp37dHtwCJk+Try5eINkFPyoEyw8u11rEVDI6y1bAuemkwEAyAwG6whBAE6kGdv5DIxUXcbt40/kRTwvYhNrnMg/1ArqsVKQd6PGKcKxtfPJMJvqeUt4NpAeRzsKEcUOPNsze9l4+WXgpZesSYtRgqC9H5ZUeRzHSZSdx81KRvMFz1Ei8iLWOJE3mS3NuDHClJ2BW0GBtfsxc3waNgT27vXVqHp2b89u36AtJmOc7Gcq0/fYyu4nrn77ncx4FOIE2c+jtApL3/fXf6y0Bk1mfgc+ladYFUvXr1jCwIn8J7zQ74WapsJCe7ffrl3pa7e/76+/6h+Vz2OM3NAewKNYhzY2pEZdO3yD+zHZlX1LOf08IljjZGfh28pR9TTvU+brTMA/0QLfYTieM7TN9etNJspjWOgkIi9i4ET+oTQcuRf63Eyd6tz+3WiqF37Mz551PX5z2nY0wn641IklAq9M32MmuFEqJDvRVM8rBMRhM1qgGPEl7+UjXfP63232SEYwgcESEXld7NyVKHp5oeT48cfO7cuNpnpWjHJogN6ClJ5R9QZilu70eLHZkZvZ36qCrtoEsE6Nqqe4jMNPCf6G1zAKT2E3amtex6tBpp5zJtKoeh/gasvSRATA9Kh6Xrwf+JLPnsJycAjypkgFdbmSzo4dQFFR6d+ff25PupRYMC+SZh67yDjxlNiOPk6zMRCNsB0P4F+WbttpXnhuYBevjKrntFn4m+51vFpbY9VvNxzP4Rp8aMm2iIiM8uYjKqJIpAESAOTnAw0aAI0alb53xx3OpslJHgucvETvPE56C5uxVoB3m1eDAbPsOIWj4VhJz69o+D4UHS6F/INYtTzKPBydGDiRNy1bVva9YGlj4cLQ9/futT89agTB2VontwUCjsRvdgc2vLHpE/6bS4+3HUGlI7WZFiX7zz+t2U60sWo4cp6r5Jiwi8LvqCm7GB+kxSYGTuRNmzZpX9YLbZV++gkYO9btVGjnhWNmA7nCVX/MVVwuGgpjUfpTAvBXU71bb3V3/17t41SAJNn3O2NlmfcifYdoOFfJYzQ+NWHeIylvXmmJ9IjmkqOH2VHj1BA/yu9LZxM8qffQ3/D2wtfxYgE+GgaHUNqOn/rPffKJJZsxzKsTUZ9EedyBFzEUM1GAxJL3V6FzmWWlgRNrnIjIixg4kX+YHAHH93bvdjsFoWw47j+joeXblMMaJ3/wy+/z8MP6lo+1Pk4v4Q48j6Gqy3m11ozICC+fk2Qcr1LkH0VFwO+/u50K99TWPjSxk7z6pFvLdqNhcAgnA6c9eyKkwwd9nDaipaX
bC3rkEVs2q4vWY+XFPBwUqcaJyOtuwpuYh+vcTob/+OzhNwMn8o8rrwTOO6/s+15/5J6f73YKyrLgmHnxWmfXvE9e5vXsr4XaPE7kT+G/q1rQpqeP02zcjFF4ynjiiDRSug6F5+ffIFM+oajDwIn847vv3E6BMT//bP8+eve2fx8usmMeJzu26wavBE5P4l4A1k5UavXgEHK/t9PHz679aW3m5uU8r6eP03Gk4WmMciJZFGu8+FSQPIMT4BJFg507nd2fR4cj17tdPdv36uAQXvEE7sNS5GEzmlu2Tat/f7nfz+kykiDETh8nK2uAwz/juUim2XAievE8JGuxxonIT8aPdzsFJbxYbNH71F3PTc6rBTWv1DgJiMO3aINCheGnI6/r3qh6UpvQwtH9WUl6rO7D4y6mpJSZc4YFUCLyIgZORH4yZQpw8KDbqQAABDzYnMHuPk5eDJ68EjjZwanhyFthA6bjbozE07bvzy7SY/UU7lZczoo8/DAmmt6GXhyenBwTdm9jXiMpNtVz04kTbqeA/Ojrr91OQQgvjaqnd74nvU31vCga5nFya/tB36EV7kEr5QUsTIYTv5fdx20/qhlaTy1oC023N883iiImH/7py8+kyIMPYSNhjZObfpSf7JN0iuZH7nbRe8xkLmw+u9aFcHNwiJ2o4/g+7eLFGjhb+OBr2jk4RPg6bsy3xEIoWWrJEmDBAss3y3wa/Rg4uYkFfiJN9NYkWbWcH0TzZcTqUfW8IBoGh7ArcIr0O0fTOUse0L276U3IDVgizcPMs9GJgZOb/PzI3kt4HK13xRXA3ZJ+EjKlcy8edqub9v0PoTdXLxbgoyFw8vI8TvfdB+Tne+93D+fkBLhFiC95fQBVNa9n5fnzOv6medmv0B5PY4Rl+6bYonUeJy/eH8h6DJzI/6Kh5KiHE9930SJgxgz79yNRJHM5cruP09mwbqBW3BitvrlGc/b3QuD05JPWbi8a+jhFmm/JLtL9XI0PsBbtHdkvxYjDhy3ZjNL58DPqW7J9ch8DJ/K/aC452sWCPk5WVzlZUQAL38YkPBhxObV9xqFYcdtk3TFR2s4ZJANw/0muH357J9MorXGyktbvsBc1dG3X7fxDPpCVVfpa471NT41T8FpG/sfAiYj0kwReTg0ZbWS5FzDE1HalgZNVhS/LJ3W1YHNGR0mz02Y0w1SMtX0/Vv0een4HO5q5ujU4hJnj1xfzDaXBjkDoT2Ravk0iij4cjpyIlO3aBdSp40gfJ7kCmNtP+qWBk1W81FTvanyAy7ASb+JmY/u28Ul+C2wuee12PvADO49R+O9sVeC0AH0Nr0tkOUGQv9d5ZILuqOXFDtMRsMbJTWxiZo0jR9xOgbOczDezZzu2Kzua6qktp7b8N7goZB0vNvkxkx3m42qMwtMotqnplVWsOu5Gt+PF3z2cW4NDOCVa5gwjD7v9dvF/g031AH356Cv20/MlBk5EfmN5VU+E7QU/88k8TlYHTpPwUMjfR1HRWMIkrG7r7ubvwMKmN1n9u4RvTxo46dmXHwJQimGvvmpq9fD8racpOPkHAyeiWGe4ysK5wSGM9nFSG+ZabbunUa7k9QZciJF4WlM6lOxCbbyGQaa2EW7xYks35xvFPg7aomkep0j7jbPwGmHm+2kJ2JS2fxBVDO+XKJIdHGnPlxg4EfmNFU31tG4juJzc8sHKKBsLbFZv+03cBAB4Fndp3n4N/I7m2IRfUQe/IQf9Mdfw/jvic5ySBGOkjVzBdw5u1L2daK4hs2tSWiv3ZWUg5RSOhkZWN4PtiFUYiplYijwzySrjE1yJr9HO0m1SWQyciEhZhKZ6F11U5i1zu7KgUKtWoLsFs5GCU/hFx5O+faiBLWhuNmklornw7nVG+yRoWcbtLqteHI7cK7WCAgI878h2AQia8tlqXIrnMdSWpqu7UcvybVIoBk5EfuNkHyclgQCaNbM2GftQ3fQ21G9aAZxBSslfRp6cm7nZLVwYwPX
XswCnl9zv6mR/GT/0zTmELPWFdNqCC2Tft6upnhXNdeUEzoVOkSQly28/mRVOpEAtTynnWfF9q/s48eGAMxg4ucntR5TkT1bnG4PbsyoZnbESn6ETeuPjMp9p7YtklJHt6i5ESyLM5s2BSy/VvcuY54fARc/zBzv6OP2BquiDj5CHJYCBfH05lmMH6oW8dw0+kF22EIklr6OlsJZeQf79quziFHtMnKAzMUzzsn64rjnCiyNNRcDAiSgWzZoFHD0K/PgjUOzuyD6r0BmX4TN8D+UqrE9xBQDglKS2SI7WwSGCTqC8xlSacM01oX/zgUnM27bNnu1+jD5YZrDfxEpcjgbYEfKeNECS+gydDe3DDLM1TobX91mhjkzasaPMW3ryzjq01bxsPIo0L0vewcCJKFZlZACNGonBkxEOFihWoAsuxhrkYI+l2zUy8avup4TSQIlBU9TS89O2aWNfOqyk1PzvbBTWOCli4BRb9u41vGqke4PcebIJLQzvi9zDwInIbzxW+Haq4PQ1LsZhlX4cetNSYGDELNPNKzz2+3mB+nwnLLyG+xYXAgB2oo4t2/8NNXEcCu3XHPIIJpS8tv06o3ReMnAiHZTyqdx94wtcirnob/u+yVoMnIjIs/TeCDx542CgZLnr8a5l/QP8Wi7ugwV4AveiC5bbsv3v0LLktV2DNqiZjVts2zbFsKIITeR0XK/Dr0F6J8AFgA9wjeoy5C0MnIj8xq8lPQf4InBiIFWG3iBoHq63bD9W5Rmnf9bfcR7G4AnsQl1btm900mm7uFbjxPM1+rRQaSJn8B6rt6lepPeNuKAZ86oTGDgR+Y2TN3KlG4hHCxN6B4cwwnQfJ48eOyKimPDDD7oWdyLosUKFtNC/a+I3dxKil88eBjNwIiLP8tqNCTAZOJFpMzDS8Lqy80HF2M/zFzJ1Ld+li00J0cDKBx+GryWxlkFIMz012Ep52co8vuu6e0P+3oualm2bSjFwIiJjPPiUyMiEtnoxcCrlRmB7N54C4N85ULZvd3f/K3A5nsIo3I6XIi4X/G3T0iIuZjoPnEW8pm3ZPQGu8srRe/6SvZy+Ph5p3tHR/cWqBLcTQET+5qVaISfSIlcA24vqqIF9Ciuwj5Mafb+buQK02/77X7dTEMDoc8GnpqVtzq6Z+AvHkC7uS+PvY8vvyD5OBIi/t8aHgrvCRrQ00seJ/Ic1TkSk7v33XdmtX0bVO4iqOBLIUF8wygphbgYiB5Dt2L6mYzQA4GNc5dg+Y4XWIc/1ntu/SZopmZoAN8rOWdJPKe8sRR7GYbLDqSG3MXByEy/I5BdPPul2CgxxsmCvuWDG894SE/EI5qOvI/v6ANeiLn5BP8xXXCZWf1Y7H1Yonb9a9ilttltNa4x9440aFyQCgABmYJTkL+UA/Q9UcShNZDcGTkR+w1H1XCVXmIv4RDuKR9Vzs/nJEWTi6giBjBy59Gr9DrtQF8WS/jjRTnpc9P7Ov6IWjiId3bDY8D71fKa2fEpqAGuQq77S22/r2geR1pFc96IGrsV7ms+JfahmOm1kDwZObvJg53oiTTyad8NvYseh0rPdAOVarNJ9v45bLd9vNHOqZtDO/Zw9a9umXaf3uM3F9cjEX1iKbo7uN9J25qE/bsQ7eAZ3KSyk8FAjih50kAYW/t7h96MPcG2Zc0Ipj1+A7y1Lh+d5tDyhhINDuMlnmYUohAP518xT5psxG3+istVJwvFaFwC7taeDyI+CedhoOVKw+Lms9JwyFlAF8C5uRBX8oXM1nssx5ZJLgL6hTYC11oRa+WDmL1SybFtkLdY4uYmBE/mBXD4NGG/Go8UTEOejuB//0rWeNC1v4WZL0xS0t0Zb9JVpIlYcKL2cRmy2V7euLemKJkfPjbJG3mDXMOFm9qvHX3UuVF+Io+pR0EcfaV7USw/JWKR0BgMnIlLmUqFhDJ5ACk5hLdrrWs+pm9iCsEEJwvs4RUxHXp5dyXLMRDwMABiKf1uyPenxqon
fkI0DlmxXTkeswn8w2Lbtxxo3BofQYhruQQt8h8kYh403TzORCO8UjMnbjMwX5qXAi7RhUz038fEAeZ0guJZPzyBF9zq+uAlFQUHsn5iIJ3EvTqK85dv+C5mGfnutVqMjDqMy7jg3ASwvw9az4zzUus2V6IwReAab0RxAAJvRAu+kmthxFJyvZB+ztbFemFuO9GHg5CbesckIp2/kPsqnjkyAK9d/XE+NU5SwI2hyQ7EQ/b+VEWbysJWDOsiJlLazSMBmtLBk/0SA9nPB7SCIMb4z2FTPTT4qkJKHeCHfePQK7fmAJXjcMjJcTQaVKip2OwXe5pVT3a5O+KUbZR8n0q9IMkVBHIpRgCTsQm0cRiXsRi0XU0Z2YeDkJi8UgInUyOVTF5vweZWuGqd+/exNDMkKFrg9H2B7jFuDQxB5X2n+j0MxgAAa4GdUxz6cRaJkKfn75SFkqe7hXVxvOpWe5rOyBAMnIr/xWFO9WCs4yR3+sZgac031vOhuTHc7CVHJqSZI4efNvx5T/syqfahijRNpJAZOQBESUIgkTessRR4ex324BW8oLlP5Am2T4fos/vAtBk5uYi4nv/JoYaLYhUvaA3gUn6IXgyUP+BkNIn7O30g7Lxyra662/h6pOwj06LWOnKP1XAgGTvq2EcBYPI43cYviuhU1zs7AIqUzGDi5ibmcvM5nTfKWhM3K7oSjqAhAZ1M9sl0xfwPLRMrPp1DOsv1ECmr0nFMNG5a+vvxyjbEPAyRScEkHbctFCpyckpnpdgqiHwMnN/moQEoxzCf59CH8E1vRpOTv06ed2S/7zbhrDS4GAKzAZSEF7ziZQnjw85NILfMeyVOKJ/6G17Ad52PwuWHdvUR6yZo7N+wzNtUjm8SjSPEzM9cZrWsKAtCpk+HdkEYMnNzkkwIpeczbb7udApHH8u8O1Mc+VC/5OznZnv1oKUcpFs5YCLNcX3yEuzEd/TFP8zq7URuP4gHch8ch8Daoag1yy7w3C39DY2zHdjSybD9lzpsGDYBq1YDGjQ1vMytL46VK6dwcXDpZ8vdoajgdFP2yq9hT4yTomDIhKVF9GTKH8zi5yWMFTyJZPhocYgH6YCrGYOx77RzbZ/BJYnxiHHBGfK9LlwCw/NwCcSyY2+kPVMUM3A0gNC8WIyBb6xT0EB61PW1+Jj2W89AfA1CE9WjjbCISE4Hdu8VzKEH7dcayW+vWrUBSEnDvvWU/S0lxrlqbXKW5j5NgV+Bk7XKe47OE845ORMrOngVWrHA7FZoJiMM4TAWuvVZ12eHDrd13VpXSm+v5zSQjKkkDJ9Y4qWLTOW8ILSwGMAcD8BPOV1nOBomJQHy8+nISxWHlV8OnXaSargMHDG6U/EZr/okP2NTHSWNg4bP4w7cYOLmJuZy87l//Anr1Kvu+BwMAvQXuhHP17SPwtL79KHz1BGnZbvRo9RWohJdqLck8O4LfpUvLvjdsmPyydt1aQ/Jpusahzihm2DU4RPk0bcuxSOkMBk5uYi4nck3w9HsWI5AUbGOnwYABCh9IAyTp0EZsqkc+phb3n7WwxX+kALpr17LvPfecwnaM3Fo1POBgbShFYleNU1YWHyx5Ce/obmLgROQa6emnZbLC+Hhg2TJgyJDQ91ULU9ICGWufZLFA6l+nJCMUBhmtQdSaDwpUzlfeWslKgTht+TnSqHpmfZbZz7ZtR3IQVVzZr5cxcHITr+7kZx7Jv/NwHfahGj5CX13r6U1+RgbQpUuECiSlvkzlJPPcJHLIIzVmmu0pzaX1I8SJfd7DdcYTFmP0/A6fxXexMSWlJmMc3sYAfIvWisuopZvNQsmItbgIALAtwiiSds7j9HD92arL2HFL/hfut36jPsdR9dzkkYInkRluF0Sux1zEoRjF0NeBXM1wPKtpuZKn5Eq1SZUqAVOmiIEV+0XYSlpjIX3dAV+iE1bhY/R2I1lRLz4xABsftpe4H5NVlwlAiHhrdbR2s3174Ouvndsf2aYf5mM
o/o0X8Q/FZWwbHALAqXj1jk4Ri5STJ4v3oLFjrUtUjGLg5CYGTkQWCIQETdOmaVtL8fRr1w74/HPMTFZvvqecJEkQxZuVLnYUbA8jCx/iGsu3S/KM/oZWPYQJH1VPEw3NaHWnr1kzBk5RoLgY2IcaqlMYxAn2PD0QNDbxjlikbNIE6NtX973IkQejPisLs6mem4oceERHFGOkA9pFEvFanWQgaFK6uXFwCFV23JzdrgmNFn7slhd+bku/A/MF6VVUrC3PbC2n3ITUlGifx8lnWOPkJkOPxYg8wI+lqTB6bzKqX5mBE0URtwMM6cCUeoWf25rO9Si4ppE91PLP+diOhvgJu8pf6kyCFNgROHHgnrJ4R3cTa5yIfE+2j1N4Uz1ynNsF/1hm9tiPGQOsXRt5mUqVIuzfprIeC5GxqVu3yJ//hPOxEFfaeqlXy9N3Y7rxjTdrpvjRXRZPFB8NeEd3E2ucyM+itV2AzifPqoNDMHAy7RfUBQAUIPKohNICOwu5zjEboq5CRwDAf3AHAGDqVKBBg8jrfPWV/Ptt25R9j5VJZEb37tqWcyqfBUcJDSqHk5iBu41vMMI9qn4DnjzheEd3E2ucKAr49cm+rU31WOOki1qQ0xOLMA/XoR1UqiHIFeHXAL1Ba1csQ138gkW4QtPy48cDDRvKf5aRGbDtmY5fr3Vkjtbf3a0A/TTEKS8M53s+WdCFfZzcxBon8qtDh4Dff3c7FaZYVbhSLSQycDLtJ5yP6zFP1zos5JoTPH5GylTZ2QAOaF/+LBKx61ytomlC5OHIFdlReIzWWvkYo/VndCr+ULq2Gc5ubudTt/evE+/obmKNE/nV++8Dn3zidiq8hTVOhlkV5LB5njvCf7/q1VxKyDl8JumQjAy3U+AIt/OTEBBrUe/AiziEyrgJb8kvZ8flz2dBjRN4R3cTAyciWz33nPJnjt0P5B5DPvQQUK6cQwkgMqdy5cifey1gHTZM/L9nTwd3On582feivQnU4cNup8ARWgOnSD+3FefIS7gDVfAH1qOt6W2FiPZ8ajFPBE4zZ85EnTp1kJKSgvbt22NthOF0XnrpJXTs2BGZmZnIzMxEXl5exOU9ze3HGEQuu/FGe7cf6X6gGDgZvYlIa5bUapzq1QPy84HOnY3ti8hB33/vdgp0CATw4IPAypVixbiT+40KaWmlr+epNI+Nkdp0rTXizoyq53CT0mjJ1xZyPde/++67GD16NCZOnIhvv/0WLVu2RI8ePXDw4EHZ5VeuXIkBAwZgxYoVWLNmDXJyctC9e3f87sf+FmZrnKpWtSYdRCZUqWJsvZQU4JZbrE1LOCtrlZTuHyVPEpV6qyvdTRMUuphu2KAvYVSC/ZqsIz2W2dnalwWAKm7emgQBCQniM4nUVB3rmS0gyl1s/NjMqXr10tcx0hRPTfgzbqX50SM+qFO4NlWvDsyfr5IAToDrKa4HTtOnT8fgwYMxaNAgNG3aFC+88AJSU1Px6quvyi7/1ltvYejQoWjVqhUaN26Ml19+GcXFxVi2bJnDKbeA2Rqn0aOtSQeRCYmJxgscp09bmBCd9N5kWrRQWeCFF8RIcPVqc32cqrncQSRKMIiyhtJ5cuutkj/CDnVz5WlhLGEmxlHNF4MHi/9fdZXxnVBUCS+q1a4tv5yRfLl3L9C3r/715ES8p7FWyTKujqpXUFCA9evXY7ykbXBcXBzy8vKwZs0aTds4efIkCgsLUUlhNrwzZ87gzJkzJX/n5+cDAAoLC1FYWGgi9eYFzpwx9QMUFRUh3rLUEBlT7fROg2sKOHKkCFZfhqTndVFRHKBwlhQVFUPu2VGxIKCosBCQzBn0j38U4f77i1G66dLPgjVOhZUqAa+8Ir555kzJEmeLiyEoXGviBaFMCgoL1GYrIi281u/Gb4IBRkFBMQoLi4CwXFlcLH/+iOvKPxS8HS/hZQzGPXgy4r6V781iGs6eLUJhYbHkHUm6gHPnb6mzZwNQu84IgQDOFhY
C06YhcMUVEC67DCgsBAoLZc/HwrD35e7HxTLnt1OEChUQOHZM/3pCaWh59uzZiEct/BhEq4KCsyWvL7hAQEEBIN9kTvmcULoeleZ15SNZXFwMQSjd9tixRcBUuW2dRXFxsWwKzp49C0Hh9xKKixUfJxRedpntv3HhufPMTXriAVcDp0OHDqGoqAjZYe0AsrOzsW3bNk3bGDt2LGrUqIG8vDzZzydPnoxHHnmkzPuLFy9Gqq56fOvV+u47XGhi/e3btqGpZakhMqbDn/81tF5xcTG2bNkIKHR0TUsrwPHjCm0iIli4cGHJ6++/rwtAvqpo9+49AMo+Ojxy5Ag+X7gQQOljwCuu+AQbNkhb0ZV9RCjdb6CwEH3OvV7z9df489wDm3CXHD6MrLD3li9bhh6yS0ev/SitZTuNFBdTQuH27j2IhQu/Rnie/+233wDUAgCcLQoNlHb+8gvatduHtWurh7z/Cm7HPPRHPipG3Kf0XAolpmHHjh1YuHCr5J1Shw4dwpqw9TdsqAHgooj73NqhA34KrhcXB6xaBQBIPXAA3RTSKN33jh07cH7YMnv27JG5wjjjj3r1UPW773Svd+LECQR7OX29di0uibBs+DGIVmu/+QY4N8fYpEkLMHx4VwBpZZY7evQoJkzYhuXLc7B69Xmatl2a15WP5L59+3D06FEAmQCA3Fz5EW03btyAy/bvR02Zz9Z/+y32JybK7iX/2DHFM3Lhrl22/8ZLlyxBgcvNQk+ePKl5WV/P4zRlyhTMmTMHK1euREqK/M12/PjxGC1p0pafn1/SLyo9Pd2ppMoK7N9vav1GjRpZlBIi58XFxaFNm1aKnyckGHvO1atXr5LXv/6q/Lw3JydH9v2MzEz06tULPXoU43//i0OfPsUh2wz3LVqX2e+5R5IAgNxLLoFw8cWy68ZPm1bmvS6XX664r2hVgGRUxiEUIR7FFtWjs6meNTIzq8rm//POKy0YJsSHnmd169XDZ3OyZAeOVAuaAEQ83wCgfv366NVLft6nrMqVy6x/4oSkv1ZVAQjrQn12yRI0vPRSNIyXyXu//KIpjfWuuqrMaBQ552krPNshq3ZtwEDgVF7yQLl9+/YRl1X7naJF27btSl5fdVUvjBsnX3TOyKiIBx9si3vvBcKLl0rXIy3HsHr16kg/VXreKK3TsuWFqP6LfFPvNm3aQFBYL71CBcV9O/Eb5+Xlud5nP1/h4aYcVwOnrKwsxMfH48CB0JnyDhw4gGoq7fyffPJJTJkyBUuXLkWLCJ0PkpOTkZycXOb9xMREJCa6XMlssl1pfIyMaEPRKoDEROVLUMDg+SE9r+XKQUFxCudPXCCAuMREvPsu8PHHQJ8+cUhMLLtsE/yApvgBy9G1zH6l7ckTkpIApWuNzHcMvy49jRHKXyKK/AmVMa/JUcGCXlGRfP5XOn8A8d4UnxKaj88/H/jxR237Vrs3x8fHIzFR/uQOnr+h2yt9PXRoAHg4dJ0EhRYr4StLC7/haUxo2BCYObN0LHREPkZ2i1MafEZFQHrtUtmG62Uoh7Rpk4DkZKBOHfE7K92a4uPFc0Vu3K8ABDz9NDByZOj7Wo5hXHw8AoHSvKS0TkJCgmKeS0hIULwPRbrXhuyrQwfgyy9V06tXYmKi8j3SIXrysqsl76SkJLRp0yZkYIfgQA+5ubmK6z3++OOYNGkSFi1ahLZtLR7P3kmcx4limNrgDHb3V1Xbf8WKwM03l31yGLQNTfABrpX/0MzgEGEJ24H6+tYnAKxxMusgxCfAWpr+aznW27ebTVGpiNcGmQ+lp5TRUUA1Uamhsc1995V9z4oLqMvdGbwiNRU4ckR9WP7gIVc69CPCnoE9+qi2/QuCtsGMbB9Vjw/rAXigqd7o0aNx6623om3btmjXrh1mzJiBEydOYNCgQQCAgQMHombNmpg8eTIAYOrUqZgwYQLefvtt1KlTB/vPNXdLS0tDWlrZNqeeZjZw4kgo5HORLvR23wQcG7p
V781Godkx6cPBIYwZW/89TG02GxM/EvsGKwVOnhn6OBAITYxbCZMr3Tp1j64p16vFIEEApkwBdu4ELorcLyxmBAKaLstqgZPUDTcADzxgLlnhGDg5w/XA6YYbbsAff/yBCRMmYP/+/WjVqhUWLVpUMmDE7t27Q6oen3/+eRQUFOC6664L2c7EiRPx8MMPO5l088wOR+6ZOxeRfmo3Fz3Ze8QI4JlnzKWnhBWFHTM1Tlnhw0UQOWd55rXA/GtRnA7gGNC1q/o6vn6E949/RP5cci6rBuPhFy2n7tFKEwsZNXas+D9bxchSnNPPphNBiE9QnUvNEQycAHggcAKA4cOHY/jw4bKfrVy5MuTvXbt22Z8gp/CiRDFMrfmBHTehG28E5swp3b8jjNxs7rkHkBk4grRjUz1ztmwBFi0CBg5UX9bpYx3x3JW5cCheSxYuBLp0MbEzE8vazegF1EvfwWf01DhpWeZBTMLNeBN/3ngfXrwQ+PvfgVGjlJcP/+muugpA+AB84TW0elSrBvzzn8CECcbWjxIMH93EGiciS2gtI3TqVPpaEEJjmveC/ZXk+guYSZCRwEkytx0DAOt17+52CrwreFupVQu44w5tLUfL1MT45d50xRWAzOBRSnx1LloROLE7gC5WH67H8CCaYBvOZmQhJwdYsgS48krl5cNPu48/lvwRrLIqX95coh56yNz6cvxyvTiHgZObWONEMU56vbzsMuXP1Bi5YQkCsGJF6d/XYy6wZw/Qr5/+jUViJHGVOcKcnWbMcDsFZEbEU0rmwhHSzMnOYMBLBUCl7/nCC8Du3c6mxe905hm34k3Z7Pf++2KfteCUGIsW6d9wcOTqW24R/3/+eUPpixaeaKoXs1jjRFQifORbPTcfpVFzW7ZUXqd6dbEGKi0NOH4cEBAHWDXvitYap/BzeMCA0tflywMnTmApIgyVTCG01giwqb55IeMxhB93j9VUdOwIPPII0KQJgD903jf1fBc/3JMzMoCcHKBbN7EKI5wfvoPLlLKEJ68r11wT+vclkaY0VvDVV8CuXedOIOh/sDdgAPDOO/r361Fe/JljB2ucvIkjCbnCzP16zBigXj0gfHyYSy8tu+z8+WJfJ6tHNFKkdUjf2rWBt98u/XvfPky+7WdsQxN70hWFtI6k57FyPVlJoY/ThAlA//4279tPQYfc7MRkip4+TnYwnP369o38eblypUGTEa1bG1/Xgxg4uclsjRPv/vbgcXWFmTJHlSrAjh3AxImRl8vJEe8R77wDBCdLD45LY+kE6XFxwGOPAePGibMmGlGhAg5ncA4noyLVPnnyybCPFQfCDmjYyfzGG9q288gjwOHDOneu93qtd3mtFyZBAOrW1bdtraSTc2ZkaFtH+j0ffFD7vtjHSZUVo+p54tBOmSLeDPXkDyM88WWtw9uHm5RqnCpVEqeXVru7++npll9YOR9GDBsxQowXbr3V+Daszt516sh3rJ00CVi+HJg3z9r94f77gXPzz5G3RNl93FJGzrsilVb/kTq0SzVpIt7+1Jjt326batWAdeus3660eZVcNbocaSafNEn7vliuKCvsgnFumlHFxawaVS9IT+yuS9WqYvOLpCRxGE3ShIGTm5QCp+xssfeymapRKpWRAdx2GzB4sPqyfBRtie7dxfkThw6NvFykeSutLtw+8ID8NhMSgMsv196ijvyPgZM1gkMjN2kuHzjl5wP796sHQ9dfD/zf/5XtjhHumWfEQWRGjJC8qXaRcVqbNqUDzNx1lzXbZDDjKXffLf++3HXlGNIAAAsR2qShdm2rU2Uym1xwgWXpKEPtguuzcpe/Uhtt1JrqWTlDaCzLygJefjl0LGol6en2pycGBLNmu3ZiTY4Xhn/2ZGE5UuRItvFkXvAZQQCmTxeb1tWoJR84VagATRN3DhsGvPUWEB8febm77hJHwkxLk7w5bZq+kcKC/Tm09rswklk++AA4cgRo1Ur/unKqVSt9rfU6oZZupc9
5HVKllE+DAZX00DbAz+iJT/EmbgYALF4sDvOvp4+trhonabNOp40bJ/++Wl6sUsX6tNiIgZOblGqceFd3z7vvup2CqHPddeJ0KeHCJ8ANvzlYff+28rSaNEkclMpSMlVevBTow1H1zNMzgEIgcK42SWlYSx3bMSwpCejRQ/vyNWoAR48Ca9ea2KmKQACoWFF8/d135rZ14ID4HYOsCpyc2kYMGDpUnMlCrknqQWTjf+gJnLs2desGvPiijc1Np0wR+9q5MYF6jOQXDkfuJrM1TqRNsJSk5XjaWV0dw+68Uyw/HDwILFzodmrMC/altfQUZdDuGF5a5S1YIP+QQ07IMTT5lNvxSg49LQvMJq5FC7G1w6pVxtavWtXaEXiDQRhrnCwTFzaThavXl1q1gF9+cWffMZJ3+NzNTZddBiE4tJeU1rMu/GIqrc6nUsHAyckndTEu/FAnJwOvvRb6NDsQYEu1EMFJBskwDkduTu/e2iuPQs5XkzVOfhGxRtPOC5j0Xm/0PjZ+PNC1K3DVVZHXc/JC7JeOpT65YLhyD33vPfUE+OT4acXAyU29eqH4sceMr19QUPq6WTOLx1OOInInLWuWLJWdDbz+eunfWi7gak3zrL7WRtm1m1REKuQGAsDFFzuYmGjnZlM9B2kNzMswW6I9e1Z5Wykp2rbxr38BS5eq1w6Gb9+qScHDtWhhYOx5isSVwEk672WMPP1k4OS2SHcMtbuJNHBKSvLP3cdpcmOEfvSROJs1WWLfvtChx5Wun3quq17u4+QUP6bZDwIBYPZst1PhbyHTk1Wu7FYyysrKsnZ7kpPQcOBkVHAgi0hN9V58Uf59o4NDhLOr2VcgwAucxVyJW6SdtZRGzYiy35mBk8tkm+ppJQ2cSJlcT/D69YG333Y+LVEq/Lpo5AIuXef7782lxzdi5AmdU/QMDhFl93LHXHopMGRI2ABaY8eKY/obpGXkPVXvvw/k5YlD/UWDiRPFyUmB0AMUfs1QGhjDqsBJroYqIcGa+2eUnIR2X8Ztm8fpwgt1p6WMypWBZ58FXnghbJ4AiSj5nYMYOLlM6N8fv3fogKIZM8p+qKfGScvysUrrcYk0b5Ydky7EoEg/hfSi37Qp8OSTpX+Ht2i9+mpr902xJRDgyHpG5eUBzz8fNipYxYriLNJBOmqgXn0VOP98CxJ2zTXAkiVA9eoWbEzCilLxwIH618nLA8qVE18/9pg4N9RHH4Wm58gRi6JOCS3f9403zLfY8NMF2U9p1eLnn8Vx/a0aLn/4cOAf/4iZfva8dbgtMRHrxoxBsXQSP60nKQMnbbSOqvfKK8qf7dplWXKoVKR7tLQPyvjxoZ998IH+ffH0oCDWOBkX8bi99ZY4AkxwZlwNBg0ynSTvCza500N6oCtXBj78EOjTJ/SiGRzy3EpGOqiSq7p2Fa9pmh8o1q8vziTtlCi72DJw8jMtnUpI/0kbZSe5G5QePEXKpsEaprFj1ZclMiMtjae5URHPy//7P2DuXM2jpTVqZE2abKU1o0Q6MFb2u7LqwmhmOHK1qVTMpoFCqP0kS5YAJ05Y371Pkd48GGW/c2yMIeo3WjNZp06lrwUh6jKnZfS2yWGJ3bAFC4CfftI2YllcXOihbt8eOH1aHLo8XKzO5RirzclSU4GTJ81tQ66/05dflm6f3Hf33W6nQJ86tQH8amBFKy8+cventm3t3acdvJ4+HwkEtA+uaLty5YBTp0r//vRTsWlgFInR27JPRHNNyaxZzu3LT8fF53r3BkaP1rbs//5X9j1p0KT2swVrqKZM0bY/T2YDlSA9VgMno88uTqO09PAFLgEAHEYlAMDUqUBurvgP8Gh+cIGJMR1iTlqa2ylQIB0SOkjtJDJT46T1BI10AfPTCehyWh3b/apVQOPGpZ2KO3bUn6BNm0L/7tkz8j59ODVMjN6Wo5SfLkSdOzu3Lz8dlxjSqZO5yr177gEOHCht2ueGK64Q/w8Wxq0Wq4GTUStwOd7
HNXgEE3AT3sIUjEV7fA2g7AAEvCyI8vKc21fDhj6szdd6kQoZn90CZgIbM4Ij+UUSa4GTyxxrBNOxI7B1K3D//cDevaEDvmjVoEHZvndKv3XHjsD69fr34TLelqOF35qXOVkilJvHScvyQUOGWJse0iQ4oFQkVavan45I3noLmDlTHOzKEJU8GatlC7XvLW2lLCUgDtfhfTyMR3AQ2RiPKdiBBtYnMIrovRSbyZOffXYWY8astWRbnqM28mrNmtbsx+4+TloiaSsCp0hpuOUWbdv3CLnD8e23wMqVxqc4u/deU0kyr3p10xNby3r55dLXdevKt833OAZOXmT0biK3npFhUJ3gZOAU3JfRpguWjJVLetWrJzb7mzTJmu3ZUUjLzASGDgWqVDG4ATbVMyQz0+0URA+954WZcntWFtChwz7jG3CD9ADZ8YBS6QfQW+NUqZI16dFC63EwetG1Y7RAh114obmGNY8/bl1aHBf+u0v/vu02Z9NiA96WvcyKPk5aO4A4zY0aJzXBm8Gjjxpbn1Tp/dmnTQMefNCaffvxZ2TgJM/sBMuAP/ODHZjHTAhWJ9SoYXwbe/aE/h2sam/WTH55q2ZDNXMCWBE4BQLK2/HayelQeqZijPhi4EDb43VHee33NImXTC/Tk9n8cgEKcjJdWudxCuraVZxYkCzXv7/YF3TYMOf37clToVatiB+zUGsfT+YHF7h5HBo3dm/flli1ShyC3UhfkKDwJnyHD4v3nwoV5Jf3QinaqqZ6Sk3Boujk1PNz3Y9/4ZXBX4U2ZyPP4W3Zi6xsqqeka1dj+wCApCTj6wa50VRPjfT4SZsKRNFF3G2pqcCWLcBzz4l/e6EM4KqnnxajySVLZD+O1aynpztIjRpic0krLkuxyI08tnr1Wbz+unJfNU+JdJFq2lTs6GjlhFTlynm/qZrW4QXVBocIBIDFi/WtF8WKEY+9Oe2BxES3k2LOuHHi/zfeKP4fZTey2Myd0UpP5ly61Ph+li41PxGKkyeS3sEhKCp58uevWlWcMFShQ3Yslh9eeUW9T7K0LHvtteIAHfHxkdeJ1Ow+lrmRx9q1E0LmbycdtD5tuvJK8X+lmiu5E2DdOvXtJiQA11xT+nekTqhqTfUA+QxYvrx6OjzE7geAdevKvDljhvj/zJn27tyIMWOAjRuB2bPFv6PsYhuDt+Uo5eQEuB07ikNVmuFG4OTW+qTIyRontYK1F8Vi4OT3h61+E4t5TBevXf+1XjRvuEGszfnpJ/nP5b5Xmzbq23333dAnG5E6oeo9djt3Ar/+6r2LgEt54NtvxZ+wXj2ZD0eOBP76S6xu95pAAGjZ0p5R+TwgOr9VtPDaBVvK7N3WjT5OFJPuvBNYswbo08ftlOgXi1nXrlHeODiEPB4HHfzUtjgQALp1U/783nuBOXPsTYPeC5jVc2H5kHRwxAsvVFk4I8POpFgnyi4yMXhb9gEr+zjZlWH9FDhF2UkbTfSUQ555Rvz/ySf17ePf/wY2bPDldBE4dcrtFHiTXL6xYhDSWBSLwbmvKV009QZ1bdqIg1BMnmw6SYqMToAbgyfnrFnAgAHA7be7nRJSwxonL9M7qp6TFxuzpVA/XRidTmtCAnD2rLP79IG77hIHsDI6oaAf/ec/bqfAeZFGKY7ET5UBXuKnS7ErrGrja1UGtTKj2z0IRYwER1b8JAMHenfaTQrFZ01eFLygXHGFsfWcYLbtqp8Gh7AqrVpL/H7sjOOQWAqaAODECbdT4H1GCy08zURRVH61R06OWBVw222lcyy5ye0nBHoyjNHqTOk+PvrI2DasxJPEnCg7fgycvCh4YXzgAXfTYScnTySt7aatSNN55yl/pnUkwijtUEn6FRe7nQLnaTkNjTTVC/+8ShXtaYpmbKqnIhAA3n5bnFtn6lTxvVGjrN1Hdrb2Zd0OnPS4807lz7Q+0MzMtC49NomyuMA5HTq4nQJDeMn0Mj3N4ZSa6nn1jHZy1JwJE7QtZ8U
NKVKQprUUzEfhFOPCT8VINY1GB4cgkVdvEZ6Ulwfk5wNPPaV/3UgHesoU7dsJLjtihPIySiPpybHyxAh/6PfQQ87u3yVR8BX00fuFw/P+jz8Cr73m2w5dDJy8yO0mZU5ITS19emc3pXkstNJzXGfNUv5M68UmhmqcYu6GoxOPj+iPP4A334y8zKOPhv79yiv2pSeasMZJJ6P3k0gns54TvXNn4NgxcfJsJQ0aaN+eXnJpDT7sCy8IJyQAzZvr34efyjIqgrFjTPdfCv89GzYE/vY33z4k5iUz2nm55DVmjNspCKV0sdZzEa9dW/kzBk5lGLmnxpJYbKonR8uAEeGtp3x6T7ZMSoq25aKojBo70tKs25YVZYStW8XR+Z54Qvv2tTbV80IZRiWNrVsrfzZypHh4Xn3V4jS5KcYvGgycvMhIplRqqseSl7Mi/XZaf4tJk8QmITGgXTtg/nxg0ya3U+JNXigzeJH0uARfBwKRuxjGmpwc4KWXxIe7kcR4GcgdubnAs88CP/zgdkr0k8swDRsC48bpC+iiZMS9zEzg739X/jwQABo3jvEHOT76PbVg4BRN5DKnkZLX8OHm0+Il0pKV3SLtIykpco0UAHz+OXDHHdamyeP69mXNk5JYDJzCa5eC08wonVrSZTt1Ev9PSwMuukh9X9OmGUujl33xBdCjB7Bggdhy6vHHIy/PpnouSEoS77NNmridEvsZuYj17y/+f8EF1qbFBsOHx2BQFBzUQesXZ+BErmnRovT1zTeX/dyqwClS2+lYpOekj7Rsr17AypXAgw8CQ4fKL1O/vvi/lt9t4kTt6SJf0trcKprddZf2ZWfOFCtsN24EmjYFvvwy8vJXXmkqaZaQNuGxYi7SDh2ARYvEp9xaRFmZxh/CD7oVT0i8+pTFSLrOPx84cECcuZy8p2ZNYM8e4K+/3E6JKxg4+Yn0Yhve81np4mSkqV6kR5AjR5a+vu468f8hQ0rfs3tCPTvpGRJWj6++EoOlJ58UR92bNAmoVk1+WT2lmGuv1bbczp3at0me8sILbqdAP7O1h1q6Gipd7jIyxFMt+Pzh4osj78sL0/IMGlT6OhBwvgaINU4xzu6A6+qr5d9Xu9dVrSqOvuuFgJBPF8o67zzzA2/5FC+ZfpWUVPY9q2qcIpkxo/T1G28AixeH1lDNnGnt/pykNJS42Ytm8+ZisCRt/23FQBRal421WWOjiNZaAy/ROl1ZJHKXrauvFh9Eh/cnMHOJ89p9XxDMfR8jA5UycHKB0oMzr3r77dLXbdvqW3fCBPlqYwYjsSPKfmteMr3IaKHarsEhXntN/D/8Yl+uHNCtm3wQ54S779a2nBeeWIVTKq0E37cyzSwZ+VaU3W80CQTkv3e5csC2bdYOM27l4GR2693b2Hrhl5IvvhD78QfFYh5znfQBJODuPUrLvgcMAH7/Hfj5Z3HUET2SkoBnnhGbqe/ZYyiJXubF4gXZiyUqPzE62p5ZAwcCS5YAmzeb35aVevbUt7zc8XPrkb4VNU5aMXDyLT8WaiOlWctc1FlZ6iMYy42qZ4ST83BrpXT86tY1vq5UVpb+daRq1NC3PJ1z443i/y1b2tcs3E41apS2gTWic+fQYS+1ZjxGJv7nxxtZBCxReZFSJgv2KZJrUqY0HLkVF524OHF47PA7rtus+G5XXaW+jJPBjB1N9Rg4+ZYffzqlbHnRRcAjj6iv36WLvlNb67JWlr+Co/fZwcxvrvYdn31WbO4obYigdX/z5wMjRoT2ySId/vUvYO5cYNmysp8FRykjb4qygr/jouz4+fC2HMPuuw/48EPgm2/kP4+1eZz0lpiMnrzh62Vk6KvtktuvUlrkSjELFphr4uDH0jcB8Of9Rml8GC0j1zZuLH5nq0a70zKghBHdulm3LSlBUD5di4rMl6+DM01Ij4XWPNa3r9idNYbm57ZWcrI4zLZcn9MmTcQR5Pbtcz5dVpOO/ms1t7oFRODHazSZwxKVnyQmAv3
66av5iebASS+rSk56AxG5/Uq30axZ6Wu5q3Dv3uZm9mTg5Ft+/Omef978NoYNi/y5kVO5qEj+/R499G/LTpECp8GDI6+rtRB3ySXq+yOHtWplbtCI9u3F/5OT9a3nVlM4I031rBh5hpwXZdElL5nRQuniF+kJjd/vmFov+FaPKqe2Xy3jMUuPvbS5IJvqkYRf7jfSroJ16gDXX192GS2na3CZSpVK31Or4dB6GVAKnN59N3TQMKmOHbVt20pKp+vZs+rraj0WffoAH3wA7NgRg5N3Rqs33gBGjxYnMXOT1kwYvLj55SKngF2wNPD5bxyOJSovMpLJ2rSRX69OHXGy1aZNy362fbvy9vQOvKDV1KnA4cP2bFtJsCQid3y0zDCq9/dYvTp06C+59YNPB8M/l+v9blaUXbRiiV9+utGjQ/82m30rVwbmzQM++kj/A3QlSoFTxYrioGFyli61Zt96mAmc5MgFRoGAOLx7vXrivNxNmwK33mps++QRVasC06b5Zw4DO+51dvHLhZgcwcDJ7zZvBsaMEXv9Sk/uHj3EAjwgzq0kNyV9gwbK2y1f3lh6Lrww8ue1a4c+TrabWm3LPfeIQecTTygvo/eimZ4eWosklwZpWxnpjYO1QyThZHbw2tgv110n1ozIMTKqnlLgFIkb5SUzgZNcenv2BFq3Bu64Q36dlBRgyxbg9dc1J5HIH/r2dTsFFIVYSvO7Zs3EWpxKlUofGXbqBCxaFFo4V6J0YdFbYtixQwzU5Gq2pIw8XbrmGuPbUit5ZmQA69YB996rPT1a9lu1KvDcc8BLL+l7ZG60qV5mJlBYiMLdu7WvT57nZME9Pd25fVkpGgKnWrXE//v2jRw4GUlPUhKwfj3w4ovKy/CBOllG7z3ezsw3fz7w8sv61rniCluSEtOi7ALDwMmLjGayhg2Bv/4CVqyIvM0PPyx9PX68NWmoV09boGbE++/Lv29F4KSF3LG45Rbx/wsuUF5v2DDg9tvVty83xJWRADMhwX8z0lNETtY4mbm35eaG/i2XfZWy9NGj6stYQW2cnGnTyr4XCIhzd4bXfFldDti2Ddi9W3zupNTnyEjgR6TKT4NDRDJnTujfEyeK/+sZP79BA3MDMVFMYODkJcFHvt27G99GRoZ6aatfv9LX7dsDn39edhkzJYMmTZQ/c/rJgx37EwSxU8TatcBXX1mzvSCjJWU/tBMn3Zw8Xczsq1kzseL299/1r+tUTZda4FGhQtn3AgHgrrvEvlZ2KlcOyMkRX0eqcWrXzt50EPmREAgAN9xQ+kbr1sDDD4uv9V7Y5NoHR1mNieOi7PgxcPKSLVvEauUHH7R+25FqIi69VBzq3CpmZhe3mtzAC1Zt96KLgLQ067YZ3K6SpUuBO++UX1YpcPr6a2vSRa7wS+AEiF0Fa9QQX9sdx1s5HLlT+9dKKXC64QbxmdQ335ib1o0ohNWZWe+oesF2qmaEX7wuu8zYdgTBuknkqNT557udAksxcPKSnBzgttusG0ZK6qKLxAEQlJq9hY/564dBCtxqqmdnqSlS6bVrV2DGDO3bio/nI2qf88NpaCUjp5bTfZyMlsm0UvrNg0O8t22r3JrIS8+siCIKZvT69cW+SF98Ib+c0gkuHQozeKLOnSt2FJwwwXi6oqx2xBPathXnfFizxu2UWCLGbss+tGKFWFs0f775bd17r/JAC+E1TmYuHpFKMk43KfPbJCXBm8mQIeL/l14q/7kWZo61dHTEN980vh0yxcl7uN1Bmp55nKzeLmBsOG+54z9mjP7tAMCIEdqWk7tkbdigLS9cfbW+NBE57qmnxDLNs8+Wvte3L9Chg/q60pGA//Of0tfBk6N/f7GsVLFi2c/MiHCRseM5d1QaMAC4+GK3U2EJBk5ed9llwN699g+rqTbLpFFq80GtWwfcfbf8Z40aKQeMX32lrcRkZVO9Ll3E/5XG9TVKbnCI668Xm26GTySj9B309MZX07272H/ruuvE0t5
NNwEtWxrbFpmiNqCBHKMzCdSubWw9OX5sqmfH3NNSWssMrVuXfa9VK23r8mE5ed6oUWKZplEj/evOnSvel776KrSZvFUZX+nCIvNUacoUsXmy1gciFD0YOPmBE3fDtm3t2efChWIVrdJ227QBpk+XX3fbNuWAsX17bSUR6QXPaGkumOaPPgI+/RR47DFj29Gzr0BAHLEv/HGW9PipVREY/b5NmoiB9Lx5wNNPi+9VrWpsW2SKkecZkQZ6lHPFFWJFtN5ReyOxO3CSZsfhw7Wtc/nl1uzb6KWxSxex8ja8EjncSy8Z235mprH1KMa50cfJ6ElUt654X5JOIG9me+Hk0n7ffbKLjh0rPvf16zQOZBwDJxK98YY48MDGjeLfVl2IAoHSxvlWq1IFOHAg8jJW9nFKSxNr0JKSzG9TSq7GSUlcHDBwINC7tzj8vNw2zJLryPnSS+olPrJccLAFPaZOFf9Xmmkg3Pz5YtdHu0fhDWbR994zv63p08U5vl97TXz2EsnBg+I8RmrLqZ16AweKFa9duhg73QIBsfJWbY7wqlVLBwTT48kn9a9D5Hl2BmJEBtjUPot8Jzsb+Pe/S/+2so+TtNG+2kUwPl5sUzNwoLZ9qdWESAMnP1xctaRx1qyy7+ktyVWsGDqBDiBOYLx0qXxTxNq1xWHrGzcGtm/Xty9yVKtWQEGB2G1x8uTS93v3BlJSxAe2QZUqmXsOULWqfNOySNnx2muVP9OajbOzxTm+tahSRfynRtotQs6sWWL6jF5G9KxnNDAj0s3qGqcXXwQ6dgy9+JjhZOAk18GQJxaFYeBE8ty6WLz6KlC5cml/IrNibVgyMy65xL5JjMkxgYD87AK1agHPPScGS3/9VbqsGb//7r/xV5RoGVhB2pJWL6sDp7/9Dfj5Z/F5B8BO6uQRl15a+uTGKVaVV1heIA0YOJE8uwInte2WK2ftPApW1DjZHURa8cRP7zaM7pMT7fpWMBufOhX5cz3sGlPGDXYHgMHja9Up9Npr4v///CewapXYZ55It+rVrd+mk0EToH7xGjxYbLO7eLHyBRBg4ESaRNFtjywVLdXTFSqY30Y0BU633SYO/lCrln19z8g2GRnAkSPmtnH6dOlru7K20UEe/R6XDxkidhNNTQWWLw/9LHisMzLUt6PnOJiZsoYIt90mZtpu3dxOiXFqF7Lg0OVqQ47KBU7RUhYiyzC8JnlmLhZmgpUWLdSX0ZM2aYeO8JF4rBIcFfCpp+zZvhVmzxbT+fLLwD33iPNdGGFkFlGyxIABoSPwKpEWuitVKn0dPG28MJVGsCVuvXqh75cr53xapLReIpQCm9tvF+d4lOt6GTz+CoN0ado+keUSE4EXXojc+dBNl1wilinCR/61Q7S0OyZbMXAieWYCp2nTgIsukh/EQGkwh19+Ab780tjcDpFI5x+qUQP49Vf9j+zVjsXddwOHDonzUxghHR3PKLWS1s03K8+XpYfcLKJul3ZjhFKzuK+/Dv27oKD09ebNZZeXjmqnt6mddGaBSNSy45w5wCOPACtXin+/+6546r/zjr70WG3hQmPTtJ1/vhjURhoKPljTl56uPoRxdrb+NBBFpdRU8f4afqGTsqqJHZvqkQbMJSSvZk1z665dGzoy3ty5wMSJ4oS+curWBXJztW1fS1CXmwvs2VP2/Vq11IfPMqJyZePrVqkizlkll16tnHpELRc4vfKKM/uOcUoPQ9u1E8sWQdJJc6XDmQdPm5o1xfmEsrOBDz/UlwYtNV6AenasUkVsYpaTI/59/fXiKdC8ub70WK1SJeD//k99ufBL0A8/iGW7lJSyy+bliQ/Nq1XTng6r59gm8rWkpMhBDQeHIAexjxPJGzsW2LXLuh7H/fsbbx5mRJcu9k9MYyWzNW1W9OXSQq6pnpHJhkg3uXv6pk3i/+XLAydPiq+VCujSssVNN4kBgt7yhpnnA35pfmaktU58fOh60tNk8WLxfz3HOilJvPRaMecVUdT
TenLl5IROp/Hoo2I1cXBITbmLrNzTEIppDJxIXlqa2C/Gi7RcJK0c1cfLnUM/+UTsNPHGG/rXDQT0l2blapz8UiL2ubi40AL5r7+KFahAaC2TkvBsbCRba60UluOXFp25ueI0NJFa0KrVHgWDWMD45WPGDHEgsGHDjK1PRGHmzwdGjhRHaMnPF/v7SgMjucDJiibuFFVYL0nRycysnuG8HDhdeaXYTshIx9nffxf7oZ1/vtgvTQu5GicvH58oEhcXWiCXTuqqJXAyo25dAR9/rP2nlsbSb7whVqj6pUVnfLw4vHek9N56a+RjceKE+XTUrAl89hkHvyRSpbWJXePGwP/+J3aq/PTTsrVJwWrjL74Qn6B88409TfvJ1xg4kf/JdbyI1Etbq6uuAjIzgd69zW8rErcCj+rVxX5o27cDo0drW0c6uMfAgeIQbZw01xHx8cCkSaV/S+/5WgInM5WwH3xwFlddJb4O1nJFGla7c+fS17fcIvZfsnrcFzclJADPP6/8ebArp7TvmVSDBpYniSjmFJ+7NxePGGHNBoOBU4cO4mBVTozkR77DwIn85/zzQ/9esUJ8OtSsWel7VgQ7CxaIbWXUhsDymNOZmeIL6YiCVvngA6BTJ3GSmlmzxLGXo2kWVA+LiwPuuguYMkVsoSmNt8eMEf+Xq5345z/FZmfBZbQIn9JFuq/Fi8Wh0VevVl5/5EjgpZeAn3/Wvs9oMnYs8OKLYmWwnHnzxDLZ3LnOposomhS9/TZWTZ2K4nHjrNkg+zORBgycyH8WLAj9u21b8emQtAOGFbU4gYAzQYHFNU6rH3sMRUOHlj1OVmjaVGw/dPnl1m+bZF10kfj/3/4m/j92rNhCU2rcOGDdOvluiQ89BPz4I5CVpb6vH38UpyP76CPlgQkaNRKHJY9UqZuYKM5pVL+++j6jUUqKODJe7dryn9erJ7YCcnK8HKKok5yMvxo1Mj//0osviiflv/9tTbooqvFRMfnLoEHK7VyU5ojyqptvFodtv+IKSzd7okYNFN9+O+KtHCCDXLN6tVjxGWmQyLg4oE0b8/tq2LB0OrJ+/cxvj4jI8+64g3MAkGYMnMhfIk0kM3as2F/HL72pZ88We9FzcAWKICnJnZH1mS2JiIhCMXAif3jhBeC118RZM5VUqCB2HvATlk7JozjKfGTB5pKRhi0nIqLowsCJ/OEf/xD/EZEjpDG9X+ZgctJ55wGHDzs39zQREbmPgRMREZURFwc8+mgRNmzYgbp167qdHE+qVMntFBARkZM8MarezJkzUadOHaSkpKB9+/ZYu3ZtxOXnzZuHxo0bIyUlBc2bN8fChQsdSikRUewYM6YYt9yy1e1kEBEReYLrgdO7776L0aNHY+LEifj222/RsmVL9OjRAwcPHpRd/ssvv8SAAQNw2223YcOGDejXrx/69euHLVu2OJxyIiIiIiKKFa4HTtOnT8fgwYMxaNAgNG3aFC+88AJSU1Px6quvyi7/9NNPo2fPnrjvvvvQpEkTTJo0Ca1bt8Zzzz3ncMqJiIiIiChWuNrHqaCgAOvXr8f48eNL3ouLi0NeXh7WrFkju86aNWswevTokPd69OiB+fPnyy5/5swZnDlzpuTv/Px8AEBhYSEKCwtNfgPzgmnwQlrI/5ifyErMT2Ql5ieyEvMTWUVPHnI1cDp06BCKioqQnZ0d8n52dja2bdsmu87+/ftll9+/f7/s8pMnT8YjjzxS5v3FixcjNTXVYMqtt2TJEreTQFGE+YmsxPxEVmJ+IisxP5FZJ0+e1Lxs1I+qN378+JAaqvz8fOTk5KB79+5IT093MWWiwsJCLFmyBN26dUNiYqLbySGfY34iKzE/kZWYn8hKzE9klWBrNC1cDZyysrIQHx+PAwcOhLx/4MABVKtWTXadatWq6Vo+OTkZycnJZd5PTEz01InmtfSQvzE/kZWYn8hKzE9kJeYnMktP/nF1cIikpCS0adMGy5YtK3mvuLgYy5YtQ25
uruw6ubm5IcsDYjWt0vJERERERERmud5Ub/To0bj11lvRtm1btGvXDjNmzMCJEycwaNAgAMDAgQNRs2ZNTJ48GQAwcuRIdO7cGdOmTcOVV16JOXPmYN26dfjPf/7j5tcgIiIiIqIo5nrgdMMNN+CPP/7AhAkTsH//frRq1QqLFi0qGQBi9+7diIsrrRjr0KED3n77bTz44IO4//770bBhQ8yfPx/NmjVz6ysQEREREVGUcz1wAoDhw4dj+PDhsp+tXLmyzHv9+/dH//79bU4VERERERGRyPUJcImIiIiIiLyOgRMREREREZEKBk5EREREREQqGDgRERERERGpYOBERERERESkgoETERERERGRCgZOREREREREKhg4ERERERERqfDEBLhOEgQBAJCfn+9ySkSFhYU4efIk8vPzkZiY6HZyyOeYn8hKzE9kJeYnshLzE1klGBMEY4RIYi5wOnbsGAAgJyfH5ZQQEREREZEXHDt2DBUrVoy4TEDQEl5FkeLiYuzduxcVKlRAIBBwOznIz89HTk4O9uzZg/T0dLeTQz7H/ERWYn4iKzE/kZWYn8gqgiDg2LFjqFGjBuLiIvdiirkap7i4OJx33nluJ6OM9PR0nvhkGeYnshLzE1mJ+YmsxPxEVlCraQri4BBEREREREQqGDgRERERERGpYODksuTkZEycOBHJycluJ4WiAPMTWYn5iazE/ERWYn4iN8Tc4BBERERERER6scaJiIiIiIhIBQMnIiIiIiIiFQyciIiIiIiIVDBwIiIiIiIiUsHAyUUzZ85EnTp1kJKSgvbt22Pt2rVuJ4k8YNWqVejduzdq1KiBQCCA+fPnh3wuCAImTJiA6tWro1y5csjLy8NPP/0Ussyff/6Jm266Cenp6cjIyMBtt92G48ePhyyzadMmdOzYESkpKcjJycHjjz9u91cjh02ePBkXXXQRKlSogKpVq6Jfv37Yvn17yDKnT5/GsGHDULlyZaSlpeHaa6/FgQMHQpbZvXs3rrzySqSmpqJq1aq47777cPbs2ZBlVq5cidatWyM5ORkNGjTA66+/bvfXI4c9//zzaNGiRcmEo7m5ufj0009LPmdeIjOmTJmCQCCAUaNGlbzHPEWeI5Ar5syZIyQlJQmvvvqq8P333wuDBw8WMjIyhAMHDridNHLZwoULhQceeED44IMPBADChx9+GPL5lClThIoVKwrz588XvvvuO6FPnz5C3bp1hVOnTpUs07NnT6Fly5bCV199JXz++edCgwYNhAEDBpR8fvToUSE7O1u46aabhC1btgjvvPOOUK5cOeHFF1906muSA3r06CG89tprwpYtW4SNGzcKvXr1EmrVqiUcP368ZJkhQ4YIOTk5wrJly4R169YJF198sdChQ4eSz8+ePSs0a9ZMyMvLEzZs2CAsXLhQyMrKEsaPH1+yzC+//CKkpqYKo0ePFn744Qfh2WefFeLj44VFixY5+n3JXgsWLBD++9//Cj/++KOwfft24f777xcSExOFLVu2CILAvETGrV27VqhTp47QokULYeTIkSXvM0+R1zBwckm7du2EYcOGlfxdVFQk1KhRQ5g8ebKLqSKvCQ+ciouLhWrVqglPPPFEyXtHjhwRkpOThXfeeUcQBEH44YcfBADCN998U7LMp59+KgQCAeH3338XBEEQ/v3vfwuZmZnCmTNnSpYZO3as0KhRI5u/Ebnp4MGDAgDhs88+EwRBzDuJiYnCvHnzSpbZunWrAEBYs2aNIAhiIB8XFyfs37+/ZJnnn39eSE9PL8k/Y8aMES644IKQfd1www1Cjx497P5K5LLMzEzh5ZdfZl4iw44dOyY0bNhQWLJkidC5c+eSwIl5iryITfVcUFBQgPXr1yMvL6/kvbi4OOTl5WHNmjUupoy8bufOndi/f39I3qlYsSLat29fknfWrFmDjIwMtG3btmSZvLw8xMXF4euvvy5ZplOnTkhKSipZpkePHti+fTv++usvh74NOe3
o0aMAgEqVKgEA1q9fj8LCwpD81LhxY9SqVSskPzVv3hzZ2dkly/To0QP5+fn4/vvvS5aRbiO4DK9n0auoqAhz5szBiRMnkJuby7xEhg0bNgxXXnllmd+deYq8KMHtBMSiQ4cOoaioKOREB4Ds7Gxs27bNpVSRH+zfvx8AZPNO8LP9+/ejatWqIZ8nJCSgUqVKIcvUrVu3zDaCn2VmZtqSfnJPcXExRo0ahUsuuQTNmjUDIP7WSUlJyMjICFk2PD/J5bfgZ5GWyc/Px6lTp1CuXDk7vhK5YPPmzcjNzcXp06eRlpaGDz/8EE2bNsXGjRuZl0i3OXPm4Ntvv8U333xT5jNen8iLGDgREcWAYcOGYcuWLVi9erXbSSEfa9SoETZu3IijR4/ivffew6233orPPvvM7WSRD+3ZswcjR47EkiVLkJKS4nZyiDRhUz0XZGVlIT4+vszIMAcOHEC1atVcShX5QTB/RMo71apVw8GDB0M+P3v2LP7888+QZeS2RAsvLgAACHFJREFUId0HRY/hw4fjk08+wYoVK3DeeeeVvF+tWjUUFBTgyJEjIcuH5ye1vKK0THp6Op/mRpmkpCQ0aNAAbdq0weTJk9GyZUs8/fTTzEuk2/r163Hw4EG0bt0aCQkJSEhIwGeffYZnnnkGCQkJyM7OZp4iz2Hg5IKkpCS0adMGy5YtK3mvuLgYy5YtQ25urospI6+rW7cuqlWrFpJ38vPz8fXXX5fkndzcXBw5cgTr168vWWb58uUoLi5G+/btS5ZZtWoVCgsLS5ZZsmQJGjVqxGZ6UUQQBAwfPhwffvghli9fXqZ5Zps2bZCYmBiSn7Zv347du3eH5KfNmzeHBONLlixBeno6mjZtWrKMdBvBZXg9i37FxcU4c+YM8xLp1rVrV2zevBkbN24s+de2bVvcdNNNJa+Zp8hz3B6dIlbNmTNHSE5OFl5//XXhhx9+EO644w4hIyMjZGQYik3Hjh0TNmzYIGzYsEEAIEyfPl3YsGGD8OuvvwqCIA5HnpGRIXz00UfCpk2bhL59+8oOR37hhRcKX3/9tbB69WqhYcOGIcORHzlyRMjOzhZuueUWYcuWLcKcOXOE1NRUDkceZe68806hYsWKwsqVK4V9+/aV/Dt58mTJMkOGDBFq1aolLF++XFi3bp2Qm5sr5ObmlnweHO63e/fuwsaNG4VFixYJVapUkR3u97777hO2bt0qzJw5k8P9RqFx48YJn332mbBz505h06ZNwrhx44RAICAsXrxYEATmJTJPOqqeIDBPkfcwcHLRs88+K9SqVUtISkoS2rVrJ3z11VduJ4k8YMWKFQKAMv9uvfVWQRDEIckfeughITs7W0hOTha6du0qbN++PWQbhw8fFgYMGCCkpaUJ6enpwqBBg4Rjx46FLPPdd98Jl156qZCcnCzUrFlTmDJlilNfkRwil48ACK+99lrJMqdOnRKGDh0qZGZmCqmpqcLVV18t7Nu3L2Q7u3btEq644gqhXLlyQlZWlnDPPfcIhYWFIcusWLFCaNWqlZCUlCTUq1cvZB8UHf7+978LtWvXFpKSkoQqVaoIXbt2LQmaBIF5icwLD5yYp8hrAoIgCO7UdREREREREfkD+zgRERERERGpYOBERERERESkgoETERERERGRCgZOREREREREKhg4ERERERERqWDgREREREREpIKBExERERERkQoGTkRERERERCoYOBERERkQCAQwf/58t5NBREQOYeBERESe9ccff+DOO+9ErVq1kJycjGrVqqFHjx744osv3E4aERHFmAS3E0BERKTk2muvRUFBAWbNmoV69erhwIEDWLZsGQ4fPux20oiIKMawxomIiDzpyJEj+PzzzzF16lRcfvnlqF27Ntq1a4fx48ejT58+AIDp06ejefPmKF++PHJycjB06FAcP368ZBuvv/46MjIy8Mknn6BRo0ZITU3Fddddh5MnT2LWrFmoU6cOMjMzMWLECBQVFZWsV6d
OHUyaNAkDBgxA+fLlUbNmTcycOTNievfs2YPrr78eGRkZqFSpEvr27Ytdu3bZcmyIiMh5DJyIiMiT0tLSkJaWhvnz5+PMmTOyy8TFxeGZZ57B999/j1mzZmH58uUYM2ZMyDInT57EM888gzlz5mDRokVYuXIlrr76aixcuBALFy7E7Nmz8eKLL+K9994LWe+JJ55Ay5YtsWHDBowbNw4jR47EkiVLZNNRWFiIHj16oEKFCvj888/xxRdfIC0tDT179kRBQYE1B4SIiFwVEARBcDsRREREct5//30MHjwYp06dQuvWrdG5c2fceOONaNGihezy7733HoYMGYJDhw4BEGucBg0ahJ9//hn169cHAAwZMgSzZ8/GgQMHkJaWBgDo2bMn6tSpgxdeeAGAWOPUpEkTfPrppyXbvvHGG5Gfn4+FCxcCEAeH+PDDD9GvXz+8+eabePTRR7F161YEAgEAQEFBATIyMjB//nx0797dngNERESOYY0TERF51rXXXou9e/diwYIF6NmzJ1auXInWrVvj9ddfBwAsXboUXbt2Rc2aNVGhQgXccsstOHz4ME6ePFmyjdTU1JKgCQCys7NRp06dkqAp+N7BgwdD9p2bm1vm761bt8qm87vvvsPPP/+MChUqlNSUVapUCadPn8aOHTvMHgYiIvIADg5BRESelpKSgm7duqFbt2546KGHcPvtt2PixIm47LLLcNVVV+HOO+/EY489hkqVKmH16tW47bbbUFBQgNTUVABAYmJiyPYCgYDse8XFxYbTePz4cbRp0wZvvfVWmc+qVKlieLtEROQdDJyIiMhXmjZtivnz52P9+vUoLi7GtGnTEBcnNqCYO3euZfv56quvyvzdpEkT2WVbt26Nd999F1WrVkV6erplaSAiIu9gUz0iIvKkw4cPo0uXLnjzzTexadMm7Ny5E/PmzcPjjz+Ovn37okGDBigsLMSzzz6LX375BbNnzy7po2SFL774Ao8//jh+/PFHzJw5E/PmzcPIkSNll73pppuQlZWFvn374vPPP8fOnTuxcuVKjBgxAr/99ptlaSIiIvewxomIiDwpLS0N7du3x1NPPYUdO3agsLAQOTk5GDx4MO6//36UK1cO06dPx9SpUzF+/Hh06tQJkydPxsCBAy3Z/z333IN169bhkUceQXp6OqZPn44ePXrILpuamopVq1Zh7NixuOaaa3Ds2DHUrFkTXbt2ZQ0UEVGU4Kh6REREYerUqYNRo0Zh1KhRbieFiIg8gk31iIiIiIiIVDBwIiIiIiIiUsGmekRERERERCpY40RERERERKSCgRMREREREZEKBk5EREREREQqGDgRERERERGpYOBERERERESkgoETERERERGRCgZOREREREREKhg4ERERERERqfh/KggwoHLn45YAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "log_file_path = \"/home/jupyter/bert/ratio_proportion_change3_1920/logs/oct_logs/log_train_finetuned_info.txt\"\n", + "\n", + "# Lists to hold data\n", + "probabilities = []\n", + "\n", + " for line in file:\n", + " if line.strip().startswith(\"{\"): \n", + " line_dict = eval(line.strip())\n", + " # Extract the probabilities if they exist in the log\n", + " prob_entry = line_dict.get('probabilities')\n", + " if prob_entry:\n", + " # Probabilities expected to be a list of lists\n", + " probabilities.extend(eval(prob_entry))\n", + "\n", + "# Plotting the probabilities\n", + "plt.figure(figsize=(10, 6))\n", + "plt.plot(probabilities[:, 0], label='Class 0 Probability', color='blue')\n", + "plt.plot(probabilities[:, 1], label='Class 1 Probability', color='red')\n", + "plt.xlabel('Sample')\n", + "plt.ylabel('Probability')\n", + "plt.title('Class Probabilities Over Samples')\n", + "plt.legend(loc='best')\n", + "plt.grid(True)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "05a5968f-6dd6-4249-9e5d-ae12d51896f4", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parsed Log Data: {'epoch': 20, 'iter': 0, 'avg_loss': 0.6324796080589294, 'avg_acc': 100.0, 'loss': 0.6324796080589294}\n", + "Epoch: 20, Loss: 0.6324796080589294, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 10, 'avg_loss': 0.8984121463515542, 'avg_acc': 45.45454545454545, 'loss': 0.6594327092170715}\n", + "Epoch: 20, Loss: 0.6594327092170715, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 20, 'avg_loss': 0.9652528280303592, 'avg_acc': 28.57142857142857, 'loss': 1.4224793910980225}\n", + "Epoch: 20, Loss: 1.4224793910980225, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 30, 'avg_loss': 0.9635869822194499, 
'avg_acc': 25.806451612903224, 'loss': 0.7135135531425476}\n", + "Epoch: 20, Loss: 0.7135135531425476, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 40, 'avg_loss': 0.972876513876566, 'avg_acc': 24.390243902439025, 'loss': 1.3517900705337524}\n", + "Epoch: 20, Loss: 1.3517900705337524, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 50, 'avg_loss': 1.021922061256334, 'avg_acc': 25.49019607843137, 'loss': 0.6623533964157104}\n", + "Epoch: 20, Loss: 0.6623533964157104, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 60, 'avg_loss': 1.0242969247161364, 'avg_acc': 22.950819672131146, 'loss': 0.7342778444290161}\n", + "Epoch: 20, Loss: 0.7342778444290161, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 70, 'avg_loss': 1.0194281394213018, 'avg_acc': 22.535211267605636, 'loss': 1.4144562482833862}\n", + "Epoch: 20, Loss: 1.4144562482833862, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 80, 'avg_loss': 0.9945832818378637, 'avg_acc': 23.456790123456788, 'loss': 0.9813053607940674}\n", + "Epoch: 20, Loss: 0.9813053607940674, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 90, 'avg_loss': 0.9772412478923798, 'avg_acc': 26.373626373626376, 'loss': 1.0407211780548096}\n", + "Epoch: 20, Loss: 1.0407211780548096, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 100, 'avg_loss': 0.9665699548060351, 'avg_acc': 27.722772277227726, 'loss': 1.2080334424972534}\n", + "Epoch: 20, Loss: 1.2080334424972534, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 110, 'avg_loss': 0.9632461189149736, 'avg_acc': 30.630630630630627, 'loss': 0.6947866678237915}\n", + "Epoch: 20, Loss: 0.6947866678237915, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 120, 'avg_loss': 0.9608795822651919, 'avg_acc': 32.231404958677686, 'loss': 0.40320727229118347}\n", + "Epoch: 20, Loss: 0.40320727229118347, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 130, 'avg_loss': 
0.9593410491943359, 'avg_acc': 32.06106870229007, 'loss': 1.3654974699020386}\n", + "Epoch: 20, Loss: 1.3654974699020386, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 140, 'avg_loss': 0.950714221237399, 'avg_acc': 33.33333333333333, 'loss': 0.49264073371887207}\n", + "Epoch: 20, Loss: 0.49264073371887207, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 150, 'avg_loss': 0.9462508770409009, 'avg_acc': 34.437086092715234, 'loss': 0.9697921276092529}\n", + "Epoch: 20, Loss: 0.9697921276092529, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 160, 'avg_loss': 0.9463553169499272, 'avg_acc': 34.161490683229815, 'loss': 1.1782255172729492}\n", + "Epoch: 20, Loss: 1.1782255172729492, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 170, 'avg_loss': 0.9387507883080265, 'avg_acc': 35.67251461988304, 'loss': 1.2314364910125732}\n", + "Epoch: 20, Loss: 1.2314364910125732, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 180, 'avg_loss': 0.9282894720688709, 'avg_acc': 36.46408839779006, 'loss': 0.42901739478111267}\n", + "Epoch: 20, Loss: 0.42901739478111267, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 190, 'avg_loss': 0.928510325117261, 'avg_acc': 36.12565445026178, 'loss': 1.0955886840820312}\n", + "Epoch: 20, Loss: 1.0955886840820312, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 200, 'avg_loss': 0.9155606376887554, 'avg_acc': 38.308457711442784, 'loss': 0.32576441764831543}\n", + "Epoch: 20, Loss: 0.32576441764831543, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 210, 'avg_loss': 0.9126152413151275, 'avg_acc': 38.862559241706165, 'loss': 0.5676601529121399}\n", + "Epoch: 20, Loss: 0.5676601529121399, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 220, 'avg_loss': 0.9158515605856391, 'avg_acc': 38.009049773755656, 'loss': 1.1219587326049805}\n", + "Epoch: 20, Loss: 1.1219587326049805, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 230, 
'avg_loss': 0.912027623681795, 'avg_acc': 37.66233766233766, 'loss': 0.45346391201019287}\n", + "Epoch: 20, Loss: 0.45346391201019287, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 240, 'avg_loss': 0.9076244853962506, 'avg_acc': 37.75933609958506, 'loss': 1.6801875829696655}\n", + "Epoch: 20, Loss: 1.6801875829696655, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 250, 'avg_loss': 0.9010671710231865, 'avg_acc': 38.24701195219124, 'loss': 1.5949524641036987}\n", + "Epoch: 20, Loss: 1.5949524641036987, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 260, 'avg_loss': 0.8927359990347391, 'avg_acc': 39.46360153256705, 'loss': 1.2342846393585205}\n", + "Epoch: 20, Loss: 1.2342846393585205, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 270, 'avg_loss': 0.877475259488799, 'avg_acc': 41.32841328413284, 'loss': 0.4685608446598053}\n", + "Epoch: 20, Loss: 0.4685608446598053, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 280, 'avg_loss': 0.8655961502701363, 'avg_acc': 42.34875444839858, 'loss': 0.252122700214386}\n", + "Epoch: 20, Loss: 0.252122700214386, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 290, 'avg_loss': 0.8628690249936277, 'avg_acc': 42.955326460481096, 'loss': 0.4874681234359741}\n", + "Epoch: 20, Loss: 0.4874681234359741, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 300, 'avg_loss': 0.8611048802584905, 'avg_acc': 42.857142857142854, 'loss': 0.774188756942749}\n", + "Epoch: 20, Loss: 0.774188756942749, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 310, 'avg_loss': 0.8558992817279227, 'avg_acc': 42.765273311897104, 'loss': 0.7752485275268555}\n", + "Epoch: 20, Loss: 0.7752485275268555, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 320, 'avg_loss': 0.8563959705309705, 'avg_acc': 42.36760124610592, 'loss': 1.0107471942901611}\n", + "Epoch: 20, Loss: 1.0107471942901611, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 330, 
'avg_loss': 0.851796724825107, 'avg_acc': 43.202416918429, 'loss': 0.573390007019043}\n", + "Epoch: 20, Loss: 0.573390007019043, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 340, 'avg_loss': 0.8430047407090838, 'avg_acc': 43.988269794721404, 'loss': 0.2764188051223755}\n", + "Epoch: 20, Loss: 0.2764188051223755, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 350, 'avg_loss': 0.8370788225166479, 'avg_acc': 44.15954415954416, 'loss': 0.7711461782455444}\n", + "Epoch: 20, Loss: 0.7711461782455444, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 360, 'avg_loss': 0.8318019534907513, 'avg_acc': 44.87534626038781, 'loss': 0.2291455715894699}\n", + "Epoch: 20, Loss: 0.2291455715894699, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 370, 'avg_loss': 0.8259480669492018, 'avg_acc': 45.28301886792453, 'loss': 0.8360911011695862}\n", + "Epoch: 20, Loss: 0.8360911011695862, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 380, 'avg_loss': 0.8183476930371733, 'avg_acc': 46.194225721784775, 'loss': 0.1931251883506775}\n", + "Epoch: 20, Loss: 0.1931251883506775, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 390, 'avg_loss': 0.8129235445081121, 'avg_acc': 47.05882352941176, 'loss': 0.40773236751556396}\n", + "Epoch: 20, Loss: 0.40773236751556396, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 400, 'avg_loss': 0.8090237801583331, 'avg_acc': 47.13216957605985, 'loss': 0.7096580862998962}\n", + "Epoch: 20, Loss: 0.7096580862998962, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 410, 'avg_loss': 0.8038147058872701, 'avg_acc': 47.44525547445255, 'loss': 0.8231819868087769}\n", + "Epoch: 20, Loss: 0.8231819868087769, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 420, 'avg_loss': 0.7973752015772455, 'avg_acc': 48.45605700712589, 'loss': 0.928947389125824}\n", + "Epoch: 20, Loss: 0.928947389125824, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 430, 
'avg_loss': 0.7922281221488789, 'avg_acc': 48.72389791183295, 'loss': 0.4240599274635315}\n", + "Epoch: 20, Loss: 0.4240599274635315, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 440, 'avg_loss': 0.7909014855179928, 'avg_acc': 48.75283446712018, 'loss': 0.4714360237121582}\n", + "Epoch: 20, Loss: 0.4714360237121582, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 450, 'avg_loss': 0.7859812361702422, 'avg_acc': 49.44567627494457, 'loss': 0.27238792181015015}\n", + "Epoch: 20, Loss: 0.27238792181015015, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 460, 'avg_loss': 0.7806033468815354, 'avg_acc': 50.108459869848154, 'loss': 0.9251547455787659}\n", + "Epoch: 20, Loss: 0.9251547455787659, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 470, 'avg_loss': 0.77501722907691, 'avg_acc': 50.530785562632694, 'loss': 1.1123549938201904}\n", + "Epoch: 20, Loss: 1.1123549938201904, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 480, 'avg_loss': 0.7672348431700728, 'avg_acc': 51.559251559251564, 'loss': 0.36292564868927}\n", + "Epoch: 20, Loss: 0.36292564868927, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 490, 'avg_loss': 0.7614982160616309, 'avg_acc': 52.342158859470466, 'loss': 0.5763269662857056}\n", + "Epoch: 20, Loss: 0.5763269662857056, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 500, 'avg_loss': 0.755586820834887, 'avg_acc': 53.093812375249506, 'loss': 0.43941155076026917}\n", + "Epoch: 20, Loss: 0.43941155076026917, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 510, 'avg_loss': 0.7529963151991017, 'avg_acc': 53.42465753424658, 'loss': 0.5494109392166138}\n", + "Epoch: 20, Loss: 0.5494109392166138, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 520, 'avg_loss': 0.7464623560489024, 'avg_acc': 53.93474088291747, 'loss': 0.9051780104637146}\n", + "Epoch: 20, Loss: 0.9051780104637146, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 
530, 'avg_loss': 0.7401949792482984, 'avg_acc': 54.61393596986818, 'loss': 0.13609610497951508}\n", + "Epoch: 20, Loss: 0.13609610497951508, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 540, 'avg_loss': 0.7334553723304417, 'avg_acc': 55.26802218114602, 'loss': 0.8661832809448242}\n", + "Epoch: 20, Loss: 0.8661832809448242, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 550, 'avg_loss': 0.7272329243633578, 'avg_acc': 55.898366606170605, 'loss': 0.4187266230583191}\n", + "Epoch: 20, Loss: 0.4187266230583191, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 560, 'avg_loss': 0.7212331915762336, 'avg_acc': 56.68449197860963, 'loss': 0.38998621702194214}\n", + "Epoch: 20, Loss: 0.38998621702194214, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 570, 'avg_loss': 0.714248325851878, 'avg_acc': 57.26795096322241, 'loss': 0.42771902680397034}\n", + "Epoch: 20, Loss: 0.42771902680397034, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 580, 'avg_loss': 0.709527192832886, 'avg_acc': 57.831325301204814, 'loss': 0.4970065951347351}\n", + "Epoch: 20, Loss: 0.4970065951347351, Accuracy: None\n", + "Parsed Log Data: {'epoch': 20, 'iter': 590, 'avg_loss': 0.7070212593513293, 'avg_acc': 57.868020304568525, 'loss': 0.6174928545951843}\n", + "Epoch: 20, Loss: 0.6174928545951843, Accuracy: None\n", + "Parsed Log Data: {'epoch': 'EP20_train', 'avg_loss': 0.7055494453351037, 'total_acc': 57.983193277310924, 'precisions': 1.0, 'recalls': 0.5798319327731093, 'f1_scores': 0.7340425531914894, 'time_taken_from_start': 5.023622989654541}\n", + "Epoch: EP20_train, Loss: None, Accuracy: None\n", + "Epochs: []\n", + "Loss Values: []\n", + "Accuracy Values: []\n", + "No data to plot. 
Please check the log file for correct format.\n" + ] + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "log_file_path = \"/home/jupyter/bert/ratio_proportion_change3_1920/logs/oct_logs/log_train_finetuned.txt\"\n", + "\n", + "epochs = []\n", + "loss_values = []\n", + "accuracy_values = []\n", + "\n", + "# Reading the log file\n", + "with open(log_file_path, 'r') as file:\n", + " for line in file:\n", + " if line.strip().startswith(\"{\"): \n", + " try:\n", + " log_data = eval(line.strip()) \n", + "\n", + " print(f\"Parsed Log Data: {log_data}\")\n", + "\n", + " epoch = log_data.get('epoch')\n", + " loss = log_data.get('loss')\n", + " accuracy = log_data.get('accuracy')\n", + "\n", + " print(f\"Epoch: {epoch}, Loss: {loss}, Accuracy: {accuracy}\")\n", + "\n", + " # Append to lists if values are present\n", + " if epoch and loss is not None and accuracy is not None:\n", + " epochs.append(epoch)\n", + " loss_values.append(float(loss))\n", + " accuracy_values.append(float(accuracy))\n", + " except Exception as e:\n", + " print(f\"Error processing line: {e}\")\n", + "\n", + "# Check if data was extracted\n", + "print(f\"Epochs: {epochs}\")\n", + "print(f\"Loss Values: {loss_values}\")\n", + "print(f\"Accuracy Values: {accuracy_values}\")\n", + "\n", + "# Plotting Loss and Accuracy if data is present\n", + "if epochs and loss_values and accuracy_values:\n", + " plt.figure(figsize=(12, 6))\n", + "\n", + " # Subplot for loss\n", + " plt.subplot(1, 2, 1)\n", + " plt.plot(epochs, loss_values, label='Loss', color='blue', marker='o')\n", + " plt.xlabel('Epoch')\n", + " plt.ylabel('Loss')\n", + " plt.title('Training Loss over Epochs')\n", + " plt.grid(True)\n", + "\n", + " # Subplot for accuracy\n", + " plt.subplot(1, 2, 2)\n", + " plt.plot(epochs, accuracy_values, label='Accuracy', color='green', marker='o')\n", + " plt.xlabel('Epoch')\n", + " plt.ylabel('Accuracy')\n", + " plt.title('Training Accuracy over Epochs')\n", + " plt.grid(True)\n", + "\n", + " 
plt.tight_layout()\n", + " plt.show()\n", + "else:\n", + " print(\"No data to plot. Please check the log file for correct format.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "2565d269-a7c7-4979-8848-56a2d2163ca1", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parsed Log Data: {'epoch': 20, 'iter': 0, 'avg_loss': 0.6324796080589294, 'avg_acc': 100.0, 'loss': 0.6324796080589294}\n", + "Epoch: 20, Loss: 0.6324796080589294, Average Loss: 0.6324796080589294, Average Accuracy: 100.0\n", + "Parsed Log Data: {'epoch': 20, 'iter': 10, 'avg_loss': 0.8984121463515542, 'avg_acc': 45.45454545454545, 'loss': 0.6594327092170715}\n", + "Epoch: 20, Loss: 0.6594327092170715, Average Loss: 0.8984121463515542, Average Accuracy: 45.45454545454545\n", + "Parsed Log Data: {'epoch': 20, 'iter': 20, 'avg_loss': 0.9652528280303592, 'avg_acc': 28.57142857142857, 'loss': 1.4224793910980225}\n", + "Epoch: 20, Loss: 1.4224793910980225, Average Loss: 0.9652528280303592, Average Accuracy: 28.57142857142857\n", + "Parsed Log Data: {'epoch': 20, 'iter': 30, 'avg_loss': 0.9635869822194499, 'avg_acc': 25.806451612903224, 'loss': 0.7135135531425476}\n", + "Epoch: 20, Loss: 0.7135135531425476, Average Loss: 0.9635869822194499, Average Accuracy: 25.806451612903224\n", + "Parsed Log Data: {'epoch': 20, 'iter': 40, 'avg_loss': 0.972876513876566, 'avg_acc': 24.390243902439025, 'loss': 1.3517900705337524}\n", + "Epoch: 20, Loss: 1.3517900705337524, Average Loss: 0.972876513876566, Average Accuracy: 24.390243902439025\n", + "Parsed Log Data: {'epoch': 20, 'iter': 50, 'avg_loss': 1.021922061256334, 'avg_acc': 25.49019607843137, 'loss': 0.6623533964157104}\n", + "Epoch: 20, Loss: 0.6623533964157104, Average Loss: 1.021922061256334, Average Accuracy: 25.49019607843137\n", + "Parsed Log Data: {'epoch': 20, 'iter': 60, 'avg_loss': 1.0242969247161364, 'avg_acc': 22.950819672131146, 'loss': 0.7342778444290161}\n", + 
"Epoch: 20, Loss: 0.7342778444290161, Average Loss: 1.0242969247161364, Average Accuracy: 22.950819672131146\n", + "Parsed Log Data: {'epoch': 20, 'iter': 70, 'avg_loss': 1.0194281394213018, 'avg_acc': 22.535211267605636, 'loss': 1.4144562482833862}\n", + "Epoch: 20, Loss: 1.4144562482833862, Average Loss: 1.0194281394213018, Average Accuracy: 22.535211267605636\n", + "Parsed Log Data: {'epoch': 20, 'iter': 80, 'avg_loss': 0.9945832818378637, 'avg_acc': 23.456790123456788, 'loss': 0.9813053607940674}\n", + "Epoch: 20, Loss: 0.9813053607940674, Average Loss: 0.9945832818378637, Average Accuracy: 23.456790123456788\n", + "Parsed Log Data: {'epoch': 20, 'iter': 90, 'avg_loss': 0.9772412478923798, 'avg_acc': 26.373626373626376, 'loss': 1.0407211780548096}\n", + "Epoch: 20, Loss: 1.0407211780548096, Average Loss: 0.9772412478923798, Average Accuracy: 26.373626373626376\n", + "Parsed Log Data: {'epoch': 20, 'iter': 100, 'avg_loss': 0.9665699548060351, 'avg_acc': 27.722772277227726, 'loss': 1.2080334424972534}\n", + "Epoch: 20, Loss: 1.2080334424972534, Average Loss: 0.9665699548060351, Average Accuracy: 27.722772277227726\n", + "Parsed Log Data: {'epoch': 20, 'iter': 110, 'avg_loss': 0.9632461189149736, 'avg_acc': 30.630630630630627, 'loss': 0.6947866678237915}\n", + "Epoch: 20, Loss: 0.6947866678237915, Average Loss: 0.9632461189149736, Average Accuracy: 30.630630630630627\n", + "Parsed Log Data: {'epoch': 20, 'iter': 120, 'avg_loss': 0.9608795822651919, 'avg_acc': 32.231404958677686, 'loss': 0.40320727229118347}\n", + "Epoch: 20, Loss: 0.40320727229118347, Average Loss: 0.9608795822651919, Average Accuracy: 32.231404958677686\n", + "Parsed Log Data: {'epoch': 20, 'iter': 130, 'avg_loss': 0.9593410491943359, 'avg_acc': 32.06106870229007, 'loss': 1.3654974699020386}\n", + "Epoch: 20, Loss: 1.3654974699020386, Average Loss: 0.9593410491943359, Average Accuracy: 32.06106870229007\n", + "Parsed Log Data: {'epoch': 20, 'iter': 140, 'avg_loss': 0.950714221237399, 'avg_acc': 
33.33333333333333, 'loss': 0.49264073371887207}\n", + "Epoch: 20, Loss: 0.49264073371887207, Average Loss: 0.950714221237399, Average Accuracy: 33.33333333333333\n", + "Parsed Log Data: {'epoch': 20, 'iter': 150, 'avg_loss': 0.9462508770409009, 'avg_acc': 34.437086092715234, 'loss': 0.9697921276092529}\n", + "Epoch: 20, Loss: 0.9697921276092529, Average Loss: 0.9462508770409009, Average Accuracy: 34.437086092715234\n", + "Parsed Log Data: {'epoch': 20, 'iter': 160, 'avg_loss': 0.9463553169499272, 'avg_acc': 34.161490683229815, 'loss': 1.1782255172729492}\n", + "Epoch: 20, Loss: 1.1782255172729492, Average Loss: 0.9463553169499272, Average Accuracy: 34.161490683229815\n", + "Parsed Log Data: {'epoch': 20, 'iter': 170, 'avg_loss': 0.9387507883080265, 'avg_acc': 35.67251461988304, 'loss': 1.2314364910125732}\n", + "Epoch: 20, Loss: 1.2314364910125732, Average Loss: 0.9387507883080265, Average Accuracy: 35.67251461988304\n", + "Parsed Log Data: {'epoch': 20, 'iter': 180, 'avg_loss': 0.9282894720688709, 'avg_acc': 36.46408839779006, 'loss': 0.42901739478111267}\n", + "Epoch: 20, Loss: 0.42901739478111267, Average Loss: 0.9282894720688709, Average Accuracy: 36.46408839779006\n", + "Parsed Log Data: {'epoch': 20, 'iter': 190, 'avg_loss': 0.928510325117261, 'avg_acc': 36.12565445026178, 'loss': 1.0955886840820312}\n", + "Epoch: 20, Loss: 1.0955886840820312, Average Loss: 0.928510325117261, Average Accuracy: 36.12565445026178\n", + "Parsed Log Data: {'epoch': 20, 'iter': 200, 'avg_loss': 0.9155606376887554, 'avg_acc': 38.308457711442784, 'loss': 0.32576441764831543}\n", + "Epoch: 20, Loss: 0.32576441764831543, Average Loss: 0.9155606376887554, Average Accuracy: 38.308457711442784\n", + "Parsed Log Data: {'epoch': 20, 'iter': 210, 'avg_loss': 0.9126152413151275, 'avg_acc': 38.862559241706165, 'loss': 0.5676601529121399}\n", + "Epoch: 20, Loss: 0.5676601529121399, Average Loss: 0.9126152413151275, Average Accuracy: 38.862559241706165\n", + "Parsed Log Data: {'epoch': 20, 
'iter': 220, 'avg_loss': 0.9158515605856391, 'avg_acc': 38.009049773755656, 'loss': 1.1219587326049805}\n", + "Epoch: 20, Loss: 1.1219587326049805, Average Loss: 0.9158515605856391, Average Accuracy: 38.009049773755656\n", + "Parsed Log Data: {'epoch': 20, 'iter': 230, 'avg_loss': 0.912027623681795, 'avg_acc': 37.66233766233766, 'loss': 0.45346391201019287}\n", + "Epoch: 20, Loss: 0.45346391201019287, Average Loss: 0.912027623681795, Average Accuracy: 37.66233766233766\n", + "Parsed Log Data: {'epoch': 20, 'iter': 240, 'avg_loss': 0.9076244853962506, 'avg_acc': 37.75933609958506, 'loss': 1.6801875829696655}\n", + "Epoch: 20, Loss: 1.6801875829696655, Average Loss: 0.9076244853962506, Average Accuracy: 37.75933609958506\n", + "Parsed Log Data: {'epoch': 20, 'iter': 250, 'avg_loss': 0.9010671710231865, 'avg_acc': 38.24701195219124, 'loss': 1.5949524641036987}\n", + "Epoch: 20, Loss: 1.5949524641036987, Average Loss: 0.9010671710231865, Average Accuracy: 38.24701195219124\n", + "Parsed Log Data: {'epoch': 20, 'iter': 260, 'avg_loss': 0.8927359990347391, 'avg_acc': 39.46360153256705, 'loss': 1.2342846393585205}\n", + "Epoch: 20, Loss: 1.2342846393585205, Average Loss: 0.8927359990347391, Average Accuracy: 39.46360153256705\n", + "Parsed Log Data: {'epoch': 20, 'iter': 270, 'avg_loss': 0.877475259488799, 'avg_acc': 41.32841328413284, 'loss': 0.4685608446598053}\n", + "Epoch: 20, Loss: 0.4685608446598053, Average Loss: 0.877475259488799, Average Accuracy: 41.32841328413284\n", + "Parsed Log Data: {'epoch': 20, 'iter': 280, 'avg_loss': 0.8655961502701363, 'avg_acc': 42.34875444839858, 'loss': 0.252122700214386}\n", + "Epoch: 20, Loss: 0.252122700214386, Average Loss: 0.8655961502701363, Average Accuracy: 42.34875444839858\n", + "Parsed Log Data: {'epoch': 20, 'iter': 290, 'avg_loss': 0.8628690249936277, 'avg_acc': 42.955326460481096, 'loss': 0.4874681234359741}\n", + "Epoch: 20, Loss: 0.4874681234359741, Average Loss: 0.8628690249936277, Average Accuracy: 
42.955326460481096\n", + "Parsed Log Data: {'epoch': 20, 'iter': 300, 'avg_loss': 0.8611048802584905, 'avg_acc': 42.857142857142854, 'loss': 0.774188756942749}\n", + "Epoch: 20, Loss: 0.774188756942749, Average Loss: 0.8611048802584905, Average Accuracy: 42.857142857142854\n", + "Parsed Log Data: {'epoch': 20, 'iter': 310, 'avg_loss': 0.8558992817279227, 'avg_acc': 42.765273311897104, 'loss': 0.7752485275268555}\n", + "Epoch: 20, Loss: 0.7752485275268555, Average Loss: 0.8558992817279227, Average Accuracy: 42.765273311897104\n", + "Parsed Log Data: {'epoch': 20, 'iter': 320, 'avg_loss': 0.8563959705309705, 'avg_acc': 42.36760124610592, 'loss': 1.0107471942901611}\n", + "Epoch: 20, Loss: 1.0107471942901611, Average Loss: 0.8563959705309705, Average Accuracy: 42.36760124610592\n", + "Parsed Log Data: {'epoch': 20, 'iter': 330, 'avg_loss': 0.851796724825107, 'avg_acc': 43.202416918429, 'loss': 0.573390007019043}\n", + "Epoch: 20, Loss: 0.573390007019043, Average Loss: 0.851796724825107, Average Accuracy: 43.202416918429\n", + "Parsed Log Data: {'epoch': 20, 'iter': 340, 'avg_loss': 0.8430047407090838, 'avg_acc': 43.988269794721404, 'loss': 0.2764188051223755}\n", + "Epoch: 20, Loss: 0.2764188051223755, Average Loss: 0.8430047407090838, Average Accuracy: 43.988269794721404\n", + "Parsed Log Data: {'epoch': 20, 'iter': 350, 'avg_loss': 0.8370788225166479, 'avg_acc': 44.15954415954416, 'loss': 0.7711461782455444}\n", + "Epoch: 20, Loss: 0.7711461782455444, Average Loss: 0.8370788225166479, Average Accuracy: 44.15954415954416\n", + "Parsed Log Data: {'epoch': 20, 'iter': 360, 'avg_loss': 0.8318019534907513, 'avg_acc': 44.87534626038781, 'loss': 0.2291455715894699}\n", + "Epoch: 20, Loss: 0.2291455715894699, Average Loss: 0.8318019534907513, Average Accuracy: 44.87534626038781\n", + "Parsed Log Data: {'epoch': 20, 'iter': 370, 'avg_loss': 0.8259480669492018, 'avg_acc': 45.28301886792453, 'loss': 0.8360911011695862}\n", + "Epoch: 20, Loss: 0.8360911011695862, Average Loss: 
0.8259480669492018, Average Accuracy: 45.28301886792453\n", + "Parsed Log Data: {'epoch': 20, 'iter': 380, 'avg_loss': 0.8183476930371733, 'avg_acc': 46.194225721784775, 'loss': 0.1931251883506775}\n", + "Epoch: 20, Loss: 0.1931251883506775, Average Loss: 0.8183476930371733, Average Accuracy: 46.194225721784775\n", + "Parsed Log Data: {'epoch': 20, 'iter': 390, 'avg_loss': 0.8129235445081121, 'avg_acc': 47.05882352941176, 'loss': 0.40773236751556396}\n", + "Epoch: 20, Loss: 0.40773236751556396, Average Loss: 0.8129235445081121, Average Accuracy: 47.05882352941176\n", + "Parsed Log Data: {'epoch': 20, 'iter': 400, 'avg_loss': 0.8090237801583331, 'avg_acc': 47.13216957605985, 'loss': 0.7096580862998962}\n", + "Epoch: 20, Loss: 0.7096580862998962, Average Loss: 0.8090237801583331, Average Accuracy: 47.13216957605985\n", + "Parsed Log Data: {'epoch': 20, 'iter': 410, 'avg_loss': 0.8038147058872701, 'avg_acc': 47.44525547445255, 'loss': 0.8231819868087769}\n", + "Epoch: 20, Loss: 0.8231819868087769, Average Loss: 0.8038147058872701, Average Accuracy: 47.44525547445255\n", + "Parsed Log Data: {'epoch': 20, 'iter': 420, 'avg_loss': 0.7973752015772455, 'avg_acc': 48.45605700712589, 'loss': 0.928947389125824}\n", + "Epoch: 20, Loss: 0.928947389125824, Average Loss: 0.7973752015772455, Average Accuracy: 48.45605700712589\n", + "Parsed Log Data: {'epoch': 20, 'iter': 430, 'avg_loss': 0.7922281221488789, 'avg_acc': 48.72389791183295, 'loss': 0.4240599274635315}\n", + "Epoch: 20, Loss: 0.4240599274635315, Average Loss: 0.7922281221488789, Average Accuracy: 48.72389791183295\n", + "Parsed Log Data: {'epoch': 20, 'iter': 440, 'avg_loss': 0.7909014855179928, 'avg_acc': 48.75283446712018, 'loss': 0.4714360237121582}\n", + "Epoch: 20, Loss: 0.4714360237121582, Average Loss: 0.7909014855179928, Average Accuracy: 48.75283446712018\n", + "Parsed Log Data: {'epoch': 20, 'iter': 450, 'avg_loss': 0.7859812361702422, 'avg_acc': 49.44567627494457, 'loss': 0.27238792181015015}\n", + "Epoch: 
20, Loss: 0.27238792181015015, Average Loss: 0.7859812361702422, Average Accuracy: 49.44567627494457\n", + "Parsed Log Data: {'epoch': 20, 'iter': 460, 'avg_loss': 0.7806033468815354, 'avg_acc': 50.108459869848154, 'loss': 0.9251547455787659}\n", + "Epoch: 20, Loss: 0.9251547455787659, Average Loss: 0.7806033468815354, Average Accuracy: 50.108459869848154\n", + "Parsed Log Data: {'epoch': 20, 'iter': 470, 'avg_loss': 0.77501722907691, 'avg_acc': 50.530785562632694, 'loss': 1.1123549938201904}\n", + "Epoch: 20, Loss: 1.1123549938201904, Average Loss: 0.77501722907691, Average Accuracy: 50.530785562632694\n", + "Parsed Log Data: {'epoch': 20, 'iter': 480, 'avg_loss': 0.7672348431700728, 'avg_acc': 51.559251559251564, 'loss': 0.36292564868927}\n", + "Epoch: 20, Loss: 0.36292564868927, Average Loss: 0.7672348431700728, Average Accuracy: 51.559251559251564\n", + "Parsed Log Data: {'epoch': 20, 'iter': 490, 'avg_loss': 0.7614982160616309, 'avg_acc': 52.342158859470466, 'loss': 0.5763269662857056}\n", + "Epoch: 20, Loss: 0.5763269662857056, Average Loss: 0.7614982160616309, Average Accuracy: 52.342158859470466\n", + "Parsed Log Data: {'epoch': 20, 'iter': 500, 'avg_loss': 0.755586820834887, 'avg_acc': 53.093812375249506, 'loss': 0.43941155076026917}\n", + "Epoch: 20, Loss: 0.43941155076026917, Average Loss: 0.755586820834887, Average Accuracy: 53.093812375249506\n", + "Parsed Log Data: {'epoch': 20, 'iter': 510, 'avg_loss': 0.7529963151991017, 'avg_acc': 53.42465753424658, 'loss': 0.5494109392166138}\n", + "Epoch: 20, Loss: 0.5494109392166138, Average Loss: 0.7529963151991017, Average Accuracy: 53.42465753424658\n", + "Parsed Log Data: {'epoch': 20, 'iter': 520, 'avg_loss': 0.7464623560489024, 'avg_acc': 53.93474088291747, 'loss': 0.9051780104637146}\n", + "Epoch: 20, Loss: 0.9051780104637146, Average Loss: 0.7464623560489024, Average Accuracy: 53.93474088291747\n", + "Parsed Log Data: {'epoch': 20, 'iter': 530, 'avg_loss': 0.7401949792482984, 'avg_acc': 
54.61393596986818, 'loss': 0.13609610497951508}\n", + "Epoch: 20, Loss: 0.13609610497951508, Average Loss: 0.7401949792482984, Average Accuracy: 54.61393596986818\n", + "Parsed Log Data: {'epoch': 20, 'iter': 540, 'avg_loss': 0.7334553723304417, 'avg_acc': 55.26802218114602, 'loss': 0.8661832809448242}\n", + "Epoch: 20, Loss: 0.8661832809448242, Average Loss: 0.7334553723304417, Average Accuracy: 55.26802218114602\n", + "Parsed Log Data: {'epoch': 20, 'iter': 550, 'avg_loss': 0.7272329243633578, 'avg_acc': 55.898366606170605, 'loss': 0.4187266230583191}\n", + "Epoch: 20, Loss: 0.4187266230583191, Average Loss: 0.7272329243633578, Average Accuracy: 55.898366606170605\n", + "Parsed Log Data: {'epoch': 20, 'iter': 560, 'avg_loss': 0.7212331915762336, 'avg_acc': 56.68449197860963, 'loss': 0.38998621702194214}\n", + "Epoch: 20, Loss: 0.38998621702194214, Average Loss: 0.7212331915762336, Average Accuracy: 56.68449197860963\n", + "Parsed Log Data: {'epoch': 20, 'iter': 570, 'avg_loss': 0.714248325851878, 'avg_acc': 57.26795096322241, 'loss': 0.42771902680397034}\n", + "Epoch: 20, Loss: 0.42771902680397034, Average Loss: 0.714248325851878, Average Accuracy: 57.26795096322241\n", + "Parsed Log Data: {'epoch': 20, 'iter': 580, 'avg_loss': 0.709527192832886, 'avg_acc': 57.831325301204814, 'loss': 0.4970065951347351}\n", + "Epoch: 20, Loss: 0.4970065951347351, Average Loss: 0.709527192832886, Average Accuracy: 57.831325301204814\n", + "Parsed Log Data: {'epoch': 20, 'iter': 590, 'avg_loss': 0.7070212593513293, 'avg_acc': 57.868020304568525, 'loss': 0.6174928545951843}\n", + "Epoch: 20, Loss: 0.6174928545951843, Average Loss: 0.7070212593513293, Average Accuracy: 57.868020304568525\n", + "Parsed Log Data: {'epoch': 'EP20_train', 'avg_loss': 0.7055494453351037, 'total_acc': 57.983193277310924, 'precisions': 1.0, 'recalls': 0.5798319327731093, 'f1_scores': 0.7340425531914894, 'time_taken_from_start': 5.023622989654541}\n", + "Epoch: EP20_train, Loss: None, Average Loss: 
0.7055494453351037, Average Accuracy: None\n", + "Epochs: [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]\n", + "Loss Values: [0.6324796080589294, 0.6594327092170715, 1.4224793910980225, 0.7135135531425476, 1.3517900705337524, 0.6623533964157104, 0.7342778444290161, 1.4144562482833862, 0.9813053607940674, 1.0407211780548096, 1.2080334424972534, 0.6947866678237915, 0.40320727229118347, 1.3654974699020386, 0.49264073371887207, 0.9697921276092529, 1.1782255172729492, 1.2314364910125732, 0.42901739478111267, 1.0955886840820312, 0.32576441764831543, 0.5676601529121399, 1.1219587326049805, 0.45346391201019287, 1.6801875829696655, 1.5949524641036987, 1.2342846393585205, 0.4685608446598053, 0.252122700214386, 0.4874681234359741, 0.774188756942749, 0.7752485275268555, 1.0107471942901611, 0.573390007019043, 0.2764188051223755, 0.7711461782455444, 0.2291455715894699, 0.8360911011695862, 0.1931251883506775, 0.40773236751556396, 0.7096580862998962, 0.8231819868087769, 0.928947389125824, 0.4240599274635315, 0.4714360237121582, 0.27238792181015015, 0.9251547455787659, 1.1123549938201904, 0.36292564868927, 0.5763269662857056, 0.43941155076026917, 0.5494109392166138, 0.9051780104637146, 0.13609610497951508, 0.8661832809448242, 0.4187266230583191, 0.38998621702194214, 0.42771902680397034, 0.4970065951347351, 0.6174928545951843]\n", + "Accuracy Values: [100.0, 45.45454545454545, 28.57142857142857, 25.806451612903224, 24.390243902439025, 25.49019607843137, 22.950819672131146, 22.535211267605636, 23.456790123456788, 26.373626373626376, 27.722772277227726, 30.630630630630627, 32.231404958677686, 32.06106870229007, 33.33333333333333, 34.437086092715234, 34.161490683229815, 35.67251461988304, 36.46408839779006, 36.12565445026178, 38.308457711442784, 38.862559241706165, 38.009049773755656, 
37.66233766233766, 37.75933609958506, 38.24701195219124, 39.46360153256705, 41.32841328413284, 42.34875444839858, 42.955326460481096, 42.857142857142854, 42.765273311897104, 42.36760124610592, 43.202416918429, 43.988269794721404, 44.15954415954416, 44.87534626038781, 45.28301886792453, 46.194225721784775, 47.05882352941176, 47.13216957605985, 47.44525547445255, 48.45605700712589, 48.72389791183295, 48.75283446712018, 49.44567627494457, 50.108459869848154, 50.530785562632694, 51.559251559251564, 52.342158859470466, 53.093812375249506, 53.42465753424658, 53.93474088291747, 54.61393596986818, 55.26802218114602, 55.898366606170605, 56.68449197860963, 57.26795096322241, 57.831325301204814, 57.868020304568525]\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAJOCAYAAABm7rQwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/OQEPoAAAACXBIWXMAAA9hAAAPYQGoP6dpAACZ2klEQVR4nOzdf3zO9f7H8ee1y2zzY2T5MTYmKvpJFJNFGpJEIz9yDtFJJ3RodXxzKqIfSqWp9POE6uRHaTk5CUthRYro9FO/1IxtojQ25ura5/vHdXZxuba5cP3Yrvfjfrvt1j7vz+vz3uu1j+zjtc/n/bFZlmUJAAAAAAAACKKIUCcAAAAAAAAA89CUAgAAAAAAQNDRlAIAAAAAAEDQ0ZQCAAAAAABA0NGUAgAAAAAAQNDRlAIAAAAAAEDQ0ZQCAAAAAABA0NGUAgAAAAAAQNDRlAIAAAAAAEDQ0ZQCwsANN9ygpKSkkzr23nvvlc1m829CQDnmz58vm82mTZs2hToVAEA1x7UPcGp++ukn2Ww2Pfroo6FOBYajKQUEkM1m8+ljzZo1oU41JG644QbVqVMn1GmEjbKmT0UfH330UahTBACEOa59fDd48GDZbDb93//9X6hTQQCUNX0q+njooYdCnSJQJdQIdQJAOHvllVc8tl9++WVlZWV5jbdt2/aUvs4LL7yg0tLSkzr27rvv1p133nlKXx9Vy/Tp09WyZUuv8datW4cgGwCASbj28U1hYaGWLVumpKQkLVy4UA899BB3b4WpYcOG6aqrrvIab9++fQiyAaoemlJAAP3pT3/y2P7oo4+UlZXlNX6s4uJi1apVy+evExkZeVL5SVKNGjVUowZ/FVQXRUVFql27dqUxffr0UceOHYOUEQAAR3Dt45s33nhDTqdTc+fOVY8ePbRu3Tp169YtpDmVx7IsHTp0SDExMaFOpUry5brsoosuOu6ff8BkPL4HhFj37t113nnnafPmzbrssstUq1Yt/eMf/5Ak/fvf/1bfvn3VtGlTRUVFqVWrVrrvvvvkdDo95jh2XYWjnxF//vnn1apVK0VFReniiy/WJ5984nFseesq2Gw2jR8/XkuXLtV5552nqKgonXvuuVqxYoVX/mvWrFHHjh0VHR2tVq1a6bnnnvP7Wg2vv/66OnTooJiYGJ1++un605/+pJ07d3rE5Ofna9SoUUpISFBUVJTi4+PVv39//fTTT+6YTZs2qXfv3jr99NMVExOjli
1bavTo0T7l8PTTT+vcc89VVFSUmjZtqnHjxmnfvn3u/ePHj1edOnVUXFzsdeywYcPUpEkTj/P2zjvvKCUlRbVr11bdunXVt29fffnllx7HlT3e+MMPP+iqq65S3bp1NXz4cJ/yrczRfz4ef/xxtWjRQjExMerWrZu++OILr/j33nvPnWv9+vXVv39/ff31115xO3fu1I033uj+89qyZUvdcsstOnz4sEdcSUmJ0tPT1bBhQ9WuXVvXXnutfvnlF4+YUzlXAICqjWsf6dVXX1XPnj11+eWXq23btnr11VfLjfvmm280ePBgNWzYUDExMTr77LN11113ecQc7+dvRbmVPfZ/9LVSUlKSrr76aq1cuVIdO3ZUTEyMnnvuOUnSvHnz1KNHDzVq1EhRUVE655xz9Mwzz5Sb9zvvvKNu3bqpbt26io2N1cUXX6wFCxZIkqZOnarIyEivn/2SNGbMGNWvX1+HDh2q9Pt3vGuTJUuWyGazae3atV7HPvfcc7LZbB7XPN98840GDRqkBg0aKDo6Wh07dtRbb71V7vdr7dq1Gjt2rBo1aqSEhIRK8/RV2fd91apVateunaKjo3XOOecoMzPTK/bHH3/UddddpwYNGqhWrVrq3Lmz3n77ba+4Q4cO6d5779VZZ52l6OhoxcfHKy0tTT/88INX7PH+n/HlOhs4WdweAVQBe/fuVZ8+fTR06FD96U9/UuPGjSW5fvjVqVNH6enpqlOnjt577z1NmTJFhYWFeuSRR44774IFC7R//37dfPPNstlsmjlzptLS0vTjjz8e9zeMH3zwgTIzMzV27FjVrVtXTzzxhAYOHKicnBzFxcVJkrZs2aIrr7xS8fHxmjZtmpxOp6ZPn66GDRue+jflf+bPn69Ro0bp4osv1owZM1RQUKDZs2frww8/1JYtW1S/fn1J0sCBA/Xll1/q1ltvVVJSknbv3q2srCzl5OS4t3v16qWGDRvqzjvvVP369fXTTz+V+8P+WPfee6+mTZum1NRU3XLLLdq2bZueeeYZffLJJ/rwww8VGRmpIUOGaM6cOXr77bd13XXXuY8tLi7WsmXLdMMNN8hut0tyPdowcuRI9e7dWw8//LCKi4v1zDPPqGvXrtqyZYvHRfYff/yh3r17q2vXrnr00Ud9+i3y77//rj179niM2Ww293kr8/LLL2v//v0aN26cDh06pNmzZ6tHjx76/PPP3X8G3333XfXp00dnnHGG7r33Xh08eFBPPvmkLr30Un366afuXHft2qVLLrlE+/bt05gxY9SmTRvt3LlTS5YsUXFxsWrWrOn+urfeeqtOO+00TZ06VT/99JMyMjI0fvx4LV68WJJO6VwBAKoHk699du3apffff18vvfSSJNcvrx5//HE99dRTHj8v//vf/yolJUWRkZEaM2aMkpKS9MMPP2jZsmV64IEH3HP5+vPXV9u2bdOwYcN0880366abbtLZZ58tSXrmmWd07rnn6pprrlGNGjW0bNkyjR07VqWlpRo3bpz7+Pnz52v06NE699xzNXnyZNWvX19btmzRihUrdP311+vPf/6zpk+frsWLF2v8+PHu4w4fPqwlS5Zo4MCBio6OrjA/X65N+vbtqzp16ui1117zugNt8eLFOvfcc3XeeedJkr788ktdeumlatasme68807Vrl1br732mgYMGKA33nhD1157rcfxY8eOVcOGDTVlyhQVFRUd9/tZXFzsdV0mSfXr1/e4Y++7777TkCFD9Ne//lUjR47UvHnzdN1112nFihXq2bOnJKmgoEBdunRRcXGx/va3vykuLk4vvfSSrrnmGi1ZssSdq9Pp1NVXX63Vq1dr6NChmjBhgvbv36+srCx98cUXatWqlfvr+vL/zPGus4FTYgEImnHjxlnH/m/XrVs3S5L17LPPesUXFxd7jd18881WrVq1rEOHDrnHRo4cabVo0cK9vX37dkuSFRcXZ/3666/u8X//+9+WJGvZsmXusalTp3rlJM
mqWbOm9f3337vHPvvsM0uS9eSTT7rH+vXrZ9WqVcvauXOne+y7776zatSo4TVneUaOHGnVrl27wv2HDx+2GjVqZJ133nnWwYMH3eP/+c9/LEnWlClTLMuyrN9++82SZD3yyCMVzvXmm29akqxPPvnkuHkdbffu3VbNmjWtXr16WU6n0z3+1FNPWZKsuXPnWpZlWaWlpVazZs2sgQMHehz/2muvWZKsdevWWZZlWfv377fq169v3XTTTR5x+fn5Vr169TzGR44caUmy7rzzTp9ynTdvniWp3I+oqCh3XNmfj5iYGCs3N9c9vnHjRkuSddttt7nH2rVrZzVq1Mjau3eve+yzzz6zIiIirBEjRrjHRowYYUVERJT7/S0tLfXILzU11T1mWZZ12223WXa73dq3b59lWSd/rgAAVQ/XPt4effRRKyYmxiosLLQsy7K+/fZbS5L15ptvesRddtllVt26da2ff/7ZY/zon6G+/Pwtr17LOvJzefv27e6xFi1aWJKsFStWeMWXd2569+5tnXHGGe7tffv2WXXr1rU6derkce12bN7JyclWp06dPPZnZmZakqz333/f6+sczddrk2HDhlmNGjWy/vjjD/dYXl6eFRERYU2fPt09dsUVV1jnn3++x5+v0tJSq0uXLtaZZ57pHiv7fnXt2tVjzoqU/Zms6GPDhg3u2LLv+xtvvOEe+/333634+Hirffv27rGJEydakqzs7Gz32P79+62WLVtaSUlJ7mvVuXPnWpKsWbNmeeVVdh58/X/Gl+ts4FTw+B5QBURFRWnUqFFe40c/v79//37t2bNHKSkpKi4u1jfffHPceYcMGaLTTjvNvZ2SkiLJddvv8aSmpnr8FuWCCy5QbGys+1in06l3331XAwYMUNOmTd1xrVu3Vp8+fY47vy82bdqk3bt3a+zYsR6/Mevbt6/atGnjvlU5JiZGNWvW1Jo1a/Tbb7+VO1fZHVX/+c9/5HA4fM7h3Xff1eHDhzVx4kRFRBz5K/Omm25SbGysOwebzabrrrtOy5cv14EDB9xxixcvVrNmzdS1a1dJUlZWlvbt26dhw4Zpz5497g+73a5OnTrp/fff98rhlltu8TlfSZozZ46ysrI8Pt555x2vuAEDBqhZs2bu7UsuuUSdOnXS8uXLJUl5eXnaunWrbrjhBjVo0MAdd8EFF6hnz57uuNLSUi1dulT9+vUrdy2rYx8ZGDNmjMdYSkqKnE6nfv75Z0knf64AANWHydc+r776qvr27au6detKks4880x16NDB4xG+X375RevWrdPo0aPVvHlzj+PLfoae6M9fX7Vs2VK9e/f2Gj/63JTdld2tWzf9+OOP+v333yW5rnP279+vO++80+tup6PzGTFihDZu3OjxKNmrr76qxMTEStfW8vXaRHL9Wdi9e7fHmx6XLFmi0tJSDRkyRJL066+/6r333tPgwYPdf9727NmjvXv3qnfv3vruu++8loy46aab3He/+2LMmDFe12VZWVk655xzPOKaNm3qcVdWbGysRowYoS1btig/P1+StHz5cl1yySXu60pJqlOnjsaMGaOffvpJX331lSTXmmWnn366br31Vq98jv1zcbz/Z3y5zgZOBU0poApo1qxZubdXf/nll7r22mtVr149xcbGqmHDhu6FEst++Ffm2IuYsh84vvxAOfbYsuPLjt29e7cOHjxY7hvd/PWWt7ImRdlt40dr06aNe39UVJQefvhhvfPOO2rcuLEuu+wyzZw50/0DXJK6deumgQMHatq0aTr99NPVv39/zZs3TyUlJSeVQ82aNXXGGWe490uuH+oHDx50r0Fw4MABLV++XNddd537AuC7776TJPXo0UMNGzb0+Fi1apV2797t8XVq1KhxwusVXHLJJUpNTfX4uPzyy73izjzzTK+xs846y70+QGXf/7Zt22rPnj0qKirSL7/8osLCQvdt8MdzvD+XJ3uuAADVh6nXPl9//bW2bNmiSy+9VN9//7
37o3v37vrPf/6jwsJCSUcaApX9bD3Rn7++Ku8NvpL04YcfKjU11b2OU8OGDd1rgZWdm7Im0/FyGjJkiKKiotyNuN9//13/+c9/NHz48Eqbab5em0jSlVdeqXr16rmXB5Bcvyxs166dzjrrLEnS999/L8uydM8993hdl02dOlWSvK7NKvr+VOTMM8/0ui5LTU1VbGysR1zr1q29ai/L8+hrs4pqL9svuc7D2Wef7dOC/sf7f8aX62zgVNCUAqqA8t5osm/fPnXr1k2fffaZpk+frmXLlikrK0sPP/ywJPn0GuSKfotjWVZAjw2FiRMn6ttvv9WMGTMUHR2te+65R23bttWWLVskuX4rtGTJEm3YsEHjx4/Xzp07NXr0aHXo0MHjzqZT0blzZyUlJem1116TJC1btkwHDx50/zZOOnLeXnnllXJ/a/bvf//bY86oqCiPO7TCwfH+bAXjXAEAQsvUa59//etfkqTbbrtNZ555pvvjscce06FDh/TGG2/47WuVqajJc+zi8WXKOzc//PCDrrjiCu3Zs0ezZs3S22+/raysLN12222SfDs3RzvttNN09dVXu5tSS5YsUUlJiV/fUhcVFaUBAwbozTff1B9//KGdO3fqww8/LPe67I477ij3uiwrK8ur4RhubyL05c/98a6zgVPBQudAFbVmzRrt3btXmZmZuuyyy9zj27dvD2FWRzRq1EjR0dH6/vvvvfaVN3YyWrRoIcm14GaPHj089m3bts29v0yrVq10++236/bbb9d3332ndu3a6bHHHnNfAEquxlHnzp31wAMPaMGCBRo+fLgWLVqkv/zlL8fN4YwzznCPHz58WNu3b1dqaqpH/ODBgzV79mwVFhZq8eLFSkpKUufOnT1ylFzfv2OPDbayu7aO9u2337oXrDy69mN98803Ov3001W7dm3FxMQoNja23Df3nYoTPVcAgOot3K99LMvSggULdPnll2vs2LFe+++77z69+uqrGjVqlPuao7KfrQ0bNvTp52/ZnS/79u1zPyIvyeNu7+NZtmyZSkpK9NZbb3ncWXPssgNl1zlffPHFce8eGzFihPr3769PPvlEr776qtq3b69zzz230mN8vTYpM2TIEL300ktavXq1vv76a1mW5dGUKvs+R0ZGhvy6rOyuraObiN9++60keVybVVR72X7JdR42btwoh8Nx3AX+feXLdTZwMsLr1+9AGCn7rcXRv6U4fPiwnn766VCl5MFutys1NVVLly7Vrl273OPff/99uesXnYyOHTuqUaNGevbZZz0e3XrnnXf09ddfq2/fvpJcbzU59tXBrVq1Ut26dd3H/fbbb16/6WzXrp0kVfpYWGpqqmrWrKknnnjC4/gXX3xRv//+uzuHMkOGDFFJSYleeuklrVixQoMHD/bY37t3b8XGxurBBx8sd72k8l6PHChLly71WCfh448/1saNG93rYsTHx6tdu3Z66aWXtG/fPnfcF198oVWrVumqq66SJEVERGjAgAFatmyZNm3a5PV1TvQ3zCd7rgAA1Vu4X/t8+OGH+umnnzRq1CgNGjTI62PIkCF6//33tWvXLjVs2FCXXXaZ5s6dq5ycHI95yr4/vv78LWsUrVu3zr2vqKjI/fY/X2s/ek7J9cjdvHnzPOJ69eqlunXrasaMGV7XZsf+bO/Tp49OP/10Pfzww1q7dq1Pd0n5em1SJjU1VQ0aNNDixYu1ePFiXXLJJR6P3zVq1Ejdu3fXc889p7y8PK+vF8zrsl27dunNN990bxcWFurll19Wu3bt1KRJE0nSVVddpY8//lgbNmxwxxUVFen5559XUlKSe52qgQMHas+ePXrqqae8vs6JXpf5cp0NnArulAKqqC5duui0007TyJEj9be//U02m02vvPJKlXp87t5779WqVat06aWX6pZbbpHT6dRTTz2l8847T1u3bvVpDofDofvvv99rvEGDBho7dqwefvhhjRo1St26ddOwYcNUUFCg2bNnKy
kpyX3L+LfffqsrrrhCgwcP1jnnnKMaNWrozTffVEFBgYYOHSpJeumll/T000/r2muvVatWrbR//3698MILio2N9bqAOVrDhg01efJkTZs2TVdeeaWuueYabdu2TU8//bQuvvhirwuoiy66SK1bt9Zdd92lkpISj9/GSa5FK5955hn9+c9/1kUXXaShQ4eqYcOGysnJ0dtvv61LL7203AuIE/HOO++Uuxhsly5dPO72at26tbp27apbbrlFJSUlysjIUFxcnCZNmuSOeeSRR9SnTx8lJyfrxhtvdL92uV69err33nvdcQ8++KBWrVqlbt26acyYMWrbtq3y8vL0+uuv64MPPvD4zezxnOy5AgBUb+F+7fPqq6/Kbrd7/UKrzDXXXKO77rpLixYtUnp6up544gl17dpVF110kcaMGaOWLVvqp59+0ttvv+3+Wr78/O3Vq5eaN2+uG2+8UX//+99lt9s1d+5c9/WHL3r16qWaNWuqX79+uvnmm3XgwAG98MILatSokUczJzY2Vo8//rj+8pe/6OKLL9b111+v0047TZ999pmKi4s9GmGRkZEaOnSonnrqKdntdg0bNsynXHy9Nin7GmlpaVq0aJGKior06KOPes03Z84cde3aVeeff75uuukmnXHGGSooKNCGDRuUm5urzz77zKe8KvLpp5+WezdRq1atlJyc7N4+66yzdOONN+qTTz5R48aNNXfuXBUUFHg0/u68804tXLhQffr00d/+9jc1aNBAL730krZv36433njDveTDiBEj9PLLLys9PV0ff/yxUlJSVFRUpHfffVdjx45V//79fc7fl+ts4JQE8U1/gPEqei3yueeeW278hx9+aHXu3NmKiYmxmjZtak2aNMlauXKl1+tyK3otcnmvbpVkTZ061b1d0WuRx40b53VsixYtrJEjR3qMrV692mrfvr1Vs2ZNq1WrVtY///lP6/bbb7eio6Mr+C4cMXLkyApfk9uqVSt33OLFi6327dtbUVFRVoMGDazhw4dbubm57v179uyxxo0bZ7Vp08aqXbu2Va9ePatTp07Wa6+95o759NNPrWHDhlnNmze3oqKirEaNGllXX321tWnTpuPmaVmW9dRTT1lt2rSxIiMjrcaNG1u33HKL9dtvv5Ube9ddd1mSrNatW1c43/vvv2/17t3bqlevnhUdHW21atXKuuGGGzzyGTlypFW7dm2f8rOsI68qruhj3rx5lmV5/vl47LHHrMTERCsqKspKSUmxPvvsM6953333XevSSy+1YmJirNjYWKtfv37WV1995RX3888/WyNGjLAaNmxoRUVFWWeccYY1btw4q6SkxCO/Y19b/f7773v8mT7VcwUAqDq49nE5fPiwFRcXZ6WkpFQYY1mW1bJlS6t9+/bu7S+++MK69tprrfr161vR0dHW2Wefbd1zzz0exxzv569lWdbmzZutTp06WTVr1rSaN29uzZo1y/1zefv27R719u3bt9zc3nrrLeuCCy6woqOjraSkJOvhhx+25s6d6zVHWWyXLl3c1w6XXHKJtXDhQq85P/74Y0uS1atXr0q/L8fy9drEsiwrKyvLkmTZbDZrx44d5cb88MMP1ogRI6wmTZpYkZGRVrNmzayrr77aWrJkiTumouuYipT9mazo4+g/V2Xf95UrV1oXXHCBFRUVZbVp08Z6/fXXy8110KBB7j8Tl1xyifWf//zHK664uNi66667rJYtW1qRkZFWkyZNrEGDBlk//PCDR37H+3/Gl+ts4FTYLKsK/eoBQFgYMGCAvvzyy3LXLELo/fTTT2rZsqUeeeQR3XHHHaFOBwCAao9rn5Pz2WefqV27dnr55Zf15z//OdTphExSUpLOO+88/ec//wl1KkDQsaYUgFNy8OBBj+3vvvtOy5cvV/fu3UOTEAAAQABx7eM/L7zwgurUqaO0tLRQpwIgRFhTCsApOeOMM3TDDTfojDPO0M8//6xnnnlGNWvW9FiXCAAAIFxw7X
Pqli1bpq+++krPP/+8xo8f7/HGPABmoSkF4JRceeWVWrhwofLz8xUVFaXk5GQ9+OCDOvPMM0OdGgAAgN9x7XPqbr31VhUUFOiqq67StGnTQp0OgBBiTSkAAAAAAAAEHWtKAQAAAAAAIOhoSgEAAAAAACDojFtTqrS0VLt27VLdunVls9lCnQ4AAKiCLMvS/v371bRpU0VE8Du8o3EtBQAAKnMi11HGNaV27dqlxMTEUKcBAACqgR07dighISHUaVQpXEsBAABf+HIdZVxTqm7dupJc35zY2Fi/z+9wOLRq1Sr16tVLkZGRfp+/qjGpXpNqlcyq16RaJbPqNalWyax6A11rYWGhEhMT3dcNOIJrKf8xqVbJrHpNqlUyq16TapXMqtekWqXA1nsi11HGNaXKbjOPjY0N2IVUrVq1FBsba8wfZFPqNalWyax6TapVMqtek2qVzKo3WLXyeJo3rqX8x6RaJbPqNalWyax6TapVMqtek2qVglOvL9dRLJIAAAAAAACAoKMpBQAAAAAAgKCjKQUAAAAAAICgoykFAAAAAACAoKMpBQAAAAAAgKCjKQUAAAAAAICgoykFAAAAAACAoKMpBQAAAAAAgKCjKQUAAAAAAICgoykFAAAAAACAoKMpBQAAAAAAgKCjKQUAAAAAAICgoykFAAAAAACAoKMpBQAAAAAAgKCjKQUAAAAAAICgoykFwGhOp7R2rU3r1jXT2rU2OZ2hzggAAKD6cJY6tfbntVr32zqt/XmtnKVcTAHwHU0pAMbKzJSSkqSePWto1qyO6tmzhpKSXOMAUNWsW7dO/fr1U9OmTWWz2bR06VKP/ZZlacqUKYqPj1dMTIxSU1P13XffecT8+uuvGj58uGJjY1W/fn3deOONOnDgQBCrABBOMr/OVNLsJPV8tadm/TxLPV/tqaTZScr8mospAL6hKQXASJmZ0qBBUm6u5/jOna5xGlMAqpqioiJdeOGFmjNnTrn7Z86cqSeeeELPPvusNm7cqNq1a6t37946dOiQO2b48OH68ssvlZWVpf/85z9at26dxowZE6wSAISRzK8zNei1Qcot9LyY2lm4U4NeG0RjCoBPaEoBMI7TKU2YIFmW976ysYkTxaN8AKqUPn366P7779e1117rtc+yLGVkZOjuu+9W//79dcEFF+jll1/Wrl273HdUff3111qxYoX++c9/qlOnTuratauefPJJLVq0SLt27QpyNQCqM2epUxNWTJAl74upsrGJKybyKB+A46oR6gQAINiys73vkDqaZUk7drjiuncPWloAcNK2b9+u/Px8paamusfq1aunTp06acOGDRo6dKg2bNig+vXrq2PHju6Y1NRURUREaOPGjeU2uySppKREJSUl7u3CwkJJksPhkMPh8HstZXMGYu6qxqRaJbPqDfda1/681usOqaNZsrSjcIfe//F9dWvRLYiZBV64n9tjmVSvSbVKga33ROakKQXAOHl5/o0DgFDLz8+XJDVu3NhjvHHjxu59+fn5atSokcf+GjVqqEGDBu6Y8syYMUPTpk3zGl+1apVq1ap1qqlXKCsrK2BzVzUm1SqZVW+41rrut3U+xb3zwTsq+rIowNmERrie24qYVK9JtUqBqbe4uNjnWJpSAIwTH+/fOAAIZ5MnT1Z6erp7u7CwUImJierVq5diY2P9/vUcDoeysrLUs2dPRUZG+n3+qsSkWiWz6g33Wmv/XFuzfp513Lg+XfuE5Z1S4Xxuj2VSvSbVKgW23rK7qn1BUwqAcVJSpIQE16Lm5a0rZbO59qekBD83ADgZTZo0kSQVFBQo/qiOekFBgdq1a+eO2b17t8dxf/zxh3799Vf38eWJiopSVFSU13hkZGRAL9oDPX9VYlKtkln1hmutl59xuRJiE7SzcGe560rZZFNCbIIuP+Ny2SPsIcgw8ML13FbEpHpNqlUKTL0nMh8LnQMwjt0uzZ7t+txm89xXtp2R4YoDgOqgZcuWatKkiVavXu
0eKyws1MaNG5WcnCxJSk5O1r59+7R582Z3zHvvvafS0lJ16tQp6DkDqL7sEXbNvtJ1MWWT58VU2XbGlRlh25AC4D80pQAYKS1NWrJEOvbmgIQE13haWmjyAoCKHDhwQFu3btXWrVsluRY337p1q3JycmSz2TRx4kTdf//9euutt/T5559rxIgRatq0qQYMGCBJatu2ra688krddNNN+vjjj/Xhhx9q/PjxGjp0qJo2bRq6wgBUS2lt07Rk8BI1rev590dCbIKWDF6itLZcTAE4Ph7fA2CstDQpOVkq+7fYa6/9obS0GtwhBaBK2rRpky6//HL3dtk6TyNHjtT8+fM1adIkFRUVacyYMdq3b5+6du2qFStWKDo62n3Mq6++qvHjx+uKK65QRESEBg4cqCeeeCLotQAID2lt05TaMlX1Hq4nSVo2eJn6nN2HO6QA+IymFACjHd2A6tLFoiEFoMrq3r27rPIWwvsfm82m6dOna/r06RXGNGjQQAsWLAhEegAMdXQDqmvzrjSkAJwQHt8DAAAAAABA0NGUAgAAAAAAQNDRlAIAAAAAAEDQ0ZQCAAAAAABA0NGUAgAAAAAAQNDRlAIAAAAAAEDQ0ZQCAAAAAABA0NGUAgAAAAAAQNDRlAIAAAAAAEDQ0ZQCAAAAAABA0NGUAgAAAAAAQNDRlAIAAAAAAEDQ0ZQCAAAAAABA0IW0KbVu3Tr169dPTZs2lc1m09KlS497TElJie666y61aNFCUVFRSkpK0ty5cwOfLAAAAAAAAPymRii/eFFRkS688EKNHj1aaWlpPh0zePBgFRQU6MUXX1Tr1q2Vl5en0tLSAGcKIFw5nUc+X7/eprQ0yW4PXT4AAAAAYIqQNqX69OmjPn36+By/YsUKrV27Vj/++KMaNGggSUpKSgpQdgDCXWamNH78ke3Bg2soIUGaPVvysU8OAAAAADhJIW1Knai33npLHTt21MyZM/XKK6+odu3auuaaa3TfffcpJiam3GNKSkpUUlLi3i4sLJQkORwOORwOv+dYNmcg5q6KTKrXpFql8K/3zTdtGjrULsuSJJt7fOdOS4MGSYsWOXXttVbI8gukcD+3RzOpVsmsegNdqwnfQwAAgFCrVk2pH3/8UR988IGio6P15ptvas+ePRo7dqz27t2refPmlXvMjBkzNG3aNK/xVatWqVatWgHLNSsrK2BzV0Um1WtSrVJ41ut0SmPH9pJl2XV0Q0qSLMsmydK4cYdVo0ZWWD/KF47ntiIm1SqZVW+gai0uLg7IvAAAADiiWjWlSktLZbPZ9Oqrr6pevXqSpFmzZmnQoEF6+umny71bavLkyUpPT3dvFxYWKjExUb169VJsbKzfc3Q4HMrKylLPnj0VGRnp9/mrGpPqNalWKbzrXbvWpr17K/vrz6Y9e2opNravunULv7ulwvncHsukWiWz6g10rWV3VgMAACBwqlVTKj4+Xs2aNXM3pCSpbdu2sixLubm5OvPMM72OiYqKUlRUlNd4ZGRkQC/YAz1/VWNSvSbVKoVnvb/84mtcDYVZ6R7C8dxWxKRaJbPqDVStpnz/AAAAQiki1AmciEsvvVS7du3SgQMH3GPffvutIiIilJCQEMLMAFQn8fH+jQMAAAAAnLiQNqUOHDigrVu3auvWrZKk7du3a+vWrcrJyZHkevRuxIgR7vjrr79ecXFxGjVqlL766iutW7dOf//73zV69OgKFzoHgGOlpEgJCZLNVv5+m01KTHTFAQAAAAACI6RNqU2bNql9+/Zq3769JCk9PV3t27fXlClTJEl5eXnuBpUk1alTR1lZWdq3b586duyo4cOHq1+/fnriiSdCkj+A6slul2bPdn1+bGOqbDsjQ2G9yDkAAAAAhFpI15Tq3r27LKviRYTnz5/vNdamTRuj3ioEIDDS0qQlS6Tx46W8vCPjCQmuhlRaWshSAwAAAAAjVKs1pQDAn9LSpM2bj2y/9tof2r6dhhQAAAAABANNKQBGO/
oRvS5dLB7ZAwAAAIAgoSkFAAAAAACAoKMpBQAAAAAAgKCjKQUAAAAAAICgoykFAAAAAACAoKMpBQAAAAAAgKCjKQUAAAAAAICgoykFAAAAAACAoKMpBQAAAAAAgKCjKQUAAAAAAICgoykFAAAAAACAoKMpBQAAAAAAgKCjKQUAAAAAAICgoykFAAAAAACAoKMpBQAAAAAAgKCjKQUAAAAAAICgoykFAAAAAACAoKMpBQAAAAAAgKCjKQUAAAAAAICgoykFAAAAAACAoKMpBQAAAAAAgKCjKQUAAAAAAICgoykFAAAAAACAoKMpBQAAEEb279+viRMnqkWLFoqJiVGXLl30ySefuPdblqUpU6YoPj5eMTExSk1N1XfffRfCjAEAgKloSgEAAISRv/zlL8rKytIrr7yizz//XL169VJqaqp27twpSZo5c6aeeOIJPfvss9q4caNq166t3r1769ChQyHOHAAAmIamFAAAQJg4ePCg3njjDc2cOVOXXXaZWrdurXvvvVetW7fWM888I8uylJGRobvvvlv9+/fXBRdcoJdfflm7du3S0qVLQ50+AAAwTI1QJwAAAAD/+OOPP+R0OhUdHe0xHhMTow8++EDbt29Xfn6+UlNT3fvq1aunTp06acOGDRo6dKjXnCUlJSopKXFvFxYWSpIcDoccDoffayibMxBzVzUm1SqZVa+JtZZ9Hu41m3RuJbPqNalWKbD1nsicNKUAAADCRN26dZWcnKz77rtPbdu2VePGjbVw4UJt2LBBrVu3Vn5+viSpcePGHsc1btzYve9YM2bM0LRp07zGV61apVq1avm/iP/JysoK2NxVjUm1SmbVa0Kth5xHHv197733FG2PriQ6fJhwbo9mUr0m1SoFpt7i4mKfY2lKAQAAhJFXXnlFo0ePVrNmzWS323XRRRdp2LBh2rx580nNN3nyZKWnp7u3CwsLlZiYqF69eik2NtZfabs5HA5lZWWpZ8+eioyM9Pv8VYlJtUpm1WtSrUWHi6TPXZ/36NFD9WvXD2k+gWbSuZXMqtekWqXA1lt2V7UvaEoBAACEkVatWmnt2rUqKipSYWGh4uPjNWTIEJ1xxhlq0qSJJKmgoEDx8fHuYwoKCtSuXbty54uKilJUVJTXeGRkZEAv2gM9f1ViUq2SWfWaUGukdaQ+E+otY1Ktkln1mlSrFJh6T2Q+FjoHAAAIQ7Vr11Z8fLx+++03rVy5Uv3791fLli3VpEkTrV692h1XWFiojRs3Kjk5OYTZAgAAE3GnFAAAQBhZuXKlLMvS2Wefre+//15///vf1aZNG40aNUo2m00TJ07U/fffrzPPPFMtW7bUPffco6ZNm2rAgAGhTh0AABiGphQAAEAY+f333zV58mTl5uaqQYMGGjhwoB544AH3rfSTJk1SUVGRxowZo3379qlr165asWKF1xv7AAAAAo2mFAAAQBgZPHiwBg8eXOF+m82m6dOna/r06UHMCgAAwBtrSgEAAAAAACDoaEoBAAAAAAAg6GhKAQAAAAAAIOhoSgEAAAAAACDoaEoBAAAAAAAg6GhKAQAAAAAAIOhoSgEwmtN55PP1620e2wAAAACAwKEpBcBYmZlShw5HtgcPrqGkJNc4AAAAACCwaEoBMFJmpjRokJSX5zmem+sapzEFAAAAAIFFUwqAcZxOacIEybLK329Z0sSJ4lE+AAAAAAggmlIAjJOd7bojqjI7drjiAAAAAACBQVMKgHF27vRvHAAAAADgxNGUAmCcX37xbxwAAAAA4MSFtCm1bt069evXT02bNpXNZtPSpUt9PvbDDz9UjRo11K5du4DlByA8NWzo3zgAAAAAwIkLaVOqqKhIF154oebMmXNCx+3bt08jRozQFVdcEaDMAISzZs38GwcAAAAAOHE1QvnF+/Tpoz59+pzwcX/96191/fXXy263n9DdVQAgSSkpUkJC5YudJya64gAAAAAAgRHSptTJmDdvnn788Uf961//0v3333
/c+JKSEpWUlLi3CwsLJUkOh0MOh8Pv+ZXNGYi5qyKT6jWpVin8633sMZuGDrXLsiTJ5h632SxJ0qOPOlVaaqm0NDT5BVK4n9ujmVSrZFa9ga7VhO8hAABAqFWrptR3332nO++8U9nZ2apRw7fUZ8yYoWnTpnmNr1q1SrVq1fJ3im5ZWVkBm7sqMqlek2qVwrfeqChp0qR4PfPMhSosjHKPx8Ud1I03fqGoqDwtXx7CBIMgXM9teUyqVTKr3kDVWlxcHJB5AQAAcES1aUo5nU5df/31mjZtms466yyfj5s8ebLS09Pd24WFhUpMTFSvXr0UGxvr9zwdDoeysrLUs2dPRUZG+n3+qsakek2qVTKj3quukrp1s+mqq1zbM2Yc1sSJkbLb20tqH9LcAsmEc1vGpFols+oNdK1ld1YDAAAgcKpNU2r//v3atGmTtmzZovHjx0uSSktLZVmWatSooVWrVqlHjx5ex0VFRSkqKsprPDIyMqAX7IGev6oxqV6TapXCv96j/3q46KIIRUdXm78WT1m4n9ujmVSrZFa9garVlO8fAABAKFWbf33Fxsbq888/9xh7+umn9d5772nJkiVq2bJliDIDAAAAAADAiQppU+rAgQP6/vvv3dvbt2/X1q1b1aBBAzVv3lyTJ0/Wzp079fLLLysiIkLnnXeex/GNGjVSdHS01zgAAAAAAACqtpA2pTZt2qTLL7/cvV229tPIkSM1f/585eXlKScnJ1TpAQAAAAAAIEBC2pTq3r27LNf72Ms1f/78So+/9957de+99/o3KQBGcTqPfP7ZZzZdcYVkt4cuHwAAAAAwRUSoEwCAUMnMlIYPP7I9aZJdSUmucQAAAABAYNGUAmCkzExp0CDpl188x3fudI3TmAIAAACAwKIpBcA4Tqc0YYJU3tPDZWMTJ3o+2gcAAAAA8C+aUgCMk50t5eZWvN+ypB07XHEAAAAAgMCgKQXAOHl5/o0DAAAAAJw4mlIAjBMf7984AAAAAMCJoykFwDhdukh2e+UxdrsrDgAAAAAQGDSlABhn/frjL2LudLriAAAAAACBQVMKgHFYUwoAAAAAQo+mFADjsKYUAAAAAIQeTSkAxklJkRISJJut/P02m5SY6IoDAAAAAAQGTSkAxrHbpdmzy99X1qjKyDj+YugAAAAAgJNHUwqAkdLSpCVLpPr1PccTElzjaWkhSQsAAAAAjEFTCoCx0tKkqVOPbI8d69T27TSkAAAAACAYaEoBMFrEUX8Ltm7NI3sAAAAAECw0pQAAAAAAABB0NKUAAAAAAAAQdDSlAAAAAAAAEHQ0pQAAAAAAABB0NKUAAAAAAAAQdDSlAAAAAAAAEHQ0pQAAAAAAABB0NKUAAAAAAAAQdDSlAAAAAAAAEHQ0pQAAAMKE0+nUPffco5YtWyomJkatWrXSfffdJ8uy3DGWZWnKlCmKj49XTEyMUlNT9d1334UwawAAYCqaUgAAAGHi4Ycf1jPPPKOnnnpKX3/9tR5++GHNnDlTTz75pDtm5syZeuKJJ/Tss89q48aNql27tnr37q1Dhw6FMHMAAGCiGqFOAAAAAP6xfv169e/fX3379pUkJSUlaeHChfr4448lue6SysjI0N13363+/ftLkl5++WU1btxYS5cu1dChQ0OWOwAAMA93SgEAAISJLl26aPXq1fr2228lSZ999pk++OAD9enTR5K0fft25efnKzU11X1MvXr11KlTJ23YsCEkOQMAAHNxpxQAAECYuPPOO1VYWKg2bdrIbrfL6XTqgQce0PDhwyVJ+fn5kqTGjRt7HNe4cWP3vmOVlJSopKTEvV1YWChJcjgccjgcfq+hbM5AzF3VmFSrZFa9JtZa9nm412zSuZXMqtekWqXA1nsic9KUAgAACBOvvfaaXn31VS1YsEDnnnuutm7dqokTJ6pp06YaOXLkSc05Y8YMTZs2zWt81apVqlWr1qmmXKGsrKyAzV3VmFSrZFa9JtR6yHlkPbr33n
tP0fboEGYTPCac26OZVK9JtUqBqbe4uNjnWJpSAAAAYeLvf/+77rzzTvfaUOeff75+/vlnzZgxQyNHjlSTJk0kSQUFBYqPj3cfV1BQoHbt2pU75+TJk5Wenu7eLiwsVGJionr16qXY2Fi/1+BwOJSVlaWePXsqMjLS7/NXJSbVKplVr0m1Fh0ukj53fd6jRw/Vr10/pPkEmknnVjKrXpNqlQJbb9ld1b6gKQUAABAmiouLFRHhuWSo3W5XaWmpJKlly5Zq0qSJVq9e7W5CFRYWauPGjbrlllvKnTMqKkpRUVFe45GRkQG9aA/0/FWJSbVKZtVrQq2R1pH6TKi3jEm1SmbVa1KtUmDqPZH5aEoBAACEiX79+umBBx5Q8+bNde6552rLli2aNWuWRo8eLUmy2WyaOHGi7r//fp155plq2bKl7rnnHjVt2lQDBgwIbfIAAMA4NKUAAADCxJNPPql77rlHY8eO1e7du9W0aVPdfPPNmjJlijtm0qRJKioq0pgxY7Rv3z517dpVK1asUHS0GevAAACAqoOmFAAAQJioW7euMjIylJGRUWGMzWbT9OnTNX369OAlBgAAUI6I44cAAAAAAAAA/kVTCgAAAAAAAEFHUwoAAAAAAABBR1MKgNH+95Z0SdL330tOZ+hyAQAAAACT0JQCYKzMTOnee49sP/20XUlJrnEAAAAAQGDRlAJgpMxMaeBA6fffPcdzc13jNKYAAAAAILBoSgEwjtMpjRlTecyYMTzKBwAAAACBRFMKgHHWrJH27q08Zu9eVxwAAAAAIDBoSgEwjq/NJppSAAAAABA4NKUAAAAAAAAQdDSlABine3f/xgEAAAAAThxNKQDG6d5diourPCYujqYUAAAAAAQSTSkAxrHbpdGjK48ZPdoVBwAAAAAIjJA2pdatW6d+/fqpadOmstlsWrp0aaXxmZmZ6tmzpxo2bKjY2FglJydr5cqVwUkWQNhwOqWFCyuPWbTIFQcAAAAACIyQNqWKiop04YUXas6cOT7Fr1u3Tj179tTy5cu1efNmXX755erXr5+2bNkS4EwBhJPsbCk3t/KYHTtccQAAAACAwKgRyi/ep08f9enTx+f4jIwMj+0HH3xQ//73v7Vs2TK1b9/ez9kBCFd5ef6NAwAAAACcuGq9plRpaan279+vBg0ahDoVANVIfLx/4wAAAAAAJy6kd0qdqkcffVQHDhzQ4MGDK4wpKSlRSUmJe7uwsFCS5HA45HA4/J5T2ZyBmLsqMqlek2qVwrvezp2lZs1qaNcuybJsXvttNkvNmkmdO/+hMCw/rM/tsUyqVTKr3kDXasL3EAAAINSqbVNqwYIFmjZtmv7973+rUaNGFcbNmDFD06ZN8xpftWqVatWqFbD8srKyAjZ3VWRSvSbVKoVvvX/6U7wefvhiSZakoxtTlixLGj78E61cGd7P74XruS2PSbVKZtUbqFqLi4sDMi8AAACOqJZNqUWLFukvf/mLXn/9daWmplYaO3nyZKWnp7u3CwsLlZiYqF69eik2NtbvuTkcDmVlZalnz56KjIz0+/xVjUn1mlSrFP71XnWVdNFFTo0aZdfR//Zs1kyaNcupa69tLyk816oL93N7NJNqlcyqN9C1lt1ZDQAAgMCpdk2phQsXavTo0Vq0aJH69u173PioqChFRUV5jUdGRgb0gj3Q81c1JtVrUq1SeNdbo4ZkO+bpPZvNpho1aihMS/YQzuf2WCbVKplVb6BqNeX7BwAAEEohXej8wIED2rp1q7Zu3SpJ2r59u7Zu3aqcnBxJrrucRowY4Y5fsGCBRowYoccee0ydOnVSfn6+8vPz9fvvv4cifQDVWGamNGiQVFTkOb5zp2s8MzM0eQEAAACAKULalNq0aZPat2+v9u1dj8ikp6erffv2mjJliiQpLy/P3aCSpOeff15//PGHxo0bp/j4ePfHhAkTQpI/gOrJ6ZQmTJAsy3tf2djEia44AAAAAEBghPTxve7du8sq71
+F/zN//nyP7TVr1gQ2IQBGyM6WcnMr3m9Z0o4drrju3YOWFgAAAAAYJaR3SgFAKOT5+FI9X+MAAAAAACeOphQA48TH+zcOAAAAAHDiaEoBME5KipSQ4P3mvTI2m5SY6IoDAAAAAAQGTSkAxrHbpdmzy99X1qjKyHDFAQAAAAACg6YUACOlpUlLlki1a3uOJyS4xtPSQpMXAAAAAJiCphQAY6WlSUOHHtm+6qpSbd9OQwoAAAAAgoGmFACjRRz1t2B8PI/sAQAAAECw0JQCAAAAAABA0NGUAgAAAAAAQNDRlAIAAAAAAEDQ0ZQCAAAAAABA0NGUAgAAAAAAQNDRlAJgtNLSI5/n5UlOZ+hyAQAAAACT0JQCYKzMTGnRoiPby5dHKCnJNQ4AAAAACCyaUgCMlJkpDRokFRV5ju/c6RqnMQUAAAAAgUVTCoBxnE5pwgTJsrz3lY1NnMijfAAAAAAQSDSlABgnO1vKza14v2VJO3a44gAAAAAAgUFTCoBx8vL8GwcAAAAAOHE0pQAYp1Ej/8YBAAAAAE4cTSkAAAAAAAAEHU0pAMbZvdu/cQAAAACAE0dTCoBx4uP9GwcAAAAAOHE0pQAYJyVFiourPCYuzhUHAAAAAAgMmlIAAAAAAAAIOppSAIyTnS3t3Vt5zN69rjgAqE6SkpJks9m8PsaNGydJOnTokMaNG6e4uDjVqVNHAwcOVEFBQYizBgAApqIpBcA4eXn+jQOAquKTTz5RXl6e+yMrK0uSdN1110mSbrvtNi1btkyvv/661q5dq127diktLS2UKQMAAIPVCHUCABBsLHQOIFw1bNjQY/uhhx5Sq1at1K1bN/3+++968cUXtWDBAvXo0UOSNG/ePLVt21YfffSROnfuHIqUAQCAwbhTCoBxUlKkhATJZit/v80mJSay0DmA6u3w4cP617/+pdGjR8tms2nz5s1yOBxKTU11x7Rp00bNmzfXhg0bQpgpAAAwFXdKATCO3S7Nni0NGuS9r6xRlZHhigOA6mrp0qXat2+fbrjhBklSfn6+atasqfr163vENW7cWPn5+RXOU1JSopKSEvd2YWGhJMnhcMjhcPg977I5AzF3VWNSrZJZ9ZpYa9nn4V6zSedWMqtek2qVAlvvicxJUwqAkdLSpCVLpMGDJafzyHhCgqshxRIrAKq7F198UX369FHTpk1PaZ4ZM2Zo2rRpXuOrVq1SrVq1TmnuypSth2UCk2qVzKrXhFoPOQ+5P3/vvfcUbY8OYTbBY8K5PZpJ9ZpUqxSYeouLi32OpSkFwFhpaVKLFtKPP7q2mza1tH27jTukAFR7P//8s959911lZma6x5o0aaLDhw9r3759HndLFRQUqEmTJhXONXnyZKWnp7u3CwsLlZiYqF69eik2NtbvuTscDmVlZalnz56KjIz0+/xViUm1SmbVa1KtRYeLpM9dn/fo0UP1a9cPaT6BZtK5lcyq16RapcDWW3ZXtS9oSgEw2tHrSsXE8MgegPAwb948NWrUSH379nWPdejQQZGRkVq9erUGDhwoSdq2bZtycnKUnJxc4VxRUVGKioryGo+MjAzoRXug569KTKpVMqteE2qNtI7UZ0K9ZUyqVTKrXpNqlQJT74nMR1MKAAAgjJSWlmrevHkaOXKkatQ4cqlXr1493XjjjUpPT1eDBg0UGxurW2+9VcnJybx5DwAAhARNKQAAgDDy7rvvKicnR6NHj/ba9/jjjysiIkIDBw5USUmJevfuraeffjoEWQIAANCUAgAACCu9evWSZVnl7ouOjtacOXM0Z86cIGcFAADgLSLUCQAAAAAAAMA83CkFwGhH30xw8KDkdLLYOQD/KC0t1dq1a5Wdna2ff/5ZxcXFatiwodq3b6/U1FQlJiaGOkUAAICQ4k4pAMbKzJR+/vnI9q5dNiUlucYB4GQdPHhQ999/vxITE3XVVVfpnXfe0b59+2S32/X9999r6tSpatmypa666ip99NFHoU4XAAAgZL
hTCoCRMjOlQYM875SSpJ07XeNLlkhpaaHJDUD1dtZZZyk5OVkvvPCCevbsWe5rkX/++WctWLBAQ4cO1V133aWbbropBJkCAACEFk0pAMZxOqUJE7wbUpJrzGaTJk6U+vfnUT4AJ27VqlVq27ZtpTEtWrTQ5MmTdccddygnJydImQEAAFQtPL4HwDjZ2VJubsX7LUvascMVBwAn6ngNqaNFRkaqVatWAcwGAACg6uJOKQDGycvzbxwAHM8ff/yh5557TmvWrJHT6dSll16qcePGKTo6OtSpAQAAhAxNKQDGiY/3bxwAHM/f/vY3ffvtt0pLS5PD4dDLL7+sTZs2aeHChaFODQAAIGRoSgEwTkqKlJDgWtS8vHWlbDbX/pSU4OcGIDy8+eabuvbaa93bq1at0rZt22T/30J1vXv3VufOnUOVHgAAQJXAmlIAjGO3S7Nnl7/PZnP9NyODRc4BnLy5c+dqwIAB2rVrlyTpoosu0l//+letWLFCy5Yt06RJk3TxxReHOEsAAIDQoikFwEhpadKSJVLEMX8LNmvmGk9LC01eAMLDsmXLNGzYMHXv3l1PPvmknn/+ecXGxuquu+7SPffco8TERC1YsCDUaQIAAIQUj+8BMFrZnVEA4G9DhgxR7969NWnSJPXu3VvPPvusHnvssVCnBQAAUGVwpxQAI2VmSoMGSU6n5/jOna7xzMzQ5AUgvNSvX1/PP/+8HnnkEY0YMUJ///vfdejQoVCnBQAAUCXQlAJgHKdTmjCh/EXOy8YmTvRuWAGAr3JycjR48GCdf/75Gj58uM4880xt3rxZtWrV0oUXXqh33nkn1CkCAACEXEibUuvWrVO/fv3UtGlT2Ww2LV269LjHrFmzRhdddJGioqLUunVrzZ8/P+B5Aggv2dlSbm7F+y1L2rHDFQcAJ2PEiBGKiIjQI488okaNGunmm29WzZo1NW3aNC1dulQzZszQ4MGDQ50mAABASIW0KVVUVKQLL7xQc+bM8Sl++/bt6tu3ry6//HJt3bpVEydO1F/+8hetXLkywJkCCCd5ef6NA4Bjbdq0SQ888ICuvPJKzZo1S//973/d+9q2bat169YpNTU1hBkCAACEXkgXOu/Tp4/69Onjc/yzzz6rli1buhcJbdu2rT744AM9/vjj6t27d6DSBBBm4uP9GwcAx+rQoYOmTJmikSNH6t1339X555/vFTNmzJgQZAYAAFB1VKu3723YsMHrt4q9e/fWxIkTKzympKREJSUl7u3CwkJJksPhkMPh8HuOZXMGYu6qyKR6TapVCu96O3eWmjWroV27JMvyfv2ezWapWTOpc+c/FIblh/W5PZZJtUpm1RvoWk913pdfflm33367brvtNrVr107PPfecnzIDAAAIH9WqKZWfn6/GjRt7jDVu3FiFhYU6ePCgYmJivI6ZMWOGpk2b5jW+atUq1apVK2C5ZmVlBWzuqsikek2qVQrfev/0p3g9/PDFkixJRzemLFmWNHz4J1q5Mryf3wvXc1sek2qVzKo3ULUWFxef0vEtWrTQkiVL/JQNAABAeKpWTamTMXnyZKWnp7u3CwsLlZiYqF69eik2NtbvX8/hcCgrK0s9e/ZUZGSk3+evakyq16RapfCv96qrpIsucmroULvHW/gSEqTHHnPq2mvbS2ofsvwCKdzP7dFMqlUyq95A11p2Z/XJKCoqUu3atQMWDwAAEC6qVVOqSZMmKigo8BgrKChQbGxsuXdJSVJUVJSioqK8xiMjIwN6wR7o+asak+o1qVYpvOutUUMeDSnJ9ThfjRo1FKYlewjnc3ssk2qVzKo3ULWeypytW7fWhAkTNHLkSMVXsDidZVl69913NWvWLF122WWaPHnySX89AACA6qpaNaWSk5O1fPlyj7GsrCwlJyeHKCMA1VVmpjRokPf4rl2u8SVLpLS04OcFoPpbs2aN/vGPf+jee+/VhRdeqI4dO6pp06aKjo7Wb7/9pq+++kobNmxQjRo1NH
nyZN18882hThkAACAkQtqUOnDggL7//nv39vbt27V161Y1aNBAzZs31+TJk7Vz5069/PLLkqS//vWveuqppzRp0iSNHj1a7733nl577TW9/fbboSoBQDXkdEoTJnjfJSW5xmw2aeJEqX9/yW4PenoAqrmzzz5bb7zxhnJycvT6668rOztb69ev18GDB3X66aerffv2euGFF9SnTx/Z+UsGAAAYLKRNqU2bNunyyy93b5et/TRy5EjNnz9feXl5ysnJce9v2bKl3n77bd12222aPXu2EhIS9M9//lO9e/cOeu4Aqq/sbCk3t+L9liXt2OGK6949aGkBCDPNmzfX7bffrttvvz3UqQAAAFRJIW1Kde/eXVZ5tyr8z/z588s9ZsuWLQHMCkC4y/PxpXq+xgEAAAAATlxEqBMAgGBr1Mi/cQAAAACAE0dTCgAAAAAAAEFHUwqAcXbv9m8cAAAAAODE0ZQCYJz4eP/GAQAAAABOHE0pAMZJSZESEiSbrfz9NpuUmOiKA4BTkZSUpOnTp3u8TRgAAAAuNKUAGMdul2bPLn9fWaMqI8MVBwCnYuLEicrMzNQZZ5yhnj17atGiRSopKQl1WgAAAFUCTSkARkpLk5Ys8W48JSS4xtPSQpMXgPAyceJEbd26VR9//LHatm2rW2+9VfHx8Ro/frw+/fTTUKcHAAAQUjSlABgrLU1q0eLIdtOmlrZvpyEFwP8uuugiPfHEE9q1a5emTp2qf/7zn7r44ovVrl07zZ07V5ZlhTpFAACAoKsR6gQAIJSOXlcqJoZH9gAEhsPh0Jtvvql58+YpKytLnTt31o033qjc3Fz94x//0LvvvqsFCxaEOk0AAICgOqmm1I4dO2Sz2ZSQkCBJ+vjjj7VgwQKdc845GjNmjF8TBIBAOvrmhIMHJaeTxhQA//n00081b948LVy4UBERERoxYoQef/xxtWnTxh1z7bXX6uKLLw5hlgAAAKFxUo/vXX/99Xr//fclSfn5+erZs6c+/vhj3XXXXZo+fbpfEwSAQMnMlH766cj2rl02tWjhGgcAf7j44ov13Xff6ZlnntHOnTv16KOPejSkJKlly5YaOnRoiDIEAAAInZNqSn3xxRe65JJLJEmvvfaazjvvPK1fv16vvvqq5s+f78/8ACAgMjOlgQOl0lLP8Z07XeM0pgD4w48//qgVK1bouuuuU2RkZLkxtWvX1rx584KcGQAAQOidVFPK4XAoKipKkvTuu+/qmmuukSS1adNGeXl5/ssOAALA6ZSO96TxmDGuOAA4Fbt379bGjRu9xjdu3KhNmzaFICMAAICq46SaUueee66effZZZWdnKysrS1deeaUkadeuXYqLi/NrggDgb2vWSHv3Vh6zd68rDgBOxbhx47Rjxw6v8Z07d2rcuHEhyAgAAKDqOKmm1MMPP6znnntO3bt317Bhw3ThhRdKkt566y33Y30AUFX52myiKQXgVH311Ve66KKLvMbbt2+vr776KgQZAQAAVB0n9fa97t27a8+ePSosLNRpp53mHh8zZoxq1arlt+QAAACqs6ioKBUUFOiMM87wGM/Ly1ONGid1GQYAABA2TupOqYMHD6qkpMTdkPr555+VkZGhbdu2qVGjRn5NEAD8rXt3/8YBQEV69eqlyZMn6/fff3eP7du3T//4xz/Us2fPEGYGAAAQeif1K7r+/fsrLS1Nf/3rX7Vv3z516tRJkZGR2rNnj2bNmqVbbrnF33kCgN907y7FxVW+rlRcHE0pAKfu0Ucf1WWXXaYWLVqoffv2kqStW7eqcePGeuWVV0KcHQAAQGid1J1Sn376qVJSUiRJS5YsUePGjfXzzz/r5Zdf1hNPPOHXBAHA3+x2afToymNGj3bFAcCpaNasmf773/9q5syZOuecc9ShQwfNnj1bn3/+uRITE0OdHgAAQEid1J1SxcXFqlu3riRp1apVSktLU0REhDp37qyff/7ZrwkCgL85ndLChZXHLFokzZhBYw
rAqatdu7bGjBkT6jQAAACqnJNqSrVu3VpLly7Vtddeq5UrV+q2226TJO3evVuxsbF+TRAA/C07W8rNrTxmxw5XHI/wAfCHr776Sjk5OTp8+LDH+DXXXBOijAAAAELvpJpSU6ZM0fXXX6/bbrtNPXr0UHJysiTXXVNl6yUAQFWVl+ffOACoyI8//qhrr71Wn3/+uWw2myzLkiTZbDZJktPpDGV6AAAAIXVSa0oNGjRIOTk52rRpk1auXOkev+KKK/T444/7LTkACIT4eP/GAUBFJkyYoJYtW2r37t2qVauWvvzyS61bt04dO3bUmjVrQp0eAABASJ3UnVKS1KRJEzVp0kS5/3sGJiEhQZdcconfEgOAQElJkRISpJ07pf/dtODBZnPt/9/7HADgpG3YsEHvvfeeTj/9dEVERCgiIkJdu3bVjBkz9Le//U1btmwJdYoAAAAhc1J3SpWWlmr69OmqV6+eWrRooRYtWqh+/fq67777VFpa6u8cAcCv7HZp9uzy9/3viRplZLDIOYBT53Q63S+HOf3007Vr1y5JUosWLbRt27aAfM2dO3fqT3/6k+Li4hQTE6Pzzz9fmzZtcu+3LEtTpkxRfHy8YmJilJqaqu+++y4guQAAAFTmpJpSd911l5566ik99NBD2rJli7Zs2aIHH3xQTz75pO655x5/5wgAfpeWJi1Z4j2ekOAaT0sLfk4Aws95552nzz77TJLUqVMnzZw5Ux9++KGmT5+uM844w+9f77ffftOll16qyMhIvfPOO/rqq6/02GOP6bTTTnPHzJw5U0888YSeffZZbdy4UbVr11bv3r116NAhv+cDAABQmZN6fO+ll17SP//5T483xlxwwQVq1qyZxo4dqwceeMBvCQJAoJTXeNq+nTukAPjP3XffraKiIknS9OnTdfXVVyslJUVxcXFavHix37/eww8/rMTERM2bN8891rJlS/fnlmUpIyNDd999t/r37y9Jevnll9W4cWMtXbpUQ4cO9XtOAAAAFTmpptSvv/6qNm3aeI23adNGv/766yknBQChQkMKgD/17t3b/Xnr1q31zTff6Ndff9Vpp53mfgOfP7311lvq3bu3rrvuOq1du9b9C8ObbrpJkrR9+3bl5+crNTXVfUy9evXUqVMnbdiwodymVElJiUpKStzbhYWFkiSHwyGHw+H3GsrmDMTcVY1JtUpm1WtirWWfh3vNJp1byax6TapVCmy9JzLnSTWlLrzwQj311FN64oknPMafeuopXXDBBSczJQBUCU4njSkA/uFwOBQTE6OtW7fqvPPOc483aNAgYF/zxx9/1DPPPKP09HT94x//0CeffKK//e1vqlmzpkaOHKn8/HxJUuPGjT2Oa9y4sXvfsWbMmKFp06Z5ja9atUq1atXyfxH/k5WVFbC5qxqTapXMqteEWg85jzz6+9577ynaHh3CbILHhHN7NJPqNalWKTD1FhcX+xx7Uk2pmTNnqm/fvnr33XeVnJwsyfV2mR07dmj58uUnMyUABF1mpvdYUpJrEXTWlAJwqiIjI9W8eXM5nc6gfc3S0lJ17NhRDz74oCSpffv2+uKLL/Tss89q5MiRJzXn5MmTlZ6e7t4uLCxUYmKievXqpdjYWL/kfTSHw6GsrCz17NlTkZGRfp+/KjGpVsmsek2qtehwkfS56/MePXqofu36Ic0n0Ew6t5JZ9ZpUqxTYesvuqvbFSTWlunXrpm+//VZz5szRN998I0lKS0vTmDFjdP/99yuF96gDqOIyM6VBg7zHd+50jbPYOQB/uOuuu/SPf/xDr7zySkDvkCoTHx+vc845x2Osbdu2euONNyRJTZo0kSQVFBQoPj7eHVNQUKB27dqVO2dUVJSioqK8xiMjIwN60R7o+asSk2qVzKrXhFojrSP1mVBvGZNqlcyq16RapcDUeyLznVRTSpKaNm3qtaD5Z599phdffFHPP//8yU4LAAHndEoTJkiW5b3PsiSbTZo4Uerfn0f5AJ
yap556St9//72aNm2qFi1aqHbt2h77P/30U79+vUsvvVTbtm3zGPv222/VokULSa5Fz5s0aaLVq1e7m1CFhYXauHGjbrnlFr/mAgAAcDwn3ZQCgOoqO1vKza14v2VJO3a44rp3D1paAMLQgAEDgvr1brvtNnXp0kUPPvigBg8erI8//ljPP/+8+xeGNptNEydO1P33368zzzxTLVu21D333KOmTZsGPVcAAACaUgCMk5fn3zgAqMjUqVOD+vUuvvhivfnmm5o8ebKmT5+uli1bKiMjQ8OHD3fHTJo0SUVFRRozZoz27dunrl27asWKFYqONmNxYgAAUHXQlAJgnKOWUfFLHABUJVdffbWuvvrqCvfbbDZNnz5d06dPD2JWAAAA3k6oKZV2nFV/9+3bdyq5AEBQpKRICQmuRc3LW1fKZnPt550NAE5VRESEbDZbhfuD+WY+AACAquaEmlL16tU77v4RI0acUkIAEGh2uzR7dvlv3yv7t2NGBoucAzh1b775pse2w+HQli1b9NJLL2natGkhygoA/OfwH4fdnz+z6RndfuntqlmjZggzAlCdnFBTat68eYHKAwCCKi1NWrJEGjjQc7xZM1fD6jg3hgKAT/r37+81NmjQIJ177rlavHixbrzxxhBkBQD+MSlrkh7b8Jh7+x9r/qF71t6j9OR0zew5M4SZAaguIkKdAABUJeU9zgcA/ta5c2etXr061GkAwEmblDVJj6x/RKVWqce403LqkfWPaFLWpBBlBqA6oSkFwEiZmeU/vrdrl2s8MzP4OQEww8GDB/XEE0+oWbNmoU4FAE7K4T8Oa9aGWZXGzNowy+PRPgAoD2/fA2Acp1OaMKH8u6Isy7Wu1MSJUv/+rCsF4NScdtppHgudW5al/fv3q1atWvrXv/4VwswA4OQ9velpOa3KX9TgtJx6etPTmth5YnCSAlAt0ZQCYJzsbCk3t+L9liXt2OGK6949aGkBCEOPP/64R1MqIiJCDRs2VKdOnXTaaaeFMDMAOHk//PqDX+MAmIumFADj5OX5Nw4AKnLDDTeEOgUA8Luk+kl+jQNgLtaUAmCc+Hj/xgFARebNm6fXX3/da/z111/XSy+9FIKMAODUnd/ofL/GATAXTSkAxunS5fhrRdntrjgAOBUzZszQ6aef7jXeqFEjPfjggyHICABO3d6De/0aB8BcNKUAGGf9etdi55VxOl1xAHAqcnJy1LJlS6/xFi1aKCcnJwQZAcCpi6/r2+3kvsYBMBdNKQDGYU0pAMHSqFEj/fe///Ua/+yzzxQXFxeCjADg1HVJ6CK7rfLbzu02u7okcNs5gMrRlAJgHNaUAhAsw4YN09/+9je9//77cjqdcjqdeu+99zRhwgQNHTo01OkBwElZn7teTqvy286dllPrc7ntHEDlqkRTas6cOUpKSlJ0dLQ6deqkjz/+uNL4jIwMnX322YqJiVFiYqJuu+02HTp0KEjZAqjuUlKkhATpqLe0e7DZpMREVxwAnIr77rtPnTp10hVXXKGYmBjFxMSoV69e6tGjB2tKAai28vb7dju5r3EAzFUj1AksXrxY6enpevbZZ9WpUydlZGSod+/e2rZtmxo1auQVv2DBAt15552aO3euunTpom+//VY33HCDbDabZs2aFYIKAFQ3drs0e7Y0aJD3vrJGVUbG8RdDB4DjqVmzphYvXqz7779fW7duVUxMjM4//3y1aNEi1KkBwEljTSkA/hLyptSsWbN00003adSoUZKkZ599Vm+//bbmzp2rO++80yt+/fr1uvTSS3X99ddLkpKSkjRs2DBt3LgxqHkDqN7S0qQlS6SBAz3HExJcDam0tJCkBSBMnXnmmTrzzDNDnQYA+EXZmlKVPcLHmlIAfBHSx/cOHz6szZs3KzU11T0WERGh1NRUbdiwodxjunTpos2bN7sf8fvxxx+1fPlyXXXVVUHJGUD4KK/xtH07DSkA/jNw4EA9/PDDXuMzZ87UddddF4KMAO
DUsaYUAH8J6Z1Se/bskdPpVOPGjT3GGzdurG+++abcY66//nrt2bNHXbt2lWVZ+uOPP/TXv/5V//jHP8qNLykpUUlJiXu7sLBQkuRwOORwOPxUyRFlcwZi7qrIpHpNqlUyqd5Ij63SUodKS0OUSpCYc27NqlUyq95A1+qvedetW6d7773Xa7xPnz567LHH/PI1ACDYWFMKgL+E/PG9E7VmzRo9+OCDevrpp9WpUyd9//33mjBhgu677z7dc889XvEzZszQtGnTvMZXrVqlWrVqBSzPrKysgM1dFZlUr0m1SibU299ja/ny5SHKI/jC/9weYVKtkln1BqrW4uJiv8xz4MAB1axZ02s8MjLS/YsyAKhuWFMKgL+EtCl1+umny263q6CgwGO8oKBATZo0KfeYe+65R3/+85/1l7/8RZJ0/vnnq6ioSGPGjNFdd92liAjPJxInT56s9PR093ZhYaESExPVq1cvxcbG+rki129Ws7Ky1LNnT0VGRh7/gGrOpHpNqlUyr94yJjwKbNK5NalWyax6A12rvxpG559/vhYvXqwpU6Z4jC9atEjnnHOOX74GAAQba0oB8JeQNqVq1qypDh06aPXq1RowYIAkqbS0VKtXr9b48ePLPaa4uNir8WT/3yuyLMvyio+KilJUVJTXeGRkZEAv2AM9f1VjUr0m1SqZV69lRaqcmxrCkknn1qRaJbPqDVSt/prznnvuUVpamn744Qf16NFDkrR69WotXLhQr7/+ul++BgAE24msKdU9qXtwkgJQLYX88b309HSNHDlSHTt21CWXXKKMjAwVFRW538Y3YsQINWvWTDNmzJAk9evXT7NmzVL79u3dj+/dc8896tevn7s5BQC+mDTJeywmRrr9dmnmzODnAyD89OvXT0uXLtWDDz6oJUuWKCYmRhdccIHeffdddevWLdTpAcBJYU0pAP4S8qbUkCFD9Msvv2jKlCnKz89Xu3bttGLFCvfi5zk5OR53Rt19992y2Wy6++67tXPnTjVs2FD9+vXTAw88EKoSAFRDkyZJjzziPV5aemScxhQAf+jbt6/69u3rNf7FF1/ovPPOC0FGAHBqWFMKgL+EvCklSePHj6/wcb01a9Z4bNeoUUNTp07V1KlTg5AZgHB0+LB0vJdePfaYdP/9MuZRPgDBsX//fi1cuFD//Oc/tXnzZjmdlT/+AgBVEWtKAfCXiOOHAEB4eeop1x1RlSktdcUBgD+sW7dOI0aMUHx8vB599FH16NFDH330UajTAoCTciJrSgFAZarEnVIAEEzZ2b7HHfXyTgA4Ifn5+Zo/f75efPFFFRYWavDgwSopKdHSpUt58x6Aao01pQD4C3dKATBO3br+jQOAY/Xr109nn322/vvf/yojI0O7du3Sk08+Geq0AMAvWFMKgL/QlAJgnD//2b9xAHCsd955RzfeeKOmTZumvn378oZgAGGlbE2pyrCmFABf0JQCYJwePaQ6dSqPqVvXFQcAJ+ODDz7Q/v371aFDB3Xq1ElPPfWU9uzZE+q0AMAvWFMKgL/QlAJgHLtdeumlymPmz3fFAcDJ6Ny5s1544QXl5eXp5ptv1qJFi9S0aVOVlpYqKytL+/fvD3WKAHDSWFMKgL/QlAJgpLQ06Y03vMcTElzjaWnBzwlA+Kldu7ZGjx6tDz74QJ9//rluv/12PfTQQ2rUqJGuueaaUKcHACelUe1Gfo0DYC6aUgCMVV7j6aefaEgBCIyzzz5bM2fOVG5urhYuXBjqdAAAAEKOphQAAEAQ2e12DRgwQG+99VaoUwGAk5J/IN+vcQDMRVMKgLEyM73HkpLKHwcAAIDLL8W/+DUOgLloSgEwUmamNGiQ9/jOna5xGlMAAADla1iroV/jAJiLphQA4zid0oQJkmV57ysbmzjRFQcAAABPzWKb+TUOgLloSgEwTna2lJtb8X7LknbscMUBAADAU5eELrLb7JXG2G12dUnoEqSMAFRXNKUAGCcvz79xAAAAJl
mfu15Oq/Jbyp2WU+tz1wcpIwDVFU0pAMaJj/dvHAAAgEny9vv2mztf4wCYi6YUAOOkpEgJCZXHJCa64gAAAOApvq5vv7nzNQ6AuWhKATCO3S4NG1Z5zNChrjgAAAB4SmmeoriYuEpj4mLilNKc3/ABqBxNKQDGcTqluXMrj5k7l7fvAQAAAEAg0ZQCYJw1a6S9eyuP2bvXFQcAAABP2TnZ2nuw8oupvQf3KjuHVxkDqBxNKQDG8bXZRFMKAADAGwudA/AXmlIAAAAAAJ+x0DkAf6EpBcA43bv7Nw4AAMAkKc1TlBBb+auME2MTWegcwHHRlAJgnO7dpTp1Ko+pW5emFAAAQHnsEXYNO6/yVxkPPW+o7BG8yhhA5WhKATBSVFTl+2vWDE4eAAAA1Y2z1KmFXyysNGbRF4vkLOVVxgAqR1MKgHGys317+142L4wBUM3ce++9stlsHh9t2rRx7z906JDGjRunuLg41alTRwMHDlRBQUEIMwZQHWXnZCu3MLfSmB2FO3j7HoDjoikFwDh5Pr4Ixtc4AKhKzj33XOXl5bk/PvjgA/e+2267TcuWLdPrr7+utWvXateuXUpLSwthtgCqI96+B8BfaoQ6AQAItngfXwTjaxwAVCU1atRQkyZNvMZ///13vfjii1qwYIF69OghSZo3b57atm2rjz76SJ07dw52qgCqqUa1G/k1DoC5aEoBME5KipSQIO3cKVmW936bzbU/hRfGAKiGvvvuOzVt2lTR0dFKTk7WjBkz1Lx5c23evFkOh0Opqanu2DZt2qh58+basGFDhU2pkpISlZSUuLcLCwslSQ6HQw6Hw+/5l80ZiLmrGpNqlcyqN9xr/eOPP3yOC7fvQbif22OZVK9JtUqBrfdE5qQpBcA4drs0e7Y0aJD3PpvN9d+MDFccAFQnnTp10vz583X22WcrLy9P06ZNU0pKir744gvl5+erZs2aql+/vscxjRs3Vn5+foVzzpgxQ9OmTfMaX7VqlWrVquXvEtyysrICNndVY1Ktkln1hmut635b51PcqvWrdOjrQwHOJjTC9dxWxKR6TapVCky9xcXFPsfSlAJgpLQ0ackSaeBAz/GEBFdDiiVWAFRHffr0cX9+wQUXqFOnTmrRooVee+01xcTEnNSckydPVnp6unu7sLBQiYmJ6tWrl2JjY08552M5HA5lZWWpZ8+eioyM9Pv8VYlJtUpm1Rvutdb+ubZm/TzruHF9uvZRtxbdgpBR8IT7uT2WSfWaVKsU2HrL7qr2BU0pAMYqr/G0bZt0kv9uA4Aqp379+jrrrLP0/fffq2fPnjp8+LD27dvncbdUQUFBuWtQlYmKilJUVJTXeGRkZEAv2gM9f1ViUq2SWfWGa62XJV0mu80up+WsMMZus+uypMsUWSP86pfC99xWxKR6TapVCky9JzIfb98DYKxJk7zH6tYtfxwAqqMDBw7ohx9+UHx8vDp06KDIyEitXr3avX/btm3KyclRcnJyCLMEUN2sz11faUNKkpyWU+tz1wcpIwDVFXdKATDSpEnSI494jzudR8ZnzgxuTgBwqu644w7169dPLVq00K5duzR16lTZ7XYNGzZM9erV04033qj09HQ1aNBAsbGxuvXWW5WcnMyb9wCckLz9eX6NA2AumlIAjHP4sDTrOMsgzJol3X+/VLNmcHICAH/Izc3VsGHDtHfvXjVs2FBdu3bVRx99pIYNG0qSHn/8cUVERGjgwIEqKSlR79699fTTT4c4awDVTXzdeL/GATAXTSkAxnn6adcdUZVxOl1xEycGJSUA8ItFixZVuj86Olpz5szRnDlzgpQRgHDUqWknv8YBMBdrSgEwzg8/+DcOAADAJE9v8u0OS1/jAJiLphQA47Rq5d84AAAAk3yQ84Ff4wCYi6YUAOOMHSvZ7ZXH2O2uOAAAAHiqU7OOX+MAmIumFADj1KwppadXHpOeziLnAAAA5fnzBX/2axwAc9GUAmCkmTOl/v3L39e/v2
s/AAAAvF1xxhXHvQuqTs06uuKMK4KUEYDqiqYUACNlZkpvvVX+vrfecu0HAACAN3uEXbd0vKXSmFs63iJ7xHHWSwBgPJpSAIzjdEoTJkiWVXHMxImuOAAAAHhyljq18IuFlcYs+mKRnKVcTAGoHE0pAMbJzpZycyveb1nSjh2uOAAAAHjKzslWbmElF1OSdhTuUHYOF1MAKkdTCoBx8vL8GwcAAGCSnYU7/RoHwFw0pQAYJz7ev3EAAAAmyT+Q79c4AOaiKQXAOCkpUlxc5TFxca44AAAAePr14K9+jQNgLppSAAAAAACfRdh8+2ekr3EAzMXfEgCMk50t7d1beczevSx0DgAAUJ7uSd39GgfAXDSlABiHhc4BAABOXvek7qpTs06lMXVq1qEpBeC4qkRTas6cOUpKSlJ0dLQ6deqkjz/+uNL4ffv2ady4cYqPj1dUVJTOOussLV++PEjZAqjuWOgcAADg1ETZo05pPwBIVaAptXjxYqWnp2vq1Kn69NNPdeGFF6p3797avXt3ufGHDx9Wz5499dNPP2nJkiXatm2bXnjhBTVr1izImQOorlJSpIQEyWYrf7/NJiUmstA5AABAebJzsrX3YOVrIew9uFfZOayFAKByIW9KzZo1SzfddJNGjRqlc845R88++6xq1aqluXPnlhs/d+5c/frrr1q6dKkuvfRSJSUlqVu3brrwwguDnDmA6spul2bPLn9fWaMqI8MVBwAAAE95+31b48DXOADmqhHKL3748GFt3rxZkydPdo9FREQoNTVVGzZsKPeYt956S8nJyRo3bpz+/e9/q2HDhrr++uv1f//3f7KX8y/IkpISlZSUuLcLCwslSQ6HQw6Hw88VyT1nIOauikyq16RapfCvt18/adEim4YM8fxrsFkzS4895lS/fpbCtPSwP7dHM6lWyax6A12rCd9DADhZ8XV9W+PA1zgA5gppU2rPnj1yOp1q3Lixx3jjxo31zTfflHvMjz/+qPfee0/Dhw/X8uXL9f3332vs2LFyOByaOnWqV/yMGTM0bdo0r/FVq1apVq1a/imkHFlZWQGbuyoyqV6TapXCu96oKEnq7zE2e/ZbstslE5apC+dzeyyTapXMqjdQtRYXFwdkXgAIB10SuihCESpVaYUxEYpQl4QuQcwKQHUU0qbUySgtLVWjRo30/PPPy263q0OHDtq5c6ceeeSRcptSkydPVnp6unu7sLBQiYmJ6tWrl2JjY/2en8PhUFZWlnr27KnIyEi/z1/VmFSvSbVK5tVbpl+/q0KdQsCZdG5NqlUyq95A11p2ZzUAwFt2TnalDSlJKlWpsnOydcUZVwQpKwDVUUibUqeffrrsdrsKCgo8xgsKCtSkSZNyj4mPj1dkZKTHo3pt27ZVfn6+Dh8+rJo1a3rER0VFKSrK+80PkZGRAb1gD/T8VY1J9ZpUq2RevRERkcasJWXSuTWpVsmsegNVqynfPwA4GWt+WuNzHE0pAJUJ6ULnNWvWVIcOHbR69Wr3WGlpqVavXq3k5ORyj7n00kv1/fffq7T0SGf+22+/VXx8vFdDCgAqk5npPZaUVP44AAAAAMC/Qv72vfT0dL3wwgt66aWX9PXXX+uWW25RUVGRRo0aJUkaMWKEx0Lot9xyi3799VdNmDBB3377rd5++209+OCDGjduXKhKAFANZWZKAwd6j+fmusZpTAEAAJTvshaX+TUOgLlCvqbUkCFD9Msvv2jKlCnKz89Xu3bttGLFCvfi5zk5OYqIONI7S0xM1MqVK3XbbbfpggsuULNmzTRhwgT93//9X6hKAFDNOJ3SmDGVx4wZI/XvL2Me5QMAAPBVhM23ext8jQNgrpA3pSRp/PjxGj9+fLn71qxZ4zWWnJysjz76KMBZAQhXa9ZIe/dWHrN3ryvuCpZBAAAA8LC7aLdf4wCYi9Y1AOOU0+s+pTgAAACTxNeN92scAHPRlAJgnNLK32B8wnEAAAAm6ZLQRRHH+adkhCLUJa
FLkDICUF3RlAJgnLg4/8YBAACYJDsnW6Wq/Ld3pSpVdk52kDICUF3RlAJgnP+9R8FvcQAAACZZ89Mav8YBMBdNKQDGadbMv3EAAAAAgBNHUwqAcVJSjv9oXlycKw4AAACeuid192scAHPRlAIAAAAA+CyleYpPC52nNOc3fAAqR1MKgHGys6W9eyuP2bvXFQcAAABP63PX+7TQ+frc9UHKCEB1RVMKgHHy8vwbBwAAYJKdhTv9GgfAXDSlABgnPt6/cQAAACbZtX+XX+MAmIumFADjpKRICQmSzVb+fptNSkxkoXMAAIDybM3f6tc4AOaiKQXAOHa7NHt2+fvKGlUZGa44AAAAeCo6XOTXOADmoikFwEhpadKSJd7jCQmu8bS04OcEAABQHcTX8W2NA1/jAJirRqgTAIBQKa/xtH07d0gBAABUpn5Mfb/GATAXd0oBwFFoSAEAAFSuRoRv9zb4GgfAXDSlAAAAAAA+657U3a9xAMxFUwoAAAAA4LPuSd0VFxNXaUxcTBxNKQDHRVMKAAAAAOAze4Rdo9uPrjRmdPvRskewLgKAytGUAgAAAAD4zFnq1MIvFlYas+iLRXKWOoOUEYDqiqYUABzFybUTAABApbJzspVbmFtpzI7CHcrOyQ5SRgCqK5pSAIyVmek9lpRU/jgAAABc8vbn+TUOgLloSgEwUmamNHCg93hurmucxhQAAED54uvG+zUOgLloSgEwjtMpjRlTecyYMTzKBwAAUJ6U5ilKiE2QTbZy99tkU2JsolKapwQ5MwDVDU0pAMZZs0bau7fymL17XXEAUF099NBDstlsmjhxonvs0KFDGjdunOLi4lSnTh0NHDhQBQUFoUsSQLVkj7Br9pWzJcmrMVW2nXFlBm/fA3BcNKUAGOe99/wbBwBVzSeffKLnnntOF1xwgcf4bbfdpmXLlun111/X2rVrtWvXLqWlpYUoSwDVWVrbNC0ZvERN6jTxGE+ITdCSwUuU1pa/WwAcH00pAMbJyfFvHABUJQcOHNDw4cP1wgsv6LTTTnOP//7773rxxRc1a9Ys9ejRQx06dNC8efO0fv16ffTRRyHMGEB1ldY2TZvHbHZvvzbwNW2fsJ2GFACf1Qh1AgAQbM2b+zcOAKqScePGqW/fvkpNTdX999/vHt+8ebMcDodSU1PdY23atFHz5s21YcMGde7cudz5SkpKVFJS4t4uLCyUJDkcDjkcDr/nXzZnIOauakyqVTKrXhNrlSTnH045HA6VRpSGMKPAMuncSmbVa1KtUmDrPZE5aUoBME6PHtKDD/oWBwDVyaJFi/Tpp5/qk08+8dqXn5+vmjVrqn79+h7jjRs3Vn5+foVzzpgxQ9OmTfMaX7VqlWrVqnXKOVckKysrYHNXNSbVKplVb7jXumHfBj2f+7x7e9i/hylueZz+0uwvSq6fHMLMAi/cz+2xTKrXpFqlwNRbXFzscyxNKQDG6d5dqlNHOnCg4pg6dVxxAFBd7NixQxMmTFBWVpaio6P9Nu/kyZOVnp7u3i4sLFRiYqJ69eql2NhYv32dMg6HQ1lZWerZs6ciIyP9Pn9VYlKtkln1mlDrm9+8qZmZM2XJ8hj/1fGrZv40U4vSFunaNteGKLvAMeHcHs2kek2qVQpsvWV3VfuCphQAlMNW/huOAaDK2rx5s3bv3q2LLrrIPeZ0OrVu3To99dRTWrlypQ4fPqx9+/Z53C1VUFCgJk2alDOjS1RUlKKiorzGIyMjA3rRHuj5qxKTapXMqjdca3WWOnX7u7d7NaQkyZIlm2y64907NPDcgWH7Br5wPbcVMalek2qVAlPviczHQucAjLNmTeV3SUnS/v2uOACoLq644gp9/vnn2rp1q/ujY8eOGj58uPvzyMhIrV692n3Mtm3blJOTo+Tk8H7MBoB/ZedkK7cwt8L9liztKNyh7JzsIGYFoDriTikAxvG12bRmjXTFFYHMBAD8p27dujrvvPM8xmrXrq24uDj3+I033q
j09HQ1aNBAsbGxuvXWW5WcnFzhIucAUJ68/Xl+jQNgLppSAAAAhnj88ccVERGhgQMHqqSkRL1799bTTz8d6rQAVDONajfyaxwAc9GUAmCc7t2lo96SXmkcAFRna465NTQ6Olpz5szRnDlzQpMQAADAUVhTCoBxyt6+VxnevgcAAFC+3UW7/RoHwFw0pQAYqZwXSZ3QfgAAAFPx+B4Af6EpBcA42dnS3r2Vx+zd64oDAACAJ2ep069xAMxFUwqAcfJ8fBGMr3EAAAAmWfvzWr/GATAXTSkAxomP928cAACASXYU7vBrHABz0ZQCYJwuXSS7vfIYu90VBwAAAE9N6zb1axwAc9GUAmCc9esl53GWOHA6XXEAAADwVFhS6Nc4AOaiKQXAOKwpBQAAcPJssvk1DoC5aEoBME5cnH/jAAAATHJm3Jl+jQNgLppSAIzz2Wf+jQMAADDJ2I5jZbdVvkCn3WbX2I5jg5QRgOqKphQA4/i6VhRrSgEAAHirWaOm0pPTK41JT05XzRo1g5QRgOqKphQA49Su7d84AAAA03RO6HxK+wFAoikFwEAXXujfOAAAAJM4S52asGJCpTETV0yUs/Q4rzsGYDyaUgCM07Spf+MAAABMkp2TrdzC3EpjdhTuUHZOdpAyAlBdVYmm1Jw5c5SUlKTo6Gh16tRJH3/8sU/HLVq0SDabTQMGDAhsggDCSpMm/o0DAAAwyc7CnX6NA2CukDelFi9erPT0dE2dOlWffvqpLrzwQvXu3Vu7d++u9LiffvpJd9xxh1JSUoKUKYBw4fTxTnJf4wAAAEzyS/Evfo0DYK6QN6VmzZqlm266SaNGjdI555yjZ599VrVq1dLcuXMrPMbpdGr48OGaNm2azjjjjCBmCyAcZPt4J7mvcQAAACZpWKuhX+MAmKtGKL/44cOHtXnzZk2ePNk9FhERodTUVG3YsKHC46ZPn65GjRrpxhtvVPZx/tVYUlKikpIS93ZhYaEkyeFwyOFwnGIF3srmDMTcVZFJ9ZpUqxTe9TqdEZLsPsQ55XCUBj6hIAvnc3ssk2qVzKo30LWa8D0EgJPVpI5vaxz4GgfAXCFtSu3Zs0dOp1ONGzf2GG/cuLG++eabco/54IMP9OKLL2rr1q0+fY0ZM2Zo2rRpXuOrVq1SrVq1TjhnX2VlZQVs7qrIpHpNqlUKz3qjo+MkdfUhboOWL98b+IRCJBzPbUVMqlUyq95A1VpcXByQeQEAAHBESJtSJ2r//v3685//rBdeeEGnn366T8dMnjxZ6enp7u3CwkIlJiaqV69eio2N9XuODodDWVlZ6tmzpyIjI/0+f1VjUr0m1SqFd72pqdK991qyLEmylRNhKSJCuv32TqpZM8jJBUE4n9tjmVSrZFa9ga617M5qAIC33UWVr/97onEAzBXSptTpp58uu92ugoICj/GCggI1Kee1Vz/88IN++ukn9evXzz1WWup6tKZGjRratm2bWrVq5XFMVFSUoqKivOaKjIwM6AV7oOevakyq16RapfCs98MP9b+GVEVsKi2VPvkkUt27BympEAjHc1sRk2qVzKo3ULWa8v0DgJMRXzfer3EAzBXShc5r1qypDh06aPXq1e6x0tJSrV69WsnJyV7xbdq00eeff66tW7e6P6655hpdfvnl2rp1qxITE4OZPoBqKi/Pv3EAAAAmSWmeooTYBNnKveNcssmmxNhEpTTnTekAKhfyx/fS09M1cuRIdezYUZdccokyMjJUVFSkUaNGSZJGjBihZs2aacaMGYqOjtZ5553ncXz9+vUlyWscACoS7+Mv7XyNAwAAMIk9wq7ZV87WwNcGlrvfkqWMKzNkjzj+i2UAmC3kTakhQ4bol19+0ZQpU5Sfn6927dppxYoV7sXPc3JyFBER0hu6AISZlBQpIUHaubP8x/hsNtf+FH65BwAAAAABE/KmlCSNHz9e48ePL3ffmjVrKj12/vz5/k8IQFiz26XZs6VBg7z32f53F3pGhi
sOAAAAnpylTk1YMaHSmIkrJqr/2f25WwpApbgFCYCR0tKkO+7wHo+IcI2npQU/JwAAgOogOydbuYW5lcbsKNyh7JzsIGUEoLqiKQXASJmZ0qOPeo87na7xzMzg5wQAAFAd7Czc6dc4AOaiKQXAOE6nNGFC+etJlZk40RUHAAAAT78U/+LXOADmoikFwDjZ2VJuJXecW5a0Y4crDgAAAJ4a1mro1zgA5qIpBcA4eXn+jQMAADBJs9hmfo0DYC6aUgCMEx/v3zgAAACTpDRPUVxMXKUxcTFxSmmeEqSMAFRXNKUAGCclRUpIkGy28vfbbFJioisOAAAAABAYNKUAGMdul2bPLn9fWaMqI8MVBwAAAE/ZOdnae3BvpTF7D+5Vdg4LdAKoHE0pAEZKS5OWLJFq1/YcT0hwjaelhSYvAACAqi5vv28Lb/oaB8BcNKUAGCstTRo69Mj2VVeVavt2GlIAAACVia/r28KbvsYBMBdNKQAAAACAz1KapyghNqHSmMTYRBY6B3BcNKUAGCszU1q06Mj28uURSkpyjQMAAKB89gi7hp03rNKYoecNlT2CBToBVI6mFAAjZWZKgwZJRUWe4zt3usZpTAEAAJTPWerU3C1zK42Zu2WunKXOIGUEoLqiKQXAOE6nNGGCZFne+8rGJk50xQEAAMDTmp/W+PT2vTU/rQlOQgCqLZpSAIyTnS3l5la837KkHTtccQAAAPDka7OJphSA46EpBcA4eT6+ndjXOAAAAADAiaMpBcA48T6+ndjXOAAAAJN0T+ru1zgA5qIpBcA4KSlSQoJks5W/32aTEhNdcQAAAPDUPam74mLiKo2Ji4mjKQXguGhKATCO3S7Nnl3+vrJGVUaGKw4AAACe7BF2Pd/v+Upjnu/3vOwRXEwBqBxNKQBGSkuTliyR6tb1HE9IcI2npYUmLwAAgOogrW2a3hj8huLreK53kBCboDcGv6G0tlxMATg+mlIAjJWWJo0ff2R72DCntm+nIQUAAOAry7Iq3QaAytCUAmC0iKP+FmzRgkf2AAAAfJH5daYGvTZI+UX5HuO79u/SoNcGKfPrzBBlBqA6oSkFAAAAAPCZs9SpCSsmyJL3XVFlYxNXTJSz1Bns1ABUMzSlAAAAwsQzzzyjCy64QLGxsYqNjVVycrLeeecd9/5Dhw5p3LhxiouLU506dTRw4EAVFBSEMGMA1VF2TrZyC3Mr3G/J0o7CHcrOyQ5iVgCqI5pSAAAAYSIhIUEPPfSQNm/erE2bNqlHjx7q37+/vvzyS0nSbbfdpmXLlun111/X2rVrtWvXLqWxkB6AE5S3P8+vcQDMVSPUCQBAKJWWHvn8558lp5N1pQBUX/369fPYfuCBB/TMM8/oo48+UkJCgl588UUtWLBAPXr0kCTNmzdPbdu21UcffaTOnTuHImUA1VB83fjjB51AHABzcacUAGNlZkpPPnlke+FCu1q0cI0DQHXndDq1aNEiFRUVKTk5WZs3b5bD4VBqaqo7pk2bNmrevLk2bNgQwkwBVDcpzVOUEJsgm2zl7rfJpsTYRKU0TwlyZgCqG+6UAmCkzExp4EDv8Z07XeNvvCHxRAuA6ujzzz9XcnKyDh06pDp16ujNN9/UOeeco61bt6pmzZqqX7++R3zjxo2Vn59f/mSSSkpKVFJS4t4uLCyUJDkcDjkcDr/nXzZnIOauakyqVTKrXhNqfSz1MQ3NHCqbbB4Lnpc1qh5NfVSlzlKVOksrmqJaMuHcHs2kek2qVQpsvScyJ00pAMZxOqUxYyqPGTNG6t+fR/kAVD9nn322tm7dqt9//11LlizRyJEjtXbt2pOeb8aMGZo2bZrX+KpVq1SrVq1TSbVSWVlZAZu7qjGpVsmsesO51ihFaVLSJD2f+7x+++M393hcZJxubHajon6M0vIfl4cww8AK53NbHpPqNalWKTD1FhcX+xxLUwqAcdaskfburTxm715X3BVXBCMjAPCfmjVrqnXr1pKkDh066J
NPPtHs2bM1ZMgQHT58WPv27fO4W6qgoEBNmjSpcL7JkycrPT3dvV1YWKjExET16tVLsbGxfs/f4XAoKytLPXv2VGRkpN/nr0pMqlUyq15Tar1KV2ns/rFq8WQLSdLC/gs1oO0A2SPC97d6ppzbMibVa1KtUmDrLbur2hc0pQAYZ80a3+NoSgGo7kpLS1VSUqIOHTooMjJSq1ev1sD/Pb+8bds25eTkKDk5ucLjo6KiFBUV5TUeGRkZ0Iv2QM9flZhUq2RWvSbUGh0V7f48JSnFYzucmXBuj2ZSvSbVKgWm3hOZj6YUAABAmJg8ebL69Omj5s2ba//+/VqwYIHWrFmjlStXql69errxxhuVnp6uBg0aKDY2VrfeequSk5N58x6Ak+Ysdbo/X5+7XmmxaWF9pxQA/+LtewCM0727f+MAoKrYvXu3RowYobPPPltXXHGFPvnkE61cuVI9e/aUJD3++OO6+uqrNXDgQF122WVq0qSJMnnlKICTlPl1pjo838G9PfiNwUqanaTMr/l7BYBvuFMKgHG6d5fi4ipfVyoujqYUgOrnxRdfrHR/dHS05syZozlz5gQpIwDhKvPrTA16bZDHm/ckaWfhTg16bZCWDF6itLa8yhhA5bhTCoBx7HZp9OjKY0aP5s17AAAA5XGWOjVhxQSvhpQk99jEFRM9Hu0DgPLQlAJgHKdTWriw8phFi1xxAAAA8JSdk63cwtwK91uytKNwh7JzsoOYFYDqiKYUAONkZ0u5FV9HSZJ27HDFAQAAwFPe/jy/xgEwF00pAMbJ8/H6yNc4AAAAk8TXjfdrHABz0ZQCYJx4H6+PfI0DAAAwSUrzFMXFxFUaExcTp5TmKUHKCEB1RVMKgHFSUqSEBMlmK3+/zSYlJrriAAAAAACBQVMKgHHsdmn2bMnyfmGMJNd4RgZv3wMAAChPdk629h7cW2nM3oN7WegcwHHRlAIAAAAA+IyFzgH4C00pAMZxOqUJEyreb7NJEye64gAAAOCJhc4B+AtNKQDGyc6WcnMr3m9Z0o4drjgAAAB4SmmeooTYBNlU/gKdNtmUGJvIQucAjoumFADj5Pl4J7mvcQAAACaxR9g1+8rZkuTVmCrbzrgyQ/YIFugEUDmaUgCME+/jneS+xgEAAJgmrW2a7uhyh2zHvM44whahO7rcobS2aSHKDEB1QlMKgHFSUqSEBNfaUeWx2aTERFccAAAAvGV+nalH1z+qUqvUY9xpOfXo+keV+XVmiDIDUJ3QlAJgHLtdmu2649yrMVW2nZHhigMAAIAnZ6lTE1ZMkCWrwpiJKybKWcpbYwBUrko0pebMmaOkpCRFR0erU6dO+vjjjyuMfeGFF5SSkqLTTjtNp512mlJTUyuNB4DypKVJS5ZIjRp5jickuMbTuOMcAACgXNk52cotrPitMZYs7Sjcoewc3hoDoHIhb0otXrxY6enpmjp1qj799FNdeOGF6t27t3bv3l1u/Jo1azRs2DC9//772rBhgxITE9WrVy/t3LkzyJkDqO7S0qSVK49sP//8H9q+nYYUAABAZfL2+/Y2GF/jAJgr5E2pWbNm6aabbtKoUaN0zjnn6Nlnn1WtWrU0d+7ccuNfffVVjR07Vu3atVObNm30z3/+U6WlpVq9enWQMwcAAAAA8zSq3ej4QScQB8BcIW1KHT58WJs3b1Zqaqp7LCIiQqmpqdqwYYNPcxQXF8vhcKhBgwaBShNAmMrMlHr3PrI9ZkwNJSW5xgEAAAAAgVUjlF98z549cjqdaty4scd448aN9c033/g0x//93/+padOmHo2to5WUlKikpMS9XVhYKElyOBxyOBwnmXnFyuYMxNxVkUn1mlSrFP71vvmmTUOH2mVZknRktfOdOy0NGiQtWuTUtddWvHhndRbu5/ZoJtUqmVVvoGs14XsIACcr/0C+X+MAmCukTalT9dBDD2nRokVas2aNoqOjy42ZMWOGpk2b5jW+atUq1apVK2C5ZWVlBWzuqs
ikek2qVQrPep1OaezYXrIsu45uSEmSZdkkWRo37rBq1MgK6zfwheO5rYhJtUpm1RuoWouLiwMyLwCEg1+Kf/FrHABzhbQpdfrpp8tut6ugoMBjvKCgQE2aNKn02EcffVQPPfSQ3n33XV1wwQUVxk2ePFnp6enu7cLCQvfi6LGxsadWQDkcDoeysrLUs2dPRUZG+n3+qsakek2qVQrveteutWnv3sr++rNpz55aio3tq27dwu9uqXA+t8cyqVbJrHoDXWvZndUAAG8NazX0axwAc4W0KVWzZk116NBBq1ev1oABAyTJvWj5+PHjKzxu5syZeuCBB7Ry5Up17Nix0q8RFRWlqKgor/HIyMiAXrAHev6qxqR6TapVCs96f/Hxl3a//FJDYVa6h3A8txUxqVbJrHoDVasp3z8AOBnNYpv5NQ6AuUL+9r309HS98MILeumll/T111/rlltuUVFRkUaNGiVJGjFihCZPnuyOf/jhh3XPPfdo7ty5SkpKUn5+vvLz83XgwIFQlQCgmomP928cAACASVKapyghNqHSmMTYRKU0TwlSRgCqq5A3pYYMGaJHH31UU6ZMUbt27bR161atWLHCvfh5Tk6O8vLy3PHPPPOMDh8+rEGDBik+Pt798eijj4aqBADVTEqKlJAg2Wzl77fZpMREVxwAAAA82SPsmn3l7EpjMq7MkD0ijBfnBOAXVWKh8/Hjx1f4uN6aNWs8tn/66afAJwQgrNnt0uzZ0qBBrgaUddSyUWWNqowMhfUi5wAAAAAQaiG/UwoAQiEtTVqyRDr2nQoJCa7xtLTQ5AUAAFDVOUudmrBiQoX7bbJp4oqJcpY6g5gVgOqIphQAY6WlSZs3H9l+7bU/tH07DSkAAIDKZOdkK7cwt8L9liztKNyh7JzsIGYFoDqiKQXAaEc/oteli8UjewAAAMeRtz/v+EEnEAfAXDSlAAAAAAA+i6/r2yuKfY0DYC6aUgAAAACA/2/v3oOquu73jz8HxCMgB/GCSLgYW4vWqF9rlBJjNUoVkzYSSK2Ok2jH1kvQRp1MbdpYk3YatToxacbSxPGSjo02OF4SrbdY1GhQE8VEjCGttTZR0KpREYIirN8f/jhxA+oRzwXPfr9mmGGvvVis56xtzieLzcZj/ZP6q014m5v2aRPeRv2T+FPGAG6OTSkAAAAAAAD4HZtSAAAAAACPvfff93T2q7M37XP2q7M86BzALbEpBQAAAADwGA86B+AtbEoBsLXq6q8/f/99h+UYAAAA9cVGxnq1HwD7YlMKgG2tXi317v318YgRzdSx47V2AAAAAIBvsSkFwJZWr5Yef1wqqXNX+YkT19rZmAIAAGjY6fLTXu0HwL7YlAJgO9XV0tNPS8bUP1fbNnWq+FU+AACABvDrewC8hU0pALbz3nvSF1/c+Lwx0uefX+sHAAAAAPANNqUA2E7dX9m7034AAAB2wq/vAfAWNqUA2E6HDt7tBwAAYCcdojwrkjztB8C+2JQCYDv9+0sJCZLD0fB5h0NKTLzWDwAAAFb9k/orwZUghxouphxyKNGVqP5JFFMAbo5NKQC2ExoqvfLKtc/rbkzVHr/88rV+AAAAsAoNCdUrGdeKqbobU7XHL2e8rNAQiikAN8emFABbysqSVq2S7rnH2p6QcK09Kysw8wIAALgbZHXN0qoRq3SPy1pMJbgStGrEKmV1pZgCcGvNAj0BAAiUrCxp+HApP/+qNm48qGHD/k8PPdSMO6QAAAA8kNU1S8NThiv/3/nauGujhj04TA91eog7pAB4jE0pALYWGioNGGBUXn5CAwb0ZEMKAADgNoSGhGpA8gCVHy7XgOQBbEgBuC38+h4AAAAAAAD8jk0pAAAAAAAA+B2bUgAAAAAAAPA7NqUAAACCxOzZs9WnTx9FRUUpNjZWmZmZKi4utvSprKxUTk6O2rRpo5YtWyo7O1unTp0K0IwBAICdsSkFAAAQJHbs2KGcnBzt2bNHW7duVVVVlYYMGa
Ly8nJ3n2nTpumdd95RXl6eduzYoZMnTyoriz/dDgAA/I+/vgcAABAkNm3aZDletmyZYmNjtX//fn3ve9/ThQsXtHjxYr355psaNGiQJGnp0qXq2rWr9uzZo+9+97uBmDYAALAp7pQCAAAIUhcuXJAktW7dWpK0f/9+VVVVKT093d2nS5cuSkpKUkFBQUDmCAAA7Is7pQAAAIJQTU2Npk6dqn79+um+++6TJJWWlqp58+Zq1aqVpW/79u1VWlra4DiXL1/W5cuX3ccXL16UJFVVVamqqsrr864d0xdjNzV2yirZK6+dskr2ymunrJK98topq+TbvLczJptSAAAAQSgnJ0dFRUXatWvXHY0ze/ZsvfDCC/Xat2zZooiIiDsa+2a2bt3qs7GbGjtlleyV105ZJXvltVNWyV557ZRV8k3eiooKj/uyKQUAABBkJk+erPXr12vnzp1KSEhwt8fFxenKlSs6f/685W6pU6dOKS4ursGxnn32WU2fPt19fPHiRSUmJmrIkCFyuVxen3tVVZW2bt2q73//+woLC/P6+E2JnbJK9sprp6ySvfLaKatkr7x2yir5Nm/tXdWeYFMKAAAgSBhjNGXKFK1Zs0bbt2/Xvffeaznfu3dvhYWFadu2bcrOzpYkFRcX67///a/S0tIaHNPpdMrpdNZrDwsL82nR7uvxmxI7ZZXslddOWSV75bVTVsleee2UVfJN3tsZj00pAACAIJGTk6M333xT69atU1RUlPs5UdHR0QoPD1d0dLTGjRun6dOnq3Xr1nK5XJoyZYrS0tL4y3sAAMDv2JQCAAAIErm5uZKkgQMHWtqXLl2qsWPHSpIWLFigkJAQZWdn6/Llyxo6dKj+9Kc/+XmmAAAAbEoBAAAEDWPMLfu0aNFCCxcu1MKFC/0wIwAAgBsLCfQEAAAAAAAAYD+2u1Oq9ieIt/M0+NtRVVWliooKXbx40RYPR7NTXjtlleyV105ZJXvltVNWyV55fZ21tk7w5M4ju6GW8h47ZZXslddOWSV75bVTVsleee2UVfJt3tupo2y3KVVWViZJSkxMDPBMAABAU1dWVqbo6OhAT6NJoZYCAACe8KSOchib/QiwpqZGJ0+eVFRUlBwOh9fHv3jxohITE/X555/L5XJ5ffymxk557ZRVsldeO2WV7JXXTlkle+X1dVZjjMrKyhQfH6+QEJ52cD1qKe+xU1bJXnntlFWyV147ZZXslddOWSXf5r2dOsp2d0qFhIQoISHB59/H5XLZ4kKuZae8dsoq2SuvnbJK9sprp6ySvfL6Mit3SDWMWsr77JRVsldeO2WV7JXXTlkle+W1U1bJd3k9raP40R8AAAAAAAD8jk0pAAAAAAAA+B2bUl7mdDo1a9YsOZ3OQE/FL+yU105ZJXvltVNWyV557ZRVsldeO2W1GzutrZ2ySvbKa6eskr3y2imrZK+8dsoqNZ28tnvQOQAAAAAAAAKPO6UAAAAAAADgd2xKAQAAAAAAwO/YlAIAAAAAAIDfsSl1nZ07d+qHP/yh4uPj5XA4tHbtWsv5U6dOaezYsYqPj1dERIQyMjL0z3/+85bj5uXlqUuXLmrRooW6d++uv//975bzxhj95je/UYcOHRQeHq709HSPxr0Tvsi6aNEi9e/fXzExMYqJiVF6err27dtn6TN27Fg5HA7LR0ZGhrfj1eOLvMuWLauXpUWLFpY+wbK2AwcOrJfV4XDokUcecfcJxNrOnj1bffr0UVRUlGJjY5WZmani4mJLn8rKSuXk5KhNmzZq2bKlsrOzderUqZuO68m6nTt3TqNHj5bL5VKrVq00btw4Xbp0yesZr+eLvFVVVZoxY4a6d++uyMhIxcfH68knn9TJkyct/Tp27FhvfefMmeOTnJLv1taT6zRY1lZSg/9uHQ6H5s2b5+7TFNf29ddf18CBA+VyueRwOHT+/HmPxl64cKE6duyoFi1aKDU1td57UGNeQ9weaqmvUUtRS12PWopaqim+31
JLUUvVFYhaik2p65SXl6tnz55auHBhvXPGGGVmZurf//631q1bp8LCQiUnJys9PV3l5eU3HPP999/XqFGjNG7cOBUWFiozM1OZmZkqKipy9/nDH/6gP/7xj/rzn/+svXv3KjIyUkOHDlVlZaVPckq+ybp9+3aNGjVK+fn5KigoUGJiooYMGaITJ05Y+mVkZKikpMT9sWLFCq/nq8sXeSXJ5XJZshw/ftxyPljWdvXq1ZacRUVFCg0N1Y9+9CNLP3+v7Y4dO5STk6M9e/Zo69atqqqq0pAhQyxZpk2bpnfeeUd5eXnasWOHTp48qaysrJuO68m6jR49WocPH9bWrVu1fv167dy5U+PHj/dZVsk3eSsqKnTgwAHNnDlTBw4c0OrVq1VcXKxHH320Xt/f/va3lvWdMmWKT3JKvltb6dbXabCsrSRLzpKSEi1ZskQOh0PZ2dmWfk1tbSsqKpSRkaFf/epXHo/7t7/9TdOnT9esWbN04MAB9ezZU0OHDtXp06fdfRp7zcBz1FLXUEtRS9VFLUUtJTW991tqKWqp6wWsljJokCSzZs0a93FxcbGRZIqKitxt1dXVpl27dmbRokU3HGfEiBHmkUcesbSlpqaaCRMmGGOMqampMXFxcWbevHnu8+fPnzdOp9OsWLHCS2luzltZ67p69aqJiooyb7zxhrttzJgxZvjw4d6YdqN5K+/SpUtNdHT0Dc8H89ouWLDAREVFmUuXLrnbmsLanj592kgyO3bsMMZce73DwsJMXl6eu8+RI0eMJFNQUNDgGJ6s2yeffGIkmQ8++MDdZ+PGjcbhcJgTJ074IlqDvJG3Ifv27TOSzPHjx91tycnJZsGCBV6b++3yVtZbXafBvrbDhw83gwYNsrQ1tbW9Xn5+vpFkvvzyy1uO07dvX5OTk+M+rq6uNvHx8Wb27NnGGO+9hvActRS1FLXUjVFLBff7LbVU8K4ttZT3aynulPLQ5cuXJclyS3FISIicTqd27dp1w68rKChQenq6pW3o0KEqKCiQJB07dkylpaWWPtHR0UpNTXX38bfGZq2roqJCVVVVat26taV9+/btio2NVUpKiiZNmqSzZ896Z+KNdCd5L126pOTkZCUmJmr48OE6fPiw+1wwr+3ixYs1cuRIRUZGWtoDvbYXLlyQJPc1t3//flVVVVnWoEuXLkpKSrrhGniybgUFBWrVqpXuv/9+d5/09HSFhIRo7969Xs91I97Ie6NxHQ6HWrVqZWmfM2eO2rRpo169emnevHm6evXqnYe4jTlJ3sl6s+s0mNf21KlT2rBhg8aNG1fvXFNa28a4cuWK9u/fb3l9QkJClJ6e7n59vPXvA41HLUUt1RBqKWqpWsHyfls7LrVU8K0ttZRvaik2pTxU+2I/++yz+vLLL3XlyhXNnTtXX3zxhUpKSm74daWlpWrfvr2lrX379iotLXWfr227UR9/a2zWumbMmKH4+HjLRZuRkaG//OUv2rZtm+bOnasdO3Zo2LBhqq6u9kUUjzQ2b0pKipYsWaJ169Zp+fLlqqmp0QMPPKAvvvhCUvCu7b59+1RUVKSf/vSnlvZAr21NTY2mTp2qfv366b777pN0bQ2aN29eryC42Rp4sm6lpaWKjY21nG/WrJlat27tt7X1Vt66KisrNWPGDI0aNUoul8vd/vOf/1wrV65Ufn6+JkyYoBdffFG/+MUvvJbnZryZ9VbXaTCv7RtvvKGoqKh6t1g3tbVtjDNnzqi6uvqW/27v9DXEnaGWopaqi1qKWup6wfJ+Sy0VvGtLLeWbWqpZo7/SZsLCwrR69WqNGzdOrVu3VmhoqNLT0zVs2DAZYwI9Pa/yRtY5c+Zo5cqV2r59u+UnSSNHjnR/3r17d/Xo0UPf+MY3tH37dg0ePNjrWTzR2LxpaWlKS0tzHz/wwAPq2rWrXnvtNf3ud7/zx9RvmzfWdvHixerevbv69u1raQ/02ubk5KioqOi2fkp5N/NF3qqqKo0YMULGGO
Xm5lrOTZ8+3f15jx491Lx5c02YMEGzZ8+W0+n02hwa4s2sgb5OPeGra3nJkiUaPXp0vYcIB8vaoumjlqKWqotailoqkKilGifQ16knqKXuLtwpdRt69+6tgwcP6vz58yopKdGmTZt09uxZderU6YZfExcXV+9p9KdOnVJcXJz7fG3bjfoEQmOy1po/f77mzJmjLVu2qEePHjft26lTJ7Vt21b/+te/vDX1RrmTvLXCwsLUq1cvd5ZgXNvy8nKtXLmywVtW6/Ln2k6ePFnr169Xfn6+EhIS3O1xcXG6cuVKvb82cbM18GTd4uLiLA/8k6SrV6/q3Llzfllbb+atVVtEHT9+XFu3brX8ZK8hqampunr1qv7zn/80NoZHfJH1enWv02BcW0l67733VFxcXO+n8g0J9No2Rtu2bRUaGnrLf7feuGZwZ6ilqKVuhlrKilrKd6ilqKVqUUtdE9BaqtFPowpyqvNQw4Z89tlnJiQkxGzevPmGfUaMGGF+8IMfWNrS0tLqPZxz/vz57vMXLlwI6AMcG+JJVmOMmTt3rnG5XB4/6Ozzzz83DofDrFu3ztPp3jFv5r3e1atXTUpKipk2bZoxJvjW1phrDyR1Op3mzJkzt+zrj7WtqakxOTk5Jj4+3nz22Wf1ztc+jG/VqlXutk8//dSjh3PebN1qH+D44Ycfuvts3rzZ5w9w9EVeY4y5cuWKyczMNN26dTOnT5/2aC7Lly83ISEh5ty5c7cfxAO+ylpX3es02Na21pgxY0zv3r09mkug1/Z6t/twzsmTJ7uPq6urzT333FPv4Zx3es3Ac9RSVtRSt0YtZUUt5X3UUl+jlqKWqitQtRSbUtcpKyszhYWFprCw0EgyL730kiksLHT/1YS33nrL5Ofnm6NHj5q1a9ea5ORkk5WVZRnjiSeeML/85S/dx7t37zbNmjUz8+fPN0eOHDGzZs0yYWFh5tChQ+4+c+bMMa1atTLr1q0zH3/8sRk+fLi59957zVdffXVXZZ0zZ45p3ry5WbVqlSkpKXF/lJWVub/nM888YwoKCsyxY8fMu+++a77zne+Yzp07m8rKSp9l9VXeF154wWzevNkcPXrU7N+/34wcOdK0aNHCHD582PKaBMPa1nrwwQfNj3/84wa/ZyDWdtKkSSY6Otps377dcs1VVFS4+0ycONEkJSWZf/zjH+bDDz80aWlpJi0tzTJOSkqKWb16tfvYk3XLyMgwvXr1Mnv37jW7du0ynTt3NqNGjfJZVl/lvXLlinn00UdNQkKCOXjwoGXcy5cvG2OMef/9982CBQvMwYMHzdGjR83y5ctNu3btzJNPPnlXZfX0Og2Wta114cIFExERYXJzc+t936a6tiUlJaawsNAsWrTISDI7d+40hYWF5uzZs+4+gwYNMq+++qr7eOXKlcbpdJply5aZTz75xIwfP960atXKlJaWuvt48hrizlBLUUtRSzWctRa1FLVUU3q/pZailmoKtRSbUtep3UWs+zFmzBhjjDGvvPKKSUhIMGFhYSYpKck899xz7v/Q1BowYIC7f6233nrLfOtb3zLNmzc33bp1Mxs2bLCcr6mpMTNnzjTt27c3TqfTDB482BQXF/syqk+yJicnNzjmrFmzjDHGVFRUmCFDhph27dqZsLAwk5ycbH72s59ZLvK7Ke/UqVNNUlKSad68uWnfvr15+OGHzYEDByxfEyxra8zXu+Bbtmyp9z0DtbYN5ZRkli5d6u7z1VdfmaeeesrExMSYiIgI89hjj5mSkpJ641z/NZ6s29mzZ82oUaNMy5YtjcvlMj/5yU/c/9PgK77Ie+zYsRuOm5+fb4wxZv/+/SY1NdVER0ebFi1amK5du5oXX3zRp0WyL7J6ep0Gy9rWeu2110x4eLg5f/58ve/bVNd21qxZt+yTnJzsfn+p9eqrr7r/u9y3b1+zZ88ey3lPXkPcGWopailqqYazGkMtRS3V9N5vqaWopZpCLeX4/6EAAAAAAAAAv+
FB5wAAAAAAAPA7NqUAAAAAAADgd2xKAQAAAAAAwO/YlAIAAAAAAIDfsSkFAAAAAAAAv2NTCgAAAAAAAH7HphQAAAAAAAD8jk0pAAAAAAAA+B2bUgDQSA6HQ2vXrg30NAAAAO5K1FIA2JQCcFcaO3asHA5HvY+MjIxATw0AAKDJo5YC0BQ0C/QEAKCxMjIytHTpUkub0+kM0GwAAADuLtRSAAKNO6UA3LWcTqfi4uIsHzExMZKu3Q6em5urYcOGKTw8XJ06ddKqVassX3/o0CENGjRI4eHhatOmjcaPH69Lly5Z+ixZskTdunWT0+lUhw4dNHnyZMv5M2fO6LHHHlNERIQ6d+6st99+27ehAQAAvIRaCkCgsSkFIGjNnDlT2dnZ+uijjzR69GiNHDlSR44ckSSVl5dr6NChiomJ0QcffKC8vDy9++67lkIpNzdXOTk5Gj9+vA4dOqS3335b3/zmNy3f44UXXtCIESP08ccf6+GHH9bo0aN17tw5v+YEAADwBWopAD5nAOAuNGbMGBMaGmoiIyMtH7///e+NMcZIMhMnTrR8TWpqqpk0aZIxxpjXX3/dxMTEmEuXLrnPb9iwwYSEhJjS0lJjjDHx8fHm17/+9Q3nIMk899xz7uNLly4ZSWbjxo1eywkAAOAL1FIAmgKeKQXgrvXQQw8pNzfX0ta6dWv352lpaZZzaWlpOnjwoCTpyJEj6tmzpyIjI93n+/Xrp5qaGhUXF8vhcOjkyZMaPHjwTefQo0cP9+eRkZFyuVw6ffp0YyMBAAD4DbUUgEBjUwrAXSsyMrLeLeDeEh4e7lG/sLAwy7HD4VBNTY0vpgQAAOBV1FIAAo1nSgEIWnv27Kl33LVrV0lS165d9dFHH6m8vNx9fvfu3QoJCVFKSoqioqLUsWNHbdu2za9zBgAAaCqopQD4GndKAbhrXb58WaWlpZa2Zs2aqW3btpKkvLw83X///XrwwQf117/+Vfv27dPixYslSaNHj9asWbM0ZswYPf/88/rf//6nKVOm6IknnlD79u0lSc8//7wmTpyo2NhYDRs2TGVlZdq9e7emTJni36AAAAA+QC0FINDYlAJw19q0aZM6dOhgaUtJSdGnn34q6dpfc1m5cqWeeuopdejQQStWrNC3v/1tSVJERIQ2b96sp59+Wn369FFERISys7P10ksvuccaM2aMKisrtWDBAj3zzDNq27atHn/8cf8FBAAA8CFqKQCB5jDGmEBPAgC8zeFwaM2aNcrMzAz0VAAAAO461FIA/IFnSgEAAAAAAMDv2JQCAAAAAACA3/HrewAAAAAAAPA77pQCAAAAAACA37EpBQAAAAAAAL9jUwoAAAAAAAB+x6YUAAAAAAAA/I5NKQAAAAAAAPgdm1IAAAAAAADwOzalAAAAAAAA4HdsSgEAAAAAAMDv2JQCAAAAAACA3/0/zSw7NsR9es8AAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Path to the log file\n", + "log_file_path = \"/home/jupyter/bert/ratio_proportion_change3_1920/logs/oct_logs/log_train_finetuned.txt\"\n", + "\n", + "# Variables to store extracted data\n", + "epochs = []\n", + "loss_values = []\n", + "accuracy_values = []\n", + "\n", + "# Reading the log file\n", + "with open(log_file_path, 'r') as file:\n", + " for line in file:\n", + " if line.strip().startswith(\"{\"): # Check for the start of a log entry\n", + " try:\n", + " log_data = eval(line.strip()) # Convert the line into a dictionary\n", + "\n", + " # Debugging: Print the parsed log data\n", + " print(f\"Parsed Log Data: {log_data}\")\n", + "\n", + " # Extract necessary fields\n", + " epoch = log_data.get('epoch')\n", + " loss = log_data.get('loss') # The loss value in the log\n", + " avg_loss = log_data.get('avg_loss') # The average loss\n", + " avg_acc = log_data.get('avg_acc') # The average accuracy\n", + "\n", + " # Debugging: Check if loss and accuracy are being extracted\n", + " print(f\"Epoch: {epoch}, Loss: {loss}, Average Loss: {avg_loss}, Average Accuracy: {avg_acc}\")\n", + "\n", + " # Append to lists if values are present\n", + " if epoch and (loss is not None or avg_loss is not None) and (avg_acc is not None):\n", + " epochs.append(epoch) # Store the epoch\n", + " loss_values.append(float(loss) if loss is not None else float(avg_loss))\n", + " accuracy_values.append(float(avg_acc))\n", + "\n", + " except Exception as e:\n", + " print(f\"Error processing line: {e}\")\n", + "\n", + "# Check if data was extracted\n", + "print(f\"Epochs: {epochs}\")\n", + "print(f\"Loss Values: {loss_values}\")\n", + "print(f\"Accuracy Values: {accuracy_values}\")\n", + "\n", + "# Plotting Loss and Accuracy if data is present\n", + "if epochs and loss_values and accuracy_values:\n", + " plt.figure(figsize=(12, 6))\n", + "\n", + " # Subplot for 
loss\n", + " plt.subplot(1, 2, 1)\n", + " plt.plot(epochs, loss_values, label='Loss', color='blue', marker='o')\n", + " plt.xlabel('Epoch')\n", + " plt.ylabel('Loss')\n", + " plt.title('Training Loss over Epochs')\n", + " plt.grid(True)\n", + "\n", + " # Subplot for accuracy\n", + " plt.subplot(1, 2, 2)\n", + " plt.plot(epochs, accuracy_values, label='Accuracy', color='green', marker='o')\n", + " plt.xlabel('Epoch')\n", + " plt.ylabel('Accuracy (%)')\n", + " plt.title('Training Accuracy over Epochs')\n", + " plt.grid(True)\n", + "\n", + " plt.tight_layout()\n", + " plt.show()\n", + "else:\n", + " print(\"No data to plot. Please check the log file for correct format.\")\n" + ] + } + ], + "metadata": { + "environment": { + "kernel": "python3", + "name": "common-cu113.m122", + "type": "gcloud", + "uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/base-cu113:m122" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/__pycache__/metrics.cpython-312.pyc b/__pycache__/metrics.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fae3f226bcb0874408b8edbe8970b4ec5b83e1a Binary files /dev/null and b/__pycache__/metrics.cpython-312.pyc differ diff --git a/__pycache__/recalibration.cpython-312.pyc b/__pycache__/recalibration.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e57ffb9e472c0d3c7057d245d623f5619a5b496a Binary files /dev/null and b/__pycache__/recalibration.cpython-312.pyc differ diff --git a/__pycache__/visualization.cpython-312.pyc b/__pycache__/visualization.cpython-312.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..56762108ac31867df1327177d13a8d6f7b3a035f Binary files /dev/null and b/__pycache__/visualization.cpython-312.pyc differ diff --git a/data_preprocessor.py b/data_preprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..654df2996a78a981b10f25f5e76818307c4a9c5d --- /dev/null +++ b/data_preprocessor.py @@ -0,0 +1,170 @@ +import time +import pandas as pd + +import sys + +class DataPreprocessor: + def __init__(self, input_file_path): + self.input_file_path = input_file_path + self.unique_students = None + self.unique_problems = None + self.unique_prob_hierarchy = None + self.unique_steps = None + self.unique_kcs = None + + def analyze_dataset(self): + file_iterator = self.load_file_iterator() + + start_time = time.time() + self.unique_students = {"st"} + self.unique_problems = {"pr"} + self.unique_prob_hierarchy = {"ph"} + self.unique_kcs = {"kc"} + for chunk_data in file_iterator: + for student_id, std_groups in chunk_data.groupby('Anon Student Id'): + self.unique_students.update({student_id}) + prob_hierarchy = std_groups.groupby('Level (Workspace Id)') + for hierarchy, hierarchy_groups in prob_hierarchy: + self.unique_prob_hierarchy.update({hierarchy}) + prob_name = hierarchy_groups.groupby('Problem Name') + for problem_name, prob_name_groups in prob_name: + self.unique_problems.update({problem_name}) + sub_skills = prob_name_groups['KC Model(MATHia)'] + for a in sub_skills: + if str(a) != "nan": + temp = a.split("~~") + for kc in temp: + self.unique_kcs.update({kc}) + self.unique_students.remove("st") + self.unique_problems.remove("pr") + self.unique_prob_hierarchy.remove("ph") + self.unique_kcs.remove("kc") + end_time = time.time() + print("Time Taken to analyze dataset = ", end_time - start_time) + print("Length of unique students->", len(self.unique_students)) + print("Length of unique problems->", len(self.unique_problems)) + print("Length of unique problem hierarchy->", 
len(self.unique_prob_hierarchy)) + print("Length of Unique Knowledge components ->", len(self.unique_kcs)) + + def analyze_dataset_by_section(self, workspace_name): + file_iterator = self.load_file_iterator() + + start_time = time.time() + self.unique_students = {"st"} + self.unique_problems = {"pr"} + self.unique_prob_hierarchy = {"ph"} + self.unique_steps = {"s"} + self.unique_kcs = {"kc"} + # with open("workspace_info.txt", 'a') as f: + # sys.stdout = f + for chunk_data in file_iterator: + for student_id, std_groups in chunk_data.groupby('Anon Student Id'): + prob_hierarchy = std_groups.groupby('Level (Workspace Id)') + for hierarchy, hierarchy_groups in prob_hierarchy: + if workspace_name == hierarchy: + # print("Workspace : ", hierarchy) + self.unique_students.update({student_id}) + self.unique_prob_hierarchy.update({hierarchy}) + prob_name = hierarchy_groups.groupby('Problem Name') + for problem_name, prob_name_groups in prob_name: + self.unique_problems.update({problem_name}) + step_names = prob_name_groups['Step Name'] + sub_skills = prob_name_groups['KC Model(MATHia)'] + for step in step_names: + if str(step) != "nan": + self.unique_steps.update({step}) + for a in sub_skills: + if str(a) != "nan": + temp = a.split("~~") + for kc in temp: + self.unique_kcs.update({kc}) + self.unique_problems.remove("pr") + self.unique_prob_hierarchy.remove("ph") + self.unique_steps.remove("s") + self.unique_kcs.remove("kc") + end_time = time.time() + print("Time Taken to analyze dataset = ", end_time - start_time) + print("Workspace-> ",workspace_name) + print("Length of unique students->", len(self.unique_students)) + print("Length of unique problems->", len(self.unique_problems)) + print("Length of unique problem hierarchy->", len(self.unique_prob_hierarchy)) + print("Length of unique step names ->", len(self.unique_steps)) + print("Length of unique knowledge components ->", len(self.unique_kcs)) + # f.close() + # sys.stdout = sys.__stdout__ + + def 
analyze_dataset_by_school(self, workspace_name, school_id=None): + file_iterator = self.load_file_iterator(sep=",") + + start_time = time.time() + self.unique_schools = set() + self.unique_class = set() + self.unique_students = set() + self.unique_problems = set() + self.unique_steps = set() + self.unique_kcs = set() + self.unique_actions = set() + self.unique_outcomes = set() + self.unique_new_steps_w_action_attempt = set() + self.unique_new_steps_w_kcs = set() + self.unique_new_steps_w_action_attempt_kcs = set() + + for chunk_data in file_iterator: + for school, school_group in chunk_data.groupby('CF (Anon School Id)'): + # if school and school == school_id: + self.unique_schools.add(school) + for class_id, class_group in school_group.groupby('CF (Anon Class Id)'): + self.unique_class.add(class_id) + for student_id, std_group in class_group.groupby('Anon Student Id'): + self.unique_students.add(student_id) + for prob, prob_group in std_group.groupby('Problem Name'): + self.unique_problems.add(prob) + + step_names = set(prob_group['Step Name']) + sub_skills = set(prob_group['KC Model(MATHia)']) + actions = set(prob_group['Action']) + outcomes = set(prob_group['Outcome']) + + self.unique_steps.update(step_names) + self.unique_kcs.update(sub_skills) + self.unique_actions.update(actions) + self.unique_outcomes.update(outcomes) + + for step in step_names: + if pd.isna(step): + step_group = prob_group[pd.isna(prob_group['Step Name'])] + else: + step_group = prob_group[prob_group['Step Name']==step] + + for kc in set(step_group['KC Model(MATHia)']): + new_step = f"{step}:{kc}" + self.unique_new_steps_w_kcs.add(new_step) + + for action, action_group in step_group.groupby('Action'): + for attempt, attempt_group in action_group.groupby('Attempt At Step'): + new_step = f"{step}:{action}:{attempt}" + self.unique_new_steps_w_action_attempt.add(new_step) + + for kc in set(attempt_group["KC Model(MATHia)"]): + new_step = f"{step}:{action}:{attempt}:{kc}" + 
self.unique_new_steps_w_action_attempt_kcs.add(new_step) + + + end_time = time.time() + print("Time Taken to analyze dataset = ", end_time - start_time) + print("Workspace-> ",workspace_name) + print("Length of unique students->", len(self.unique_students)) + print("Length of unique problems->", len(self.unique_problems)) + print("Length of unique classes->", len(self.unique_class)) + print("Length of unique step names ->", len(self.unique_steps)) + print("Length of unique knowledge components ->", len(self.unique_kcs)) + print("Length of unique actions ->", len(self.unique_actions)) + print("Length of unique outcomes ->", len(self.unique_outcomes)) + print("Length of unique new step names with actions and attempts ->", len(self.unique_new_steps_w_action_attempt)) + print("Length of unique new step names with actions, attempts and kcs ->", len(self.unique_new_steps_w_action_attempt_kcs)) + print("Length of unique new step names with kcs ->", len(self.unique_new_steps_w_kcs)) + + def load_file_iterator(self, sep="\t"): + chunk_iterator = pd.read_csv(self.input_file_path, sep=sep, header=0, iterator=True, chunksize=1000000) + return chunk_iterator + diff --git a/hint_fine_tuning.py b/hint_fine_tuning.py new file mode 100644 index 0000000000000000000000000000000000000000..0601d96047fb216cc78dd8a62df2f93a8459950e --- /dev/null +++ b/hint_fine_tuning.py @@ -0,0 +1,382 @@ +import argparse +import os +import sys +import torch +import torch.nn as nn +from torch.utils.data import DataLoader, random_split, TensorDataset +from src.dataset import TokenizerDataset +from src.bert import BERT +from src.pretrainer import BERTFineTuneTrainer1 +from src.vocab import Vocab +import pandas as pd + + +# class CustomBERTModel(nn.Module): +# def __init__(self, vocab_size, output_dim, pre_trained_model_path): +# super(CustomBERTModel, self).__init__() +# hidden_size = 768 +# self.bert = BERT(vocab_size=vocab_size, hidden=hidden_size, n_layers=12, attn_heads=12, dropout=0.1) +# checkpoint = 
torch.load(pre_trained_model_path, map_location=torch.device('cpu')) +# if isinstance(checkpoint, dict): +# self.bert.load_state_dict(checkpoint) +# elif isinstance(checkpoint, BERT): +# self.bert = checkpoint +# else: +# raise TypeError(f"Expected state_dict or BERT instance, got {type(checkpoint)} instead.") +# self.fc = nn.Linear(hidden_size, output_dim) + +# def forward(self, sequence, segment_info): +# sequence = sequence.to(next(self.parameters()).device) +# segment_info = segment_info.to(sequence.device) + +# if sequence.size(0) == 0 or sequence.size(1) == 0: +# raise ValueError("Input sequence tensor has 0 elements. Check data preprocessing.") + +# x = self.bert(sequence, segment_info) +# print(f"BERT output shape: {x.shape}") + +# if x.size(0) == 0 or x.size(1) == 0: +# raise ValueError("BERT output tensor has 0 elements. Check input dimensions.") + +# cls_embeddings = x[:, 0] +# logits = self.fc(cls_embeddings) +# return logits + +# class CustomBERTModel(nn.Module): +# def __init__(self, vocab_size, output_dim, pre_trained_model_path): +# super(CustomBERTModel, self).__init__() +# hidden_size = 764 # Ensure this is 768 +# self.bert = BERT(vocab_size=vocab_size, hidden=hidden_size, n_layers=12, attn_heads=12, dropout=0.1) + +# # Load the pre-trained model's state_dict +# checkpoint = torch.load(pre_trained_model_path, map_location=torch.device('cpu')) +# if isinstance(checkpoint, dict): +# self.bert.load_state_dict(checkpoint) +# else: +# raise TypeError(f"Expected state_dict, got {type(checkpoint)} instead.") + +# # Fully connected layer with input size 768 +# self.fc = nn.Linear(hidden_size, output_dim) + +# def forward(self, sequence, segment_info): +# sequence = sequence.to(next(self.parameters()).device) +# segment_info = segment_info.to(sequence.device) + +# x = self.bert(sequence, segment_info) +# print(f"BERT output shape: {x.shape}") # Should output (batch_size, seq_len, 768) + +# cls_embeddings = x[:, 0] # Extract CLS token embeddings +# 
print(f"CLS Embeddings shape: {cls_embeddings.shape}") # Should output (batch_size, 768) + +# logits = self.fc(cls_embeddings) # Should now pass a tensor of size (batch_size, 768) to `fc` + +# return logits + + +# for test +class CustomBERTModel(nn.Module): + def __init__(self, vocab_size, output_dim, pre_trained_model_path): + super(CustomBERTModel, self).__init__() + self.hidden = 764 # Ensure this is defined correctly + self.bert = BERT(vocab_size=vocab_size, hidden=self.hidden, n_layers=12, attn_heads=12, dropout=0.1) + + # Load the pre-trained model's state_dict + checkpoint = torch.load(pre_trained_model_path, map_location=torch.device('cpu')) + if isinstance(checkpoint, dict): + self.bert.load_state_dict(checkpoint) + else: + raise TypeError(f"Expected state_dict, got {type(checkpoint)} instead.") + + self.fc = nn.Linear(self.hidden, output_dim) + + def forward(self, sequence, segment_info): + x = self.bert(sequence, segment_info) + cls_embeddings = x[:, 0] # Extract CLS token embeddings + logits = self.fc(cls_embeddings) # Pass to fully connected layer + return logits + +def preprocess_labels(label_csv_path): + try: + labels_df = pd.read_csv(label_csv_path) + labels = labels_df['last_hint_class'].values.astype(int) + return torch.tensor(labels, dtype=torch.long) + except Exception as e: + print(f"Error reading dataset file: {e}") + return None + + +def preprocess_data(data_path, vocab, max_length=128): + try: + with open(data_path, 'r') as f: + sequences = f.readlines() + except Exception as e: + print(f"Error reading data file: {e}") + return None, None + + if len(sequences) == 0: + raise ValueError(f"No sequences found in data file {data_path}. 
Check the file content.") + + tokenized_sequences = [] + + for sequence in sequences: + sequence = sequence.strip() + if sequence: + encoded = vocab.to_seq(sequence, seq_len=max_length) + encoded = encoded[:max_length] + [vocab.vocab.get('[PAD]', 0)] * (max_length - len(encoded)) + segment_label = [0] * max_length + + tokenized_sequences.append({ + 'input_ids': torch.tensor(encoded), + 'segment_label': torch.tensor(segment_label) + }) + + if not tokenized_sequences: + raise ValueError("Tokenization resulted in an empty list. Check the sequences and tokenization logic.") + + tokenized_sequences = [t for t in tokenized_sequences if len(t['input_ids']) == max_length] + + if not tokenized_sequences: + raise ValueError("All tokenized sequences are of unexpected length. This suggests an issue with the tokenization logic.") + + input_ids = torch.cat([t['input_ids'].unsqueeze(0) for t in tokenized_sequences], dim=0) + segment_labels = torch.cat([t['segment_label'].unsqueeze(0) for t in tokenized_sequences], dim=0) + + print(f"Input IDs shape: {input_ids.shape}") + print(f"Segment labels shape: {segment_labels.shape}") + + return input_ids, segment_labels + + +def collate_fn(batch): + inputs = [] + labels = [] + segment_labels = [] + + for item in batch: + if item is None: + continue + + if isinstance(item, dict): + inputs.append(item['input_ids'].unsqueeze(0)) + labels.append(item['label'].unsqueeze(0)) + segment_labels.append(item['segment_label'].unsqueeze(0)) + + if len(inputs) == 0 or len(segment_labels) == 0: + print("Empty batch encountered. 
Returning None to skip this batch.") + return None + + try: + inputs = torch.cat(inputs, dim=0) + labels = torch.cat(labels, dim=0) + segment_labels = torch.cat(segment_labels, dim=0) + except Exception as e: + print(f"Error concatenating tensors: {e}") + return None + + return { + 'input': inputs, + 'label': labels, + 'segment_label': segment_labels + } + +def custom_collate_fn(batch): + processed_batch = collate_fn(batch) + + if processed_batch is None or len(processed_batch['input']) == 0: + # Return a valid batch with at least one element instead of an empty one + return { + 'input': torch.zeros((1, 128), dtype=torch.long), + 'label': torch.zeros((1,), dtype=torch.long), + 'segment_label': torch.zeros((1, 128), dtype=torch.long) + } + + return processed_batch + + +def train_without_progress_status(trainer, epoch, shuffle): + for epoch_idx in range(epoch): + print(f"EP_train:{epoch_idx}:") + for batch in trainer.train_data: + if batch is None: + continue + + # Check if batch is a string (indicating an issue) + if isinstance(batch, str): + print(f"Error: Received a string instead of a dictionary in batch: {batch}") + raise ValueError(f"Unexpected string in batch: {batch}") + + # Validate the batch structure before passing to iteration + if isinstance(batch, dict): + # Verify that all expected keys are present and that the values are tensors + if all(key in batch for key in ['input_ids', 'segment_label', 'labels']): + if all(isinstance(batch[key], torch.Tensor) for key in batch): + try: + print(f"Batch Structure: {batch}") # Debugging batch before iteration + trainer.iteration(epoch_idx, batch) + except Exception as e: + print(f"Error during batch processing: {e}") + sys.stdout.flush() + raise e # Propagate the exception for better debugging + else: + print(f"Error: Expected all values in batch to be tensors, but got: {batch}") + raise ValueError("Batch contains non-tensor values.") + else: + print(f"Error: Batch missing expected keys. 
Batch keys: {batch.keys()}") + raise ValueError("Batch does not contain expected keys.") + else: + print(f"Error: Expected batch to be a dictionary but got {type(batch)} instead.") + raise ValueError(f"Invalid batch structure: {batch}") + +# def main(opt): +# # device = torch.device("cpu") +# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + +# vocab = Vocab(opt.vocab_file) +# vocab.load_vocab() + +# input_ids, segment_labels = preprocess_data(opt.data_path, vocab, max_length=128) +# labels = preprocess_labels(opt.dataset) + +# if input_ids is None or segment_labels is None or labels is None: +# print("Error in preprocessing data. Exiting.") +# return + +# dataset = TensorDataset(input_ids, segment_labels, torch.tensor(labels, dtype=torch.long)) +# val_size = len(dataset) - int(0.8 * len(dataset)) +# val_dataset, train_dataset = random_split(dataset, [val_size, len(dataset) - val_size]) + +# train_dataloader = DataLoader( +# train_dataset, +# batch_size=32, +# shuffle=True, +# collate_fn=custom_collate_fn +# ) +# val_dataloader = DataLoader( +# val_dataset, +# batch_size=32, +# shuffle=False, +# collate_fn=custom_collate_fn +# ) + +# custom_model = CustomBERTModel( +# vocab_size=len(vocab.vocab), +# output_dim=2, +# pre_trained_model_path=opt.pre_trained_model_path +# ).to(device) + +# trainer = BERTFineTuneTrainer1( +# bert=custom_model.bert, +# vocab_size=len(vocab.vocab), +# train_dataloader=train_dataloader, +# test_dataloader=val_dataloader, +# lr=5e-5, +# num_labels=2, +# with_cuda=torch.cuda.is_available(), +# log_freq=10, +# workspace_name=opt.output_dir, +# log_folder_path=opt.log_folder_path +# ) + +# trainer.train(epoch=20) + +# # os.makedirs(opt.output_dir, exist_ok=True) +# # output_model_file = os.path.join(opt.output_dir, 'fine_tuned_model.pth') +# # torch.save(custom_model.state_dict(), output_model_file) +# # print(f'Model saved to {output_model_file}') + +# os.makedirs(opt.output_dir, exist_ok=True) +# output_model_file = 
os.path.join(opt.output_dir, 'fine_tuned_model_2.pth') +# torch.save(custom_model, output_model_file) +# print(f'Model saved to {output_model_file}') + + +def main(opt): + # Set device to GPU if available, otherwise use CPU + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + print(torch.cuda.is_available()) # Should return True if GPU is available + print(torch.cuda.device_count()) + + # Load vocabulary + vocab = Vocab(opt.vocab_file) + vocab.load_vocab() + + # Preprocess data and labels + input_ids, segment_labels = preprocess_data(opt.data_path, vocab, max_length=128) + labels = preprocess_labels(opt.dataset) + + if input_ids is None or segment_labels is None or labels is None: + print("Error in preprocessing data. Exiting.") + return + + # Transfer tensors to the correct device (GPU/CPU) + input_ids = input_ids.to(device) + segment_labels = segment_labels.to(device) + labels = torch.tensor(labels, dtype=torch.long).to(device) + + # Create TensorDataset and split into train and validation sets + dataset = TensorDataset(input_ids, segment_labels, labels) + val_size = len(dataset) - int(0.8 * len(dataset)) + val_dataset, train_dataset = random_split(dataset, [val_size, len(dataset) - val_size]) + + # Create DataLoaders for training and validation + train_dataloader = DataLoader( + train_dataset, + batch_size=32, + shuffle=True, + collate_fn=custom_collate_fn + ) + val_dataloader = DataLoader( + val_dataset, + batch_size=32, + shuffle=False, + collate_fn=custom_collate_fn + ) + + # Initialize custom BERT model and move it to the device + custom_model = CustomBERTModel( + vocab_size=len(vocab.vocab), + output_dim=2, + pre_trained_model_path=opt.pre_trained_model_path + ).to(device) + + # Initialize the fine-tuning trainer + trainer = BERTFineTuneTrainer1( + bert=custom_model.bert, + vocab_size=len(vocab.vocab), + train_dataloader=train_dataloader, + test_dataloader=val_dataloader, + lr=5e-5, + num_labels=2, + 
with_cuda=torch.cuda.is_available(), + log_freq=10, + workspace_name=opt.output_dir, + log_folder_path=opt.log_folder_path + ) + + # Train the model + trainer.train(epoch=20) + + # Save the model to the specified output directory + # os.makedirs(opt.output_dir, exist_ok=True) + # output_model_file = os.path.join(opt.output_dir, 'fine_tuned_model_2.pth') + # torch.save(custom_model.state_dict(), output_model_file) + # print(f'Model saved to {output_model_file}') + os.makedirs(opt.output_dir, exist_ok=True) + output_model_file = os.path.join(opt.output_dir, 'fine_tuned_model_2.pth') + torch.save(custom_model, output_model_file) + print(f'Model saved to {output_model_file}') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Fine-tune BERT model.') + parser.add_argument('--dataset', type=str, default='/home/jupyter/bert/dataset/hint_based/ratio_proportion_change_3/er/er_train.csv', help='Path to the dataset file.') + parser.add_argument('--data_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/gt/er.txt', help='Path to the input sequence file.') + parser.add_argument('--output_dir', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/output/hint_classification', help='Directory to save the fine-tuned model.') + parser.add_argument('--pre_trained_model_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/output/pretrain:1800ms:64hs:4l:8a:50s:64b:1000e:-5lr/bert_trained.seq_encoder.model.ep68', help='Path to the pre-trained BERT model.') + parser.add_argument('--vocab_file', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/pretraining/vocab.txt', help='Path to the vocabulary file.') + parser.add_argument('--log_folder_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/logs/oct_logs', help='Path to the folder for saving logs.') + + + opt = parser.parse_args() + main(opt) \ No newline at end of file diff --git 
a/main.py b/main.py new file mode 100644 index 0000000000000000000000000000000000000000..79fb58834a741e8be10a14a96fc922ae0d580e86 --- /dev/null +++ b/main.py @@ -0,0 +1,322 @@ +import argparse + +from torch.utils.data import DataLoader +import torch +import torch.nn as nn + +from src.bert import BERT +from src.pretrainer import BERTTrainer, BERTFineTuneTrainer, BERTAttention +from src.dataset import PretrainerDataset, TokenizerDataset +from src.vocab import Vocab + +import time +import os +import tqdm +import pickle + +def train(): + parser = argparse.ArgumentParser() + + parser.add_argument('-workspace_name', type=str, default=None) + parser.add_argument('-code', type=str, default=None, help="folder for pretraining outputs and logs") + parser.add_argument('-finetune_task', type=str, default=None, help="folder inside finetuning") + parser.add_argument("-attention", type=bool, default=False, help="analyse attention scores") + parser.add_argument("-diff_test_folder", type=bool, default=False, help="use for different test folder") + parser.add_argument("-embeddings", type=bool, default=False, help="get and analyse embeddings") + parser.add_argument('-embeddings_file_name', type=str, default=None, help="file name of embeddings") + parser.add_argument("-pretrain", type=bool, default=False, help="pretraining: true, or false") + # parser.add_argument('-opts', nargs='+', type=str, default=None, help='List of optional steps') + parser.add_argument("-max_mask", type=int, default=0.15, help="% of input tokens selected for masking") + # parser.add_argument("-p", "--pretrain_dataset", type=str, default="pretraining/pretrain.txt", help="pretraining dataset for bert") + # parser.add_argument("-pv", "--pretrain_val_dataset", type=str, default="pretraining/test.txt", help="pretraining validation dataset for bert") +# default="finetuning/test.txt", + parser.add_argument("-vocab_path", type=str, default="pretraining/vocab.txt", help="built vocab model path with bert-vocab") + + 
parser.add_argument("-train_dataset_path", type=str, default="train.txt", help="fine tune train dataset for progress classifier") + parser.add_argument("-val_dataset_path", type=str, default="val.txt", help="test set for evaluate fine tune train set") + parser.add_argument("-test_dataset_path", type=str, default="test.txt", help="test set for evaluate fine tune train set") + parser.add_argument("-num_labels", type=int, default=2, help="Number of labels") + parser.add_argument("-train_label_path", type=str, default="train_label.txt", help="fine tune train dataset for progress classifier") + parser.add_argument("-val_label_path", type=str, default="val_label.txt", help="test set for evaluate fine tune train set") + parser.add_argument("-test_label_path", type=str, default="test_label.txt", help="test set for evaluate fine tune train set") + ##### change Checkpoint for finetuning + parser.add_argument("-pretrained_bert_checkpoint", type=str, default=None, help="checkpoint of saved pretrained bert model") #."output_feb09/bert_trained.model.ep40" + parser.add_argument('-check_epoch', type=int, default=None) + + parser.add_argument("-hs", "--hidden", type=int, default=64, help="hidden size of transformer model") #64 + parser.add_argument("-l", "--layers", type=int, default=4, help="number of layers") #4 + parser.add_argument("-a", "--attn_heads", type=int, default=4, help="number of attention heads") #8 + parser.add_argument("-s", "--seq_len", type=int, default=50, help="maximum sequence length") + + parser.add_argument("-b", "--batch_size", type=int, default=500, help="number of batch_size") #64 + parser.add_argument("-e", "--epochs", type=int, default=50)#1501, help="number of epochs") #501 + # Use 50 for pretrain, and 10 for fine tune + parser.add_argument("-w", "--num_workers", type=int, default=4, help="dataloader worker size") + + # Later run with cuda + parser.add_argument("--with_cuda", type=bool, default=True, help="training with CUDA: true, or false") + 
parser.add_argument("--log_freq", type=int, default=10, help="printing loss every n iter: setting n") + # parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus") + parser.add_argument("--cuda_devices", type=int, nargs='+', default=None, help="CUDA device ids") + # parser.add_argument("--on_memory", type=bool, default=False, help="Loading on memory: true or false") + + parser.add_argument("--dropout", type=float, default=0.1, help="dropout of network") + parser.add_argument("--lr", type=float, default=1e-05, help="learning rate of adam") #1e-3 + parser.add_argument("--adam_weight_decay", type=float, default=0.01, help="weight_decay of adam") + parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value") + parser.add_argument("--adam_beta2", type=float, default=0.98, help="adam first beta value") #0.999 + + parser.add_argument("-o", "--output_path", type=str, default="bert_trained.seq_encoder.model", help="ex)output/bert.model") + # parser.add_argument("-o", "--output_path", type=str, default="output/bert_fine_tuned.model", help="ex)output/bert.model") + + args = parser.parse_args() + for k,v in vars(args).items(): + if 'path' in k: + if v: + if k == "output_path": + if args.code: + setattr(args, f"{k}", args.workspace_name+f"/output/{args.code}/"+v) + elif args.finetune_task: + setattr(args, f"{k}", args.workspace_name+f"/output/{args.finetune_task}/"+v) + else: + setattr(args, f"{k}", args.workspace_name+"/output/"+v) + elif k != "vocab_path": + if args.pretrain: + setattr(args, f"{k}", args.workspace_name+"/pretraining/"+v) + else: + if args.code: + setattr(args, f"{k}", args.workspace_name+f"/{args.code}/"+v) + elif args.finetune_task: + if args.diff_test_folder and "test" in k: + setattr(args, f"{k}", args.workspace_name+f"/finetuning/"+v) + else: + setattr(args, f"{k}", args.workspace_name+f"/finetuning/{args.finetune_task}/"+v) + else: + setattr(args, f"{k}", 
args.workspace_name+"/finetuning/"+v) + else: + setattr(args, f"{k}", args.workspace_name+"/"+v) + + print(f"args.{k} : {getattr(args, f'{k}')}") + + print("Loading Vocab", args.vocab_path) + vocab_obj = Vocab(args.vocab_path) + vocab_obj.load_vocab() + print("Vocab Size: ", len(vocab_obj.vocab)) + + if args.attention: + print(f"Attention aggregate...... code: {args.code}, dataset: {args.finetune_task}") + if args.code: + new_folder = f"{args.workspace_name}/plots/{args.code}/" + if not os.path.exists(new_folder): + os.makedirs(new_folder) + + train_dataset = TokenizerDataset(args.train_dataset_path, None, vocab_obj, seq_len=args.seq_len) + train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers) + print("Load Pre-trained BERT model") + cuda_condition = torch.cuda.is_available() and args.with_cuda + device = torch.device("cuda:0" if cuda_condition else "cpu") + bert = torch.load(args.pretrained_bert_checkpoint, map_location=device) + trainer = BERTAttention(bert, vocab_obj, train_dataloader = train_data_loader, workspace_name = args.workspace_name, code=args.code, finetune_task = args.finetune_task) + trainer.getAttention() + + elif args.embeddings: + print("Get embeddings... and cluster... 
") + train_dataset = TokenizerDataset(args.test_dataset_path, None, vocab_obj, seq_len=args.seq_len) + train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers) + print("Load Pre-trained BERT model") + cuda_condition = torch.cuda.is_available() and args.with_cuda + device = torch.device("cuda:0" if cuda_condition else "cpu") + bert = torch.load(args.pretrained_bert_checkpoint).to(device) + available_gpus = list(range(torch.cuda.device_count())) + if torch.cuda.device_count() > 1: + print("Using %d GPUS for BERT" % torch.cuda.device_count()) + bert = nn.DataParallel(bert, device_ids=available_gpus) + + data_iter = tqdm.tqdm(enumerate(train_data_loader), + desc="Model: %s" % (args.pretrained_bert_checkpoint.split("/")[-1]), + total=len(train_data_loader), bar_format="{l_bar}{r_bar}") + all_embeddings = [] + for i, data in data_iter: + data = {key: value.to(device) for key, value in data.items()} + embedding = bert(data["input"], data["segment_label"]) + # print(embedding.shape, embedding[:, 0].shape) + embeddings = [h for h in embedding[:,0].cpu().detach().numpy()] + all_embeddings.extend(embeddings) + + new_emb_folder = f"{args.workspace_name}/embeddings" + if not os.path.exists(new_emb_folder): + os.makedirs(new_emb_folder) + pickle.dump(all_embeddings, open(f"{new_emb_folder}/{args.embeddings_file_name}.pkl", "wb")) + else: + if args.pretrain: + print("Pre-training......") + print("Loading Pretraining Train Dataset", args.train_dataset_path) + print(f"Workspace: {args.workspace_name}") + pretrain_dataset = PretrainerDataset(args.train_dataset_path, vocab_obj, seq_len=args.seq_len, max_mask = args.max_mask) + + print("Loading Pretraining Validation Dataset", args.val_dataset_path) + pretrain_valid_dataset = PretrainerDataset(args.val_dataset_path, vocab_obj, seq_len=args.seq_len, max_mask = args.max_mask) \ + if args.val_dataset_path is not None else None + + print("Loading Pretraining Test Dataset", 
args.test_dataset_path) + pretrain_test_dataset = PretrainerDataset(args.test_dataset_path, vocab_obj, seq_len=args.seq_len, max_mask = args.max_mask) \ + if args.test_dataset_path is not None else None + + print("Creating Dataloader") + pretrain_data_loader = DataLoader(pretrain_dataset, batch_size=args.batch_size, num_workers=args.num_workers) + pretrain_val_data_loader = DataLoader(pretrain_valid_dataset, batch_size=args.batch_size, num_workers=args.num_workers)\ + if pretrain_valid_dataset is not None else None + pretrain_test_data_loader = DataLoader(pretrain_test_dataset, batch_size=args.batch_size, num_workers=args.num_workers)\ + if pretrain_test_dataset is not None else None + + print("Building BERT model") + bert = BERT(len(vocab_obj.vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads, dropout=args.dropout) + + if args.pretrained_bert_checkpoint: + print(f"BERT model : {args.pretrained_bert_checkpoint}") + bert = torch.load(args.pretrained_bert_checkpoint) + + new_log_folder = f"{args.workspace_name}/logs" + new_output_folder = f"{args.workspace_name}/output" + if args.code: # is sent almost all the time + new_log_folder = f"{args.workspace_name}/logs/{args.code}" + new_output_folder = f"{args.workspace_name}/output/{args.code}" + + if not os.path.exists(new_log_folder): + os.makedirs(new_log_folder) + if not os.path.exists(new_output_folder): + os.makedirs(new_output_folder) + + print(f"Creating BERT Trainer .... 
masking: True, max_mask: {args.max_mask}") + trainer = BERTTrainer(bert, len(vocab_obj.vocab), train_dataloader=pretrain_data_loader, + val_dataloader=pretrain_val_data_loader, test_dataloader=pretrain_test_data_loader, + lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, + with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq, + log_folder_path=new_log_folder) + + start_time = time.time() + print(f'Pretraining Starts, Time: {time.strftime("%D %T", time.localtime(start_time))}') + # if need to pretrain from a check-point, need :check_epoch + repoch = range(args.check_epoch, args.epochs) if args.check_epoch else range(args.epochs) + counter = 0 + patience = 20 + for epoch in repoch: + print(f'Training Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}') + trainer.train(epoch) + print(f'Training Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n') + + if pretrain_val_data_loader is not None: + print(f'Validation Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}') + trainer.val(epoch) + print(f'Validation Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n') + + if trainer.save_model: # or epoch%10 == 0 and epoch > 4 + trainer.save(epoch, args.output_path) + counter = 0 + if pretrain_test_data_loader is not None: + print(f'Test Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}') + trainer.test(epoch) + print(f'Test Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n') + else: + counter +=1 + if counter >= patience: + print(f"Early stopping at epoch {epoch}") + break + + end_time = time.time() + print("Time Taken to pretrain model = ", end_time - start_time) + print(f'Pretraining Ends, Time: {time.strftime("%D %T", time.localtime(end_time))}') + else: + print("Fine Tuning......") + print("Loading Train Dataset", 
args.train_dataset_path) + train_dataset = TokenizerDataset(args.train_dataset_path, args.train_label_path, vocab_obj, seq_len=args.seq_len) + +# print("Loading Validation Dataset", args.val_dataset_path) +# val_dataset = TokenizerDataset(args.val_dataset_path, args.val_label_path, vocab_obj, seq_len=args.seq_len) \ +# if args.val_dataset_path is not None else None + + print("Loading Test Dataset", args.test_dataset_path) + test_dataset = TokenizerDataset(args.test_dataset_path, args.test_label_path, vocab_obj, seq_len=args.seq_len) \ + if args.test_dataset_path is not None else None + + print("Creating Dataloader...") + train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers) + # val_data_loader = DataLoader(val_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \ + # if val_dataset is not None else None + test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \ + if test_dataset is not None else None + + print("Load Pre-trained BERT model") + # bert = BERT(len(vocab_obj.vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads) + cuda_condition = torch.cuda.is_available() and args.with_cuda + device = torch.device("cuda:0" if cuda_condition else "cpu") + bert = torch.load(args.pretrained_bert_checkpoint, map_location=device) + + # if args.finetune_task == "SL": + # if args.workspace_name == "ratio_proportion_change4": + # num_labels = 9 + # elif args.workspace_name == "ratio_proportion_change3": + # num_labels = 9 + # elif args.workspace_name == "scale_drawings_3": + # num_labels = 9 + # elif args.workspace_name == "sales_tax_discounts_two_rates": + # num_labels = 3 + # else: + # num_labels = 2 + # # num_labels = 1 + # print(f"Number of Labels : {args.num_labels}") + new_log_folder = f"{args.workspace_name}/logs" + new_output_folder = f"{args.workspace_name}/output" + if args.finetune_task: # is sent almost all the time + new_log_folder 
= f"{args.workspace_name}/logs/{args.finetune_task}" + new_output_folder = f"{args.workspace_name}/output/{args.finetune_task}" + + if not os.path.exists(new_log_folder): + os.makedirs(new_log_folder) + if not os.path.exists(new_output_folder): + os.makedirs(new_output_folder) + + print("Creating BERT Fine Tune Trainer") + trainer = BERTFineTuneTrainer(bert, len(vocab_obj.vocab), + train_dataloader=train_data_loader, test_dataloader=test_data_loader, + lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, + with_cuda=args.with_cuda, cuda_devices = args.cuda_devices, log_freq=args.log_freq, + workspace_name = args.workspace_name, num_labels=args.num_labels, log_folder_path=new_log_folder) + + print("Fine-tune training Start....") + start_time = time.time() + repoch = range(args.check_epoch, args.epochs) if args.check_epoch else range(args.epochs) + counter = 0 + patience = 10 + for epoch in repoch: + print(f'Training Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}') + trainer.train(epoch) + print(f'Training Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n') + + if test_data_loader is not None: + print(f'Test Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}') + trainer.test(epoch) + # pickle.dump(trainer.probability_list, open(f"{args.workspace_name}/output/aaai/change4_mid_prob_{epoch}.pkl","wb")) + print(f'Test Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n') + + # if val_data_loader is not None: + # print(f'Validation Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}') + # trainer.val(epoch) + # print(f'Validation Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n') + + if trainer.save_model: # or epoch%10 == 0 + trainer.save(epoch, args.output_path) + counter = 0 + else: + counter +=1 + if counter >= patience: + print(f"Early 
import numpy as np
from scipy.special import softmax


class CELoss(object):
    """Base class for calibration-error losses.

    Subclasses bin prediction confidences, compare per-bin mean confidence
    against per-bin accuracy, and aggregate the gaps into a scalar
    calibration score.
    """

    def compute_bin_boundaries(self, probabilities=np.array([])):
        """Set ``self.bin_lowers`` / ``self.bin_uppers``.

        With no argument, [0, 1] is split into ``self.n_bins`` uniform bins.
        Given a probability vector, adaptive (equal-mass) boundaries are
        taken from the sorted probabilities, so each bin holds roughly the
        same number of samples.
        """
        if probabilities.size == 0:
            # Uniform bin spacing over [0, 1].
            bin_boundaries = np.linspace(0, 1, self.n_bins + 1)
            self.bin_lowers = bin_boundaries[:-1]
            self.bin_uppers = bin_boundaries[1:]
        else:
            # Adaptive binning: ~n_data / n_bins samples per bin.
            bin_n = int(self.n_data / self.n_bins)

            bin_boundaries = np.array([])
            probabilities_sort = np.sort(probabilities)

            for i in range(0, self.n_bins):
                bin_boundaries = np.append(bin_boundaries, probabilities_sort[i * bin_n])
            bin_boundaries = np.append(bin_boundaries, 1.0)

            self.bin_lowers = bin_boundaries[:-1]
            self.bin_uppers = bin_boundaries[1:]

    def get_probabilities(self, output, labels, logits):
        """Cache probabilities, confidences, predictions and accuracies.

        If ``logits`` is True, ``output`` is converted to probabilities with
        a row-wise softmax; otherwise it is assumed to already contain
        probabilities.
        """
        if logits:
            self.probabilities = softmax(output, axis=1)
        else:
            self.probabilities = output

        self.labels = labels
        self.confidences = np.max(self.probabilities, axis=1)    # top-1 probability
        self.predictions = np.argmax(self.probabilities, axis=1)
        self.accuracies = np.equal(self.predictions, labels)

    def binary_matrices(self):
        """Build the per-class one-vs-rest correctness matrix ``self.acc_matrix``."""
        idx = np.arange(self.n_data)
        # One-hot matrices of predictions and labels.
        pred_matrix = np.zeros([self.n_data, self.n_class])
        label_matrix = np.zeros([self.n_data, self.n_class])
        pred_matrix[idx, self.predictions] = 1
        label_matrix[idx, self.labels] = 1

        # acc_matrix[i, k] is True when sample i is "correct" with respect to
        # class k in the one-vs-rest sense (both one-hot entries agree).
        self.acc_matrix = np.equal(pred_matrix, label_matrix)

    def compute_bins(self, index=None):
        """Fill per-bin proportion / accuracy / confidence / score arrays.

        ``index=None`` uses the cached top-1 confidences; an integer selects
        the class-``index`` probability column (requires ``binary_matrices()``
        to have run first).
        """
        self.bin_prop = np.zeros(self.n_bins)
        self.bin_acc = np.zeros(self.n_bins)
        self.bin_conf = np.zeros(self.n_bins)
        self.bin_score = np.zeros(self.n_bins)

        # `is None` (not `== None`): class index 0 is a valid argument and
        # must never be confused with "no index".
        if index is None:
            confidences = self.confidences
            accuracies = self.accuracies
        else:
            confidences = self.probabilities[:, index]
            accuracies = self.acc_matrix[:, index]

        for i, (bin_lower, bin_upper) in enumerate(zip(self.bin_lowers, self.bin_uppers)):
            # Bin membership: bin_lower < confidence <= bin_upper.
            in_bin = np.greater(confidences, bin_lower.item()) * np.less_equal(confidences, bin_upper.item())
            self.bin_prop[i] = np.mean(in_bin)

            if self.bin_prop[i].item() > 0:
                self.bin_acc[i] = np.mean(accuracies[in_bin])
                self.bin_conf[i] = np.mean(confidences[in_bin])
                # |confidence - accuracy| gap inside this bin.
                self.bin_score[i] = np.abs(self.bin_conf[i] - self.bin_acc[i])


class MaxProbCELoss(CELoss):
    """Shared pipeline for losses based on the top-1 (max) probability."""

    def loss(self, output, labels, n_bins=15, logits=True):
        self.n_bins = n_bins
        super().compute_bin_boundaries()
        super().get_probabilities(output, labels, logits)
        super().compute_bins()


# http://people.cs.pitt.edu/~milos/research/AAAI_Calibration.pdf
class ECELoss(MaxProbCELoss):
    """Expected Calibration Error: bin-proportion-weighted |conf - acc|."""

    def loss(self, output, labels, n_bins=15, logits=True):
        super().loss(output, labels, n_bins, logits)
        return np.dot(self.bin_prop, self.bin_score)


class MCELoss(MaxProbCELoss):
    """Maximum Calibration Error: worst |conf - acc| over all bins."""

    def loss(self, output, labels, n_bins=15, logits=True):
        super().loss(output, labels, n_bins, logits)
        return np.max(self.bin_score)


# https://arxiv.org/abs/1905.11001
# Overconfidence Loss (useful in high-risk applications where confident but
# wrong predictions can be especially harmful).
class OELoss(MaxProbCELoss):
    """Overconfidence Error: only penalizes bins where confidence > accuracy."""

    def loss(self, output, labels, n_bins=15, logits=True):
        super().loss(output, labels, n_bins, logits)
        return np.dot(self.bin_prop,
                      self.bin_conf * np.maximum(self.bin_conf - self.bin_acc, np.zeros(self.n_bins)))


# https://arxiv.org/abs/1904.01685
class SCELoss(CELoss):
    """Static Calibration Error: classwise ECE with uniform bins, averaged."""

    def loss(self, output, labels, n_bins=15, logits=True):
        sce = 0.0
        self.n_bins = n_bins
        self.n_data = len(output)
        self.n_class = len(output[0])

        super().compute_bin_boundaries()
        super().get_probabilities(output, labels, logits)
        super().binary_matrices()

        for i in range(self.n_class):
            super().compute_bins(i)
            sce += np.dot(self.bin_prop, self.bin_score)

        return sce / self.n_class


class TACELoss(CELoss):
    """Thresholded Adaptive Calibration Error.

    Adaptive (equal-mass) bins per class; probabilities below ``threshold``
    are zeroed out before binning.
    """

    def loss(self, output, labels, threshold=0.01, n_bins=15, logits=True):
        tace = 0.0
        self.n_bins = n_bins
        self.n_data = len(output)
        self.n_class = len(output[0])

        super().get_probabilities(output, labels, logits)
        # Drop tiny probabilities before binning.
        self.probabilities[self.probabilities < threshold] = 0
        super().binary_matrices()

        for i in range(self.n_class):
            # Adaptive boundaries recomputed per class column.
            super().compute_bin_boundaries(self.probabilities[:, i])
            super().compute_bins(i)
            tace += np.dot(self.bin_prop, self.bin_score)

        return tace / self.n_class


# ACE is TACE with the threshold fixed at 0.
class ACELoss(TACELoss):
    """Adaptive Calibration Error: TACE with threshold = 0."""

    def loss(self, output, labels, n_bins=15, logits=True):
        return super().loss(output, labels, 0.0, n_bins, logits)
0000000000000000000000000000000000000000..38734ca2de71d90578b12a191d5ff30a57f26d5c Binary files /dev/null and b/new_fine_tuning/.DS_Store differ diff --git a/new_fine_tuning/README.md b/new_fine_tuning/README.md new file mode 100644 index 0000000000000000000000000000000000000000..68814e830971cdd78d8a453ce6b4d5c15ff01dc8 --- /dev/null +++ b/new_fine_tuning/README.md @@ -0,0 +1,197 @@ +## Pre-training Data + +### ratio_proportion_change3 : Calculating Percent Change and Final Amounts +> clear;python3 prepare_pretraining_input_vocab_file.py -analyze_dataset_by_section True -workspace_name ratio_proportion_change3 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -pretrain True -train_file_path pretraining/pretrain1000.txt -train_info_path pretraining/pretrain1000_info.txt -test_file_path pretraining/test1000.txt -test_info_path pretraining/test1000_info.txt + +> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -pretrain True -train_file_path pretraining/pretrain2000.txt -train_info_path pretraining/pretrain2000_info.txt -test_file_path pretraining/test2000.txt -test_info_path pretraining/test2000_info.txt + +#### Test simple +> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -code full -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path full.txt -train_info_path full_info.txt + +> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -code gt -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor 
-opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path er.txt -train_info_path er_info.txt -test_file_path me.txt -test_info_path me_info.txt + +> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -code correct -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path correct.txt -train_info_path correct_info.txt -test_file_path incorrect.txt -test_info_path incorrect_info.txt -final_step FinalAnswer + +> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -code progress -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path graduated.txt -train_info_path graduated_info.txt -test_file_path promoted.txt -test_info_path promoted_info.txt + +### ratio_proportion_change4 : Using Percents and Percent Change +> clear;python3 prepare_pretraining_input_vocab_file.py -analyze_dataset_by_section True -workspace_name ratio_proportion_change4 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor NumeratorLabel1 DenominatorLabel1 -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -pretrain True -train_file_path pretraining/pretrain1000.txt -train_info_path pretraining/pretrain1000_info.txt -test_file_path pretraining/test1000.txt -test_info_path pretraining/test1000_info.txt + +> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor NumeratorLabel1 DenominatorLabel1 -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -pretrain True -train_file_path 
pretraining/pretrain2000.txt -train_info_path pretraining/pretrain2000_info.txt -test_file_path pretraining/test2000.txt -test_info_path pretraining/test2000_info.txt + +#### Test simple +> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -code full -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path full.txt -train_info_path full_info.txt + +> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -code gt -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path er.txt -train_info_path er_info.txt -test_file_path me.txt -test_info_path me_info.txt + +> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -code correct -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path correct.txt -train_info_path correct_info.txt -test_file_path incorrect.txt -test_info_path incorrect_info.txt -final_step FinalAnswer + +> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -code progress -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path graduated.txt -train_info_path graduated_info.txt -test_file_path promoted.txt -test_info_path promoted_info.txt + +## Pretraining + +### ratio_proportion_change3 : Calculating Percent Change and Final Amounts +> clear;python3 src/main.py -workspace_name ratio_proportion_change3_1920 -code pretrain1000 --pretrain_dataset pretraining/pretrain1000.txt 
--pretrain_val_dataset pretraining/test1000.txt +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000 --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt + +#### Test simple models +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 1 --attn_heads 1 + +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 1 --attn_heads 2 + +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 2 --attn_heads 2 + +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 2 --attn_heads 4 + +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 4 --attn_heads 4 + +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 4 --attn_heads 8 + + + +### ratio_proportion_change4 : Using Percents and Percent Change +> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain1000 --pretrain_dataset pretraining/pretrain1000.txt --pretrain_val_dataset pretraining/test1000.txt +> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000 --pretrain_dataset pretraining/pretrain2000.txt 
--pretrain_val_dataset pretraining/test2000.txt + +#### Test simple models +> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_1l1h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 1 --attn_heads 1 + +> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_1l2h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 1 --attn_heads 2 + +> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_2l2h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 2 --attn_heads 2 + +> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_2l4h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 2 --attn_heads 4 + +> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_4l4h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 4 --attn_heads 4 + +> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_4l8h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 4 --attn_heads 8 + + +## Preparing Fine Tuning Data + +### ratio_proportion_change3 : Calculating Percent Change and Final Amounts +> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -final_step FinalAnswer + +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task check2 --train_dataset finetuning/check2/train.txt --test_dataset finetuning/check2/test.txt 
--train_label finetuning/check2/train_label.txt --test_label finetuning/check2/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279 --epochs 51 + +#### Attention Head Check + + + +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task er ;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task correct ;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code 
pretrain2000_1l1h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task promoted + +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task er;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task correct;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code 
pretrain2000_1l2h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task promoted + +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task er;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task correct;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code 
pretrain2000_2l2h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task promoted + +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task er;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task correct;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code 
pretrain2000_2l4h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task promoted + + + +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task er;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task correct;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code 
pretrain2000_4l4h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task promoted + +clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset full/full_attn.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task full + + +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task er;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task correct;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code 
pretrain2000_4l8h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task promoted + + + me + + er + +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset pretraining/attention_train.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep273 --attention True + + + +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset pretraining/attention_train.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep1021 --attention True + + + +### ratio_proportion_change4 : Using Percents and Percent Change +> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor NumeratorLabel1 DenominatorLabel1 -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -final_step FinalAnswer + +### scale_drawings_3 : Calculating Measurements Using a Scale +> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name scale_drawings_3 -opt_step1 opt1-check opt1-ratio-L-n opt1-ratio-L-d opt1-ratio-R-n opt1-ratio-R-d opt1-me2-top-3 opt1-me2-top-4 opt1-me2-top-2 opt1-me2-top-1 opt1-me2-middle-1 opt1-me2-bottom-1 -opt_step2 opt2-check opt2-ratio-L-n opt2-ratio-L-d opt2-ratio-R-n opt2-ratio-R-d opt2-me2-top-3 opt2-me2-top-4 opt2-me2-top-1 opt2-me2-top-2 opt2-me2-middle-1 
opt2-me2-bottom-1 -final_step unk-value1 unk-value2 + +### sales_tax_discounts_two_rates : Solving Problems with Both Sales Tax and Discounts +> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name sales_tax_discounts_two_rates -opt_step1 optionalTaskGn salestaxFactor2 discountFactor2 multiplyOrderStatementGn -final_step totalCost1 + + +# Fine Tuning Pre-trained model + +## ratio_proportion_change3 : Calculating Percent Change and Final Amounts +> Selected Pretrained model: **ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279** +> New **bert/ratio_proportion_change3/output/pretrain2000/bert_trained.seq_encoder.model.ep731** + +### 10per +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task 10per --train_dataset finetuning/10per/train.txt --test_dataset finetuning/10per/test.txt --train_label finetuning/10per/train_label.txt --test_label finetuning/10per/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000/bert_trained.seq_encoder.model.ep731 --epochs 51 + +### IS +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task IS --train_dataset finetuning/IS/train.txt --test_dataset finetuning/FS/train.txt --train_label finetuning/IS/train_label.txt --test_label finetuning/FS/train_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000/bert_trained.seq_encoder.model.ep731 --epochs 51 + +### FS +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task FS --train_dataset finetuning/FS/train.txt --test_dataset finetuning/IS/train.txt --train_label finetuning/FS/train_label.txt --test_label finetuning/IS/train_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000/bert_trained.seq_encoder.model.ep731 --epochs 51 + +### correctness +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task correctness --train_dataset 
finetuning/correctness/train.txt --test_dataset finetuning/correctness/test.txt --train_label finetuning/correctness/train_label.txt --test_label finetuning/correctness/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279 --epochs 51 + +### SL +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task SL --train_dataset finetuning/SL/train.txt --test_dataset finetuning/SL/test.txt --train_label finetuning/SL/train_label.txt --test_label finetuning/SL/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279 --epochs 51 + +### effectiveness +> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task effectiveness --train_dataset finetuning/effectiveness/train.txt --test_dataset finetuning/effectiveness/test.txt --train_label finetuning/effectiveness/train_label.txt --test_label finetuning/effectiveness/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279 --epochs 51 + + +## ratio_proportion_change4 : Using Percents and Percent Change +> Selected Pretrained model: **ratio_proportion_change4/output/bert_trained.seq_encoder.model.ep287** +### 10per +> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -finetune_task 10per --train_dataset finetuning/10per/train.txt --test_dataset finetuning/10per/test.txt --train_label finetuning/10per/train_label.txt --test_label finetuning/10per/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change4/output/bert_trained.seq_encoder.model.ep287 --epochs 51 + +### IS + +### FS + +### correctness +> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -finetune_task correctness --train_dataset finetuning/correctness/train.txt --test_dataset finetuning/correctness/test.txt --train_label finetuning/correctness/train_label.txt --test_label finetuning/correctness/test_label.txt 
--pretrained_bert_checkpoint ratio_proportion_change4/output/bert_trained.seq_encoder.model.ep287 --epochs 51 + +### SL +> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -finetune_task SL --train_dataset finetuning/SL/train.txt --test_dataset finetuning/SL/test.txt --train_label finetuning/SL/train_label.txt --test_label finetuning/SL/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change4/output/bert_trained.seq_encoder.model.ep287 --epochs 51 + +### effectiveness +> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -finetune_task effectiveness --train_dataset finetuning/effectiveness/train.txt --test_dataset finetuning/effectiveness/test.txt --train_label finetuning/effectiveness/train_label.txt --test_label finetuning/effectiveness/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change4/output/bert_trained.seq_encoder.model.ep287 --epochs 51 + + +## scale_drawings_3 : Calculating Measurements Using a Scale +> Selected Pretrained model: **scale_drawings_3/output/bert_trained.seq_encoder.model.ep252** +### 10per +> clear;python3 src/main.py -workspace_name scale_drawings_3 -finetune_task 10per --train_dataset finetuning/10per/train.txt --test_dataset finetuning/10per/test.txt --train_label finetuning/10per/train_label.txt --test_label finetuning/10per/test_label.txt --pretrained_bert_checkpoint scale_drawings_3/output/bert_trained.seq_encoder.model.ep252 --epochs 51 + +### IS + +### FS + +### correctness +> clear;python3 src/main.py -workspace_name scale_drawings_3 -finetune_task correctness --train_dataset finetuning/correctness/train.txt --test_dataset finetuning/correctness/test.txt --train_label finetuning/correctness/train_label.txt --test_label finetuning/correctness/test_label.txt --pretrained_bert_checkpoint scale_drawings_3/output/bert_trained.seq_encoder.model.ep252 --epochs 51 + +### SL +> clear;python3 src/main.py -workspace_name scale_drawings_3 -finetune_task SL --train_dataset 
finetuning/SL/train.txt --test_dataset finetuning/SL/test.txt --train_label finetuning/SL/train_label.txt --test_label finetuning/SL/test_label.txt --pretrained_bert_checkpoint scale_drawings_3/output/bert_trained.seq_encoder.model.ep252 --epochs 51 + +### effectiveness + +## sales_tax_discounts_two_rates : Solving Problems with Both Sales Tax and Discounts +> Selected Pretrained model: **sales_tax_discounts_two_rates/output/bert_trained.seq_encoder.model.ep255** + +### 10per +> clear;python3 src/main.py -workspace_name sales_tax_discounts_two_rates -finetune_task 10per --train_dataset finetuning/10per/train.txt --test_dataset finetuning/10per/test.txt --train_label finetuning/10per/train_label.txt --test_label finetuning/10per/test_label.txt --pretrained_bert_checkpoint sales_tax_discounts_two_rates/output/bert_trained.seq_encoder.model.ep255 --epochs 51 + +### IS + +### FS + +### correctness +> clear;python3 src/main.py -workspace_name sales_tax_discounts_two_rates -finetune_task correctness --train_dataset finetuning/correctness/train.txt --test_dataset finetuning/correctness/test.txt --train_label finetuning/correctness/train_label.txt --test_label finetuning/correctness/test_label.txt --pretrained_bert_checkpoint sales_tax_discounts_two_rates/output/bert_trained.seq_encoder.model.ep255 --epochs 51 + +### SL + +### effectiveness \ No newline at end of file diff --git a/new_fine_tuning/__pycache__/metrics.cpython-312.pyc b/new_fine_tuning/__pycache__/metrics.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..75731582bfb95eb6dc8390a3f37958ca28fbb611 Binary files /dev/null and b/new_fine_tuning/__pycache__/metrics.cpython-312.pyc differ diff --git a/new_fine_tuning/__pycache__/recalibration.cpython-312.pyc b/new_fine_tuning/__pycache__/recalibration.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..42b0e44966312a8b2e864f2289bfa1e34bc47cae Binary files /dev/null and 
import argparse
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split, TensorDataset
import pandas as pd


def preprocess_labels(label_csv_path):
    """Load the `last_hint_class` column of a CSV as a LongTensor of class ids.

    :param label_csv_path: path to a CSV with a `last_hint_class` column
    :return: 1-D torch.long tensor of labels, or None if the file cannot be read
    """
    try:
        labels_df = pd.read_csv(label_csv_path)
        labels = labels_df['last_hint_class'].values.astype(int)
        return torch.tensor(labels, dtype=torch.long)
    except Exception as e:
        print(f"Error reading dataset file: {e}")
        return None


def preprocess_data(data_path, vocab, max_length=128):
    """Tokenize one whitespace-stripped sequence per line into fixed-length ids.

    :param data_path: text file, one token sequence per line
    :param vocab: project Vocab object exposing `to_seq(...)` and a `vocab` dict
    :param max_length: fixed output length; sequences are truncated then padded
    :return: (input_ids, segment_labels) tensors of shape (n, max_length),
             or (None, None) if the file cannot be read
    """
    try:
        with open(data_path, 'r') as f:
            sequences = f.readlines()
    except Exception as e:
        print(f"Error reading data file: {e}")
        return None, None

    input_rows = []
    segment_rows = []
    for sequence in sequences:
        sequence = sequence.strip()
        if not sequence:
            continue
        encoded = vocab.to_seq(sequence, seq_len=max_length)
        # Truncate, then right-pad with the [PAD] id (0 if the vocab lacks one).
        # len(encoded) is evaluated before truncation, so the pad count is
        # empty when the sequence was already at or over max_length.
        encoded = encoded[:max_length] + [vocab.vocab.get('[PAD]', 0)] * (max_length - len(encoded))
        input_rows.append(torch.tensor(encoded))
        # Single-segment input: every position belongs to segment 0.
        segment_rows.append(torch.zeros(max_length, dtype=torch.long))

    input_ids = torch.stack(input_rows, dim=0)
    segment_labels = torch.stack(segment_rows, dim=0)

    print(f"Input IDs shape: {input_ids.shape}")
    print(f"Segment labels shape: {segment_labels.shape}")

    return input_ids, segment_labels


def custom_collate_fn(batch):
    """Collate TensorDataset items into the dict batch the trainer expects.

    Bug fix: the previous version indexed each item as a dict
    (``item['input_ids']``), but the DataLoaders below wrap a
    ``TensorDataset``, which yields plain ``(input_ids, segment_label,
    label)`` tuples — the old code raised TypeError on the first batch.

    :param batch: list of (input_ids, segment_label, label) tensor tuples
    :return: dict with stacked 'input', 'label' and 'segment_label' tensors
    """
    inputs = torch.stack([item[0] for item in batch], dim=0)
    segment_labels = torch.stack([item[1] for item in batch], dim=0)
    labels = torch.stack([item[2] for item in batch], dim=0)

    return {
        'input': inputs,
        'label': labels,
        'segment_label': segment_labels
    }


def main(opt):
    """Fine-tune the custom BERT hint classifier and save the trained model."""
    # Project-local imports are deferred so this module can be imported
    # (e.g. to unit-test the pure helpers above) without the src package.
    from src.pretrainer import BERTFineTuneTrainer1
    from src.vocab import Vocab
    from CustomBERTModel import CustomBERTModel  # bug fix: was never imported

    # Set device to GPU if available, otherwise use CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load vocabulary
    vocab = Vocab(opt.vocab_file)
    vocab.load_vocab()

    # Preprocess data and labels (sequence length 50 as specified)
    input_ids, segment_labels = preprocess_data(opt.data_path, vocab, max_length=50)
    labels = preprocess_labels(opt.dataset)

    if input_ids is None or segment_labels is None or labels is None:
        print("Error in preprocessing data. Exiting.")
        return

    # TensorDataset asserts equal first dimensions; fail with a clear message.
    if len(labels) != len(input_ids):
        print(f"Label count ({len(labels)}) does not match sequence count ({len(input_ids)}). Exiting.")
        return

    # Create TensorDataset and split into train and validation sets (80/20)
    dataset = TensorDataset(input_ids, segment_labels, labels)
    val_size = len(dataset) - int(0.8 * len(dataset))
    val_dataset, train_dataset = random_split(dataset, [val_size, len(dataset) - val_size])

    train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn)
    val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=custom_collate_fn)

    # Initialize custom BERT model and move it to the device
    custom_model = CustomBERTModel(
        vocab_size=len(vocab.vocab),
        output_dim=2,
        pre_trained_model_path=opt.pre_trained_model_path
    ).to(device)

    # Initialize the fine-tuning trainer
    trainer = BERTFineTuneTrainer1(
        bert=custom_model,
        vocab_size=len(vocab.vocab),
        train_dataloader=train_dataloader,
        test_dataloader=val_dataloader,
        lr=1e-5,  # learning rate 10^-5 as specified
        num_labels=2,
        with_cuda=torch.cuda.is_available(),
        log_freq=10,
        workspace_name=opt.output_dir,
        log_folder_path=opt.log_folder_path
    )

    # Train the model
    trainer.train(epoch=20)

    # Save the model. NOTE: saves the full module object (not a state_dict)
    # because the downstream test script loads it with torch.load(<path>).
    os.makedirs(opt.output_dir, exist_ok=True)
    output_model_file = os.path.join(opt.output_dir, 'fine_tuned_model_3.pth')
    torch.save(custom_model, output_model_file)
    print(f'Model saved to {output_model_file}')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Fine-tune BERT model.')
    parser.add_argument('--dataset', type=str, default='/home/jupyter/bert/dataset/hint_based/ratio_proportion_change_3/er/er_train.csv', help='Path to the dataset file.')
    parser.add_argument('--data_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/gt/er.txt', help='Path to the input sequence file.')
    parser.add_argument('--output_dir', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/output/hint_classification', help='Directory to save the fine-tuned model.')
    parser.add_argument('--pre_trained_model_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/output/pretrain:1800ms:64hs:4l:8a:50s:64b:1000e:-5lr/bert_trained.seq_encoder.model.ep68', help='Path to the pre-trained BERT model.')
    parser.add_argument('--vocab_file', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/pretraining/vocab.txt', help='Path to the vocabulary file.')
    parser.add_argument('--log_folder_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/logs/oct', help='Path to the folder for saving logs.')

    opt = parser.parse_args()
    main(opt)
class BERTFineTuneTrainer:
    """Evaluation-only driver for a fine-tuned BERT classifier with features.

    Despite the name, no training happens here: every parameter is frozen in
    __init__ and iteration() never calls backward()/step(). It computes loss,
    accuracy, precision/recall/F1, confusion matrix and AUC over the test
    dataloader, mirroring console output into per-phase log files and writing
    `result.txt` and `roc_data.pkl` in the working directory.
    """

    def __init__(self, bertFinetunedClassifierwithFeats,
                 vocab_size: int, test_dataloader: DataLoader = None,
                 lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01,
                 warmup_steps=10000, with_cuda: bool = True, cuda_devices=None,
                 log_freq: int = 10, workspace_name=None,
                 num_labels=2, log_folder_path: str = None):
        """
        :param bertFinetunedClassifierwithFeats: fine-tuned classifier to evaluate
        :param vocab_size: total word vocab size (kept for interface compatibility)
        :param test_dataloader: test dataset data loader [can be None]
        :param lr: Adam learning rate (optimizer is created but never stepped)
        :param betas: Adam optimizer betas
        :param weight_decay: Adam optimizer weight decay param
        :param with_cuda: ignored — evaluation is pinned to CPU to match the
            CPU map_location used when the checkpoint was loaded
        :param log_freq: logging frequency of the batch iteration
        :param log_folder_path: folder receiving log_<phase>_finetuned*.txt files
        """
        # Evaluation is intentionally pinned to CPU.
        self.device = torch.device("cpu")
        print(" Device used = ", self.device)

        self.model = bertFinetunedClassifierwithFeats.to(self.device)
        print(self.model.parameters())
        # Freeze everything: this trainer only evaluates.
        for param in self.model.parameters():
            param.requires_grad = False

        self.test_data = test_dataloader

        # Retained for interface compatibility; never stepped.
        self.optim = Adam(self.model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay)
        self.criterion = nn.CrossEntropyLoss()

        self.log_freq = log_freq
        self.log_folder_path = log_folder_path
        self.start_time = time.time()
        # Truncate per-phase logs so every run starts from an empty file.
        for fi in ['test']:
            with open(self.log_folder_path + f"/log_{fi}_finetuned.txt", 'w'):
                pass
        print("Total Parameters:", sum(p.nelement() for p in self.model.parameters()))

    def test(self, epoch):
        """Run one evaluation pass over the test dataloader."""
        self.iteration(epoch, self.test_data, phase="test")

    def iteration(self, epoch, data_loader, phase="train"):
        """Loop over data_loader, collect predictions, and write metrics to logs.

        :param epoch: current epoch index (used only for logging)
        :param data_loader: DataLoader yielding dict batches with "input",
            "segment_label", "feat" and "label" tensors
        :param phase: "train" runs the forward pass in grad mode; anything
            else evaluates under torch.no_grad()
        """
        data_iter = tqdm.tqdm(enumerate(data_loader),
                              desc="EP_%s:%d" % (phase, epoch),
                              total=len(data_loader),
                              bar_format="{l_bar}{r_bar}")

        avg_loss = 0.0
        total_correct = 0
        total_element = 0
        plabels = []
        tlabels = []
        probabs = []
        positive_class_probs = []

        if phase == "train":
            self.model.train()
        else:
            self.model.eval()

        with open(self.log_folder_path + f"/log_{phase}_finetuned.txt", 'a') as log_file:
            # Mirror prints into the log file. Bug fix: restoration now sits in
            # a finally block, so an exception no longer leaves sys.stdout
            # redirected for the rest of the process.
            sys.stdout = log_file
            try:
                for i, data in data_iter:
                    # Send batch tensors to the (CPU) device.
                    data = {key: value.to(self.device) for key, value in data.items()}
                    if phase == "train":
                        logits = self.model.forward(data["input"], data["segment_label"], data["feat"])
                    else:
                        with torch.no_grad():
                            logits = self.model.forward(data["input"].cpu(), data["segment_label"].cpu(), data["feat"].cpu())

                    logits = logits.cpu()
                    loss = self.criterion(logits, data["label"])

                    probs = nn.Softmax(dim=-1)(logits)
                    probabs.extend(probs.detach().cpu().numpy().tolist())
                    predicted_labels = torch.argmax(probs, dim=-1)
                    plabels.extend(predicted_labels.cpu().numpy())
                    tlabels.extend(data['label'].cpu().numpy())
                    # Probability assigned to class 1, used for ROC/AUC.
                    positive_class_probs = [prob[1] for prob in probabs]

                    correct = (data['label'] == predicted_labels).sum().item()
                    avg_loss += loss.item()
                    total_correct += correct
                    total_element += data["label"].nelement()

                    post_fix = {
                        "epoch": epoch,
                        "iter": i,
                        "avg_loss": avg_loss / (i + 1),
                        "avg_acc": total_correct / total_element * 100 if total_element != 0 else 0,
                        "loss": loss.item()
                    }
                    if i % self.log_freq == 0:
                        data_iter.write(str(post_fix))

                # zero_division=0 keeps degenerate single-class batches from
                # raising warnings / ill-defined metric errors.
                precisions = precision_score(tlabels, plabels, average="weighted", zero_division=0)
                recalls = recall_score(tlabels, plabels, average="weighted", zero_division=0)
                f1_scores = f1_score(tlabels, plabels, average="weighted")
                cmatrix = confusion_matrix(tlabels, plabels)
                end_time = time.time()
                auc_score = roc_auc_score(tlabels, positive_class_probs)
                final_msg = {
                    "avg_loss": avg_loss / len(data_iter),
                    "total_acc": total_correct * 100.0 / total_element,
                    "precisions": precisions,
                    "recalls": recalls,
                    "f1_scores": f1_scores,
                    "time_taken_from_start": end_time - self.start_time,
                    "auc_score": auc_score
                }
                with open("result.txt", 'w') as file:
                    for key, value in final_msg.items():
                        file.write(f"{key}: {value}\n")
                # Bug fix: final_msg was previously printed twice, and the
                # pickle handle shadowed the log-file variable `f` (the later
                # f.close() closed the wrong file). Distinct names avoid both.
                print(final_msg)
                fpr, tpr, thresholds = roc_curve(tlabels, positive_class_probs)
                with open("roc_data.pkl", "wb") as roc_file:
                    pickle.dump((fpr, tpr, thresholds), roc_file)
            finally:
                sys.stdout = sys.__stdout__

        with open(self.log_folder_path + f"/log_{phase}_finetuned_info.txt", 'a') as info_file:
            sys.stdout = info_file
            try:
                info_msg = {
                    "epoch": f"EP{epoch}_{phase}",
                    "confusion_matrix": f"{cmatrix}",
                    # True labels are logged only once (epoch 0) to keep the
                    # info file small; they do not change between epochs.
                    "true_labels": f"{tlabels if epoch == 0 else ''}",
                    "predicted_labels": f"{plabels}",
                    "probabilities": f"{probabs}",
                    "time_taken_from_start": end_time - self.start_time
                }
                print(info_msg)
            finally:
                sys.stdout = sys.__stdout__
class BERTFineTuneCalibratedTrainer:
    """Evaluation-only driver for a temperature-calibrated fine-tuned classifier.

    Same role as BERTFineTuneTrainer, with two differences visible in the
    code: batches arrive as (data_dict, ...) tuples — only element 0 is used —
    and the calibrated model's forward() takes the whole batch dict as a
    single argument. All parameters are frozen; no backward pass is run.
    """

    def __init__(self, bertFinetunedClassifierwithFeats,
                 vocab_size: int, test_dataloader: DataLoader = None,
                 lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01,
                 warmup_steps=10000, with_cuda: bool = True, cuda_devices=None,
                 log_freq: int = 10, workspace_name=None,
                 num_labels=2, log_folder_path: str = None):
        """
        :param bertFinetunedClassifierwithFeats: calibrated classifier to evaluate
        :param vocab_size: total word vocab size (kept for interface compatibility)
        :param test_dataloader: test dataset data loader [can be None]
        :param lr: Adam learning rate (optimizer is created but never stepped)
        :param betas: Adam optimizer betas
        :param weight_decay: Adam optimizer weight decay param
        :param with_cuda: evaluate on cuda:0 when available and requested
        :param log_freq: logging frequency of the batch iteration
        :param log_folder_path: folder receiving log_<phase>_finetuned*.txt files
        """
        # Unlike BERTFineTuneTrainer, this class honors the with_cuda flag.
        cuda_condition = torch.cuda.is_available() and with_cuda
        self.device = torch.device("cuda:0" if cuda_condition else "cpu")
        print(cuda_condition, " Device used = ", self.device)

        self.model = bertFinetunedClassifierwithFeats
        print(self.model.parameters())
        # Freeze everything: this trainer only evaluates.
        for param in self.model.parameters():
            param.requires_grad = False

        self.test_data = test_dataloader

        # Retained for interface compatibility; never stepped.
        self.optim = Adam(self.model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay)
        self.criterion = nn.CrossEntropyLoss()

        self.log_freq = log_freq
        self.log_folder_path = log_folder_path
        self.start_time = time.time()
        # Truncate per-phase logs so every run starts from an empty file.
        for fi in ['test']:
            with open(self.log_folder_path + f"/log_{fi}_finetuned.txt", 'w'):
                pass
        print("Total Parameters:", sum(p.nelement() for p in self.model.parameters()))

    def test(self, epoch):
        """Run one evaluation pass over the test dataloader."""
        self.iteration(epoch, self.test_data, phase="test")

    def iteration(self, epoch, data_loader, phase="train"):
        """Loop over data_loader, collect predictions, and write metrics to logs.

        :param epoch: current epoch index (used only for logging)
        :param data_loader: DataLoader yielding (batch_dict, ...) tuples; the
            batch dict carries "input", "segment_label", "feat" and "label"
        :param phase: controls model.train()/eval(); the forward pass is
            always run under torch.no_grad()
        """
        data_iter = tqdm.tqdm(enumerate(data_loader),
                              desc="EP_%s:%d" % (phase, epoch),
                              total=len(data_loader),
                              bar_format="{l_bar}{r_bar}")

        avg_loss = 0.0
        total_correct = 0
        total_element = 0
        plabels = []
        tlabels = []
        probabs = []
        positive_class_probs = []

        if phase == "train":
            self.model.train()
        else:
            self.model.eval()

        with open(self.log_folder_path + f"/log_{phase}_finetuned.txt", 'a') as log_file:
            # Mirror prints into the log file. Bug fix: restoration now sits in
            # a finally block, so an exception no longer leaves sys.stdout
            # redirected for the rest of the process.
            sys.stdout = log_file
            try:
                for i, data in data_iter:
                    # Calibration batches are tuples; element 0 is the batch dict.
                    data = {key: value.to(self.device) for key, value in data[0].items()}

                    with torch.no_grad():
                        # Calibrated wrapper takes the whole dict as one argument.
                        logits = self.model.forward(data)

                    loss = self.criterion(logits, data["label"])
                    if torch.cuda.device_count() > 1:
                        loss = loss.mean()

                    probs = nn.Softmax(dim=-1)(logits)
                    probabs.extend(probs.detach().cpu().numpy().tolist())
                    predicted_labels = torch.argmax(probs, dim=-1)
                    plabels.extend(predicted_labels.cpu().numpy())
                    tlabels.extend(data['label'].cpu().numpy())
                    # Probability assigned to class 1, used for ROC/AUC.
                    positive_class_probs = [prob[1] for prob in probabs]

                    correct = (data['label'] == predicted_labels).sum().item()
                    avg_loss += loss.item()
                    total_correct += correct
                    total_element += data["label"].nelement()

                    post_fix = {
                        "epoch": epoch,
                        "iter": i,
                        "avg_loss": avg_loss / (i + 1),
                        "avg_acc": total_correct / total_element * 100 if total_element != 0 else 0,
                        "loss": loss.item()
                    }
                    if i % self.log_freq == 0:
                        data_iter.write(str(post_fix))

                # zero_division=0 keeps degenerate single-class batches from
                # raising warnings / ill-defined metric errors.
                precisions = precision_score(tlabels, plabels, average="weighted", zero_division=0)
                recalls = recall_score(tlabels, plabels, average="weighted", zero_division=0)
                f1_scores = f1_score(tlabels, plabels, average="weighted")
                cmatrix = confusion_matrix(tlabels, plabels)
                auc_score = roc_auc_score(tlabels, positive_class_probs)
                end_time = time.time()
                final_msg = {
                    "avg_loss": avg_loss / len(data_iter),
                    "total_acc": total_correct * 100.0 / total_element,
                    "precisions": precisions,
                    "recalls": recalls,
                    "f1_scores": f1_scores,
                    "auc_score": auc_score,
                    "time_taken_from_start": end_time - self.start_time
                }
                with open("result.txt", 'w') as file:
                    for key, value in final_msg.items():
                        file.write(f"{key}: {value}\n")
                with open("plabels.txt", "w") as file:
                    # Bug fix: file.write(plabels) passed a list to write(),
                    # which requires a str and raised TypeError.
                    file.write(str(plabels))

                print(final_msg)
                # Computed for parity with the non-calibrated trainer; the
                # curve is not persisted here.
                fpr, tpr, thresholds = roc_curve(tlabels, positive_class_probs)
            finally:
                sys.stdout = sys.__stdout__

        with open(self.log_folder_path + f"/log_{phase}_finetuned_info.txt", 'a') as info_file:
            sys.stdout = info_file
            try:
                info_msg = {
                    "confusion_matrix": f"{cmatrix}",
                    # True labels are logged only once (epoch 0) to keep the
                    # info file small; they do not change between epochs.
                    "true_labels": f"{tlabels if epoch == 0 else ''}",
                    "predicted_labels": f"{plabels}",
                    "probabilities": f"{probabs}",
                    "time_taken_from_start": end_time - self.start_time
                }
                print(info_msg)
            finally:
                sys.stdout = sys.__stdout__
name of embeddings") + parser.add_argument("-pretrain", type=bool, default=False, help="pretraining: true, or false") + # parser.add_argument('-opts', nargs='+', type=str, default=None, help='List of optional steps') + parser.add_argument("-max_mask", type=int, default=0.15, help="% of input tokens selected for masking") + # parser.add_argument("-p", "--pretrain_dataset", type=str, default="pretraining/pretrain.txt", help="pretraining dataset for bert") + # parser.add_argument("-pv", "--pretrain_val_dataset", type=str, default="pretraining/test.txt", help="pretraining validation dataset for bert") +# default="finetuning/test.txt", + parser.add_argument("-vocab_path", type=str, default="pretraining/vocab.txt", help="built vocab model path with bert-vocab") + + parser.add_argument("-train_dataset_path", type=str, default="train.txt", help="fine tune train dataset for progress classifier") + parser.add_argument("-val_dataset_path", type=str, default="val.txt", help="test set for evaluate fine tune train set") + parser.add_argument("-test_dataset_path", type=str, default="test.txt", help="test set for evaluate fine tune train set") + parser.add_argument("-num_labels", type=int, default=2, help="Number of labels") + parser.add_argument("-train_label_path", type=str, default="train_label.txt", help="fine tune train dataset for progress classifier") + parser.add_argument("-val_label_path", type=str, default="val_label.txt", help="test set for evaluate fine tune train set") + parser.add_argument("-test_label_path", type=str, default="test_label.txt", help="test set for evaluate fine tune train set") + ##### change Checkpoint for finetuning + parser.add_argument("-pretrained_bert_checkpoint", type=str, default=None, help="checkpoint of saved pretrained bert model") + parser.add_argument("-finetuned_bert_classifier_checkpoint", type=str, default=None, help="checkpoint of saved finetuned bert model") #."output_feb09/bert_trained.model.ep40" + 
#."output_feb09/bert_trained.model.ep40" + parser.add_argument('-check_epoch', type=int, default=None) + + parser.add_argument("-hs", "--hidden", type=int, default=64, help="hidden size of transformer model") #64 + parser.add_argument("-l", "--layers", type=int, default=4, help="number of layers") #4 + parser.add_argument("-a", "--attn_heads", type=int, default=4, help="number of attention heads") #8 + parser.add_argument("-s", "--seq_len", type=int, default=128, help="maximum sequence length") + + parser.add_argument("-b", "--batch_size", type=int, default=500, help="number of batch_size") #64 + parser.add_argument("-e", "--epochs", type=int, default=1)#1501, help="number of epochs") #501 + # Use 50 for pretrain, and 10 for fine tune + parser.add_argument("-w", "--num_workers", type=int, default=0, help="dataloader worker size") + + # Later run with cuda + parser.add_argument("--with_cuda", type=bool, default=False, help="training with CUDA: true, or false") + parser.add_argument("--log_freq", type=int, default=10, help="printing loss every n iter: setting n") + # parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus") + parser.add_argument("--cuda_devices", type=int, nargs='+', default=None, help="CUDA device ids") + # parser.add_argument("--on_memory", type=bool, default=False, help="Loading on memory: true or false") + + parser.add_argument("--dropout", type=float, default=0.1, help="dropout of network") + parser.add_argument("--lr", type=float, default=1e-05, help="learning rate of adam") #1e-3 + parser.add_argument("--adam_weight_decay", type=float, default=0.01, help="weight_decay of adam") + parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value") + parser.add_argument("--adam_beta2", type=float, default=0.98, help="adam first beta value") #0.999 + + parser.add_argument("-o", "--output_path", type=str, default="bert_trained.seq_encoder.model", help="ex)output/bert.model") + # 
parser.add_argument("-o", "--output_path", type=str, default="output/bert_fine_tuned.model", help="ex)output/bert.model") + + args = parser.parse_args() + for k,v in vars(args).items(): + if 'path' in k: + if v: + if k == "output_path": + if args.code: + setattr(args, f"{k}", args.workspace_name+f"/output/{args.code}/"+v) + elif args.finetune_task: + setattr(args, f"{k}", args.workspace_name+f"/output/{args.finetune_task}/"+v) + else: + setattr(args, f"{k}", args.workspace_name+"/output/"+v) + elif k != "vocab_path": + if args.pretrain: + setattr(args, f"{k}", args.workspace_name+"/pretraining/"+v) + else: + if args.code: + setattr(args, f"{k}", args.workspace_name+f"/{args.code}/"+v) + elif args.finetune_task: + if args.diff_test_folder and "test" in k: + setattr(args, f"{k}", args.workspace_name+f"/finetuning/"+v) + else: + setattr(args, f"{k}", args.workspace_name+f"/finetuning/{args.finetune_task}/"+v) + else: + setattr(args, f"{k}", args.workspace_name+"/finetuning/"+v) + else: + setattr(args, f"{k}", args.workspace_name+"/"+v) + + print(f"args.{k} : {getattr(args, f'{k}')}") + + print("Loading Vocab", args.vocab_path) + vocab_obj = Vocab(args.vocab_path) + vocab_obj.load_vocab() + print("Vocab Size: ", len(vocab_obj.vocab)) + + + print("Testing using finetuned model......") + print("Loading Test Dataset", args.test_dataset_path) + test_dataset = TokenizerDataset(args.test_dataset_path, args.test_label_path, vocab_obj, seq_len=args.seq_len) + # test_dataset = TokenizerDatasetForCalibration(args.test_dataset_path, args.test_label_path, vocab_obj, seq_len=args.seq_len) + + print("Creating Dataloader...") + test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) + + print("Load fine-tuned BERT classifier model with feats") + # cuda_condition = torch.cuda.is_available() and args.with_cuda + device = torch.device("cpu") #torch.device("cuda:0" if cuda_condition else "cpu") + finetunedBERTclassifier = 
torch.load(args.finetuned_bert_classifier_checkpoint, map_location=device) + if isinstance(finetunedBERTclassifier, torch.nn.DataParallel): + finetunedBERTclassifier = finetunedBERTclassifier.module + + new_log_folder = f"{args.workspace_name}/logs" + new_output_folder = f"{args.workspace_name}/output" + if args.finetune_task: # is sent almost all the time + new_log_folder = f"{args.workspace_name}/logs/{args.finetune_task}" + new_output_folder = f"{args.workspace_name}/output/{args.finetune_task}" + + if not os.path.exists(new_log_folder): + os.makedirs(new_log_folder) + if not os.path.exists(new_output_folder): + os.makedirs(new_output_folder) + + print("Creating BERT Fine Tuned Test Trainer") + trainer = BERTFineTuneTrainer(finetunedBERTclassifier, + len(vocab_obj.vocab), test_dataloader=test_data_loader, + lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, + with_cuda=args.with_cuda, cuda_devices = args.cuda_devices, log_freq=args.log_freq, + workspace_name = args.workspace_name, num_labels=args.num_labels, log_folder_path=new_log_folder) + + # trainer = BERTFineTuneCalibratedTrainer(finetunedBERTclassifier, + # len(vocab_obj.vocab), test_dataloader=test_data_loader, + # lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, + # with_cuda=args.with_cuda, cuda_devices = args.cuda_devices, log_freq=args.log_freq, + # workspace_name = args.workspace_name, num_labels=args.num_labels, log_folder_path=new_log_folder) + print("Testing fine-tuned model Start....") + start_time = time.time() + repoch = range(args.check_epoch, args.epochs) if args.check_epoch else range(args.epochs) + counter = 0 + # patience = 10 + for epoch in repoch: + print(f'Test Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}') + trainer.test(epoch) + # pickle.dump(trainer.probability_list, open(f"{args.workspace_name}/output/aaai/change4_mid_prob_{epoch}.pkl","wb")) + print(f'Test Epoch 
{epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n') + end_time = time.time() + print("Time Taken to fine-tune model = ", end_time - start_time) + print(f'Pretraining Ends, Time: {time.strftime("%D %T", time.localtime(end_time))}') + + + +if __name__ == "__main__": + train() \ No newline at end of file diff --git a/plot.png b/plot.png new file mode 100644 index 0000000000000000000000000000000000000000..c376bb1abde7ba8cc3a1740c9dc1a8d60b0ea105 Binary files /dev/null and b/plot.png differ diff --git a/prepare_pretraining_input_vocab_file.py b/prepare_pretraining_input_vocab_file.py new file mode 100644 index 0000000000000000000000000000000000000000..22ff78becb446f3719c54fc1d3eb2c4ac4f7e3c5 --- /dev/null +++ b/prepare_pretraining_input_vocab_file.py @@ -0,0 +1,4755 @@ +import argparse +import pickle +import random +import copy +import pandas as pd +import numpy as np +from collections import Counter +import os +from data_preprocessor import DataPreprocessor + +def prepare_pretraining_files(data_processor, options): + + # steps = pickle.load(open(f"{options.dataset_folder}unique_steps_list.pkl","rb")) + + chunk_iterator = data_processor.load_file_iterator() + + train_file = open(options.train_file_path, "w") + train_info = open(options.train_info_path, "w") + + val_file = open(options.val_file_path, "w") + val_info = open(options.val_info_path, "w") + + test_file = open(options.test_file_path, "w") + test_info = open(options.test_info_path, "w") + + + for chunk_data in chunk_iterator: + for section, section_groups in chunk_data.groupby("Level (Workspace Id)"): + # if options.workspace_name == section: + if "ratio_proportion_change3" == section: + for student, student_groups in section_groups.groupby("Anon Student Id"): + # step_names_token = [step for step in prob_groups['Step Name'] if str(step) != 'nan'] + # print(step_names_token) + + # writtenTrain = False + # writtenTest = False + + student_groups.sort_values(by="Time", inplace=True) + # 
prob_list = list(pd.unique(student_groups["Problem Name"])) + # print(len(prob_list), prob_list) + + # first_prob_list = prob_list[:3] + # last_prob_list = prob_list[-3:] + # print(len(first_prob_list), first_prob_list) + # print(len(last_prob_list), last_prob_list) + + # final_prob_list = first_prob_list + last_prob_list + # print(len(prob_list), len(final_prob_list), final_prob_list) + + for prob, prob_groups in student_groups.groupby("Problem Name"): + # For first 3 and last 3 only + # if not prob in final_prob_list: + # continue + # print(prob) + actions = list(prob_groups["Action"]) + # A problem should be completed by a student clicking Done button. + if not "Done" in actions: + continue + unique_steps = list(pd.unique(prob_groups["Step Name"])) + unique_steps_len = len(set([s for s in unique_steps if not pd.isna(s) and not (s in options.opt_step1) and not(s in options.opt_step2)])) + if unique_steps_len < 4: + continue + + time_stamps = list(prob_groups["Time"]) + time_stamps_list = set() + for i in range(len(time_stamps)-1): + if (time_stamps[i+1] - time_stamps[i]) < 1800: + time_stamps_list.add(time_stamps[i+1]) + + # progress = "" + + step_names_token = [] + outcome = [] + help_level = [] + auto_complete = False + means_and_extremes = False + + for index, row in prob_groups[['Time', 'Step Name', 'CF (Etalon)', 'Outcome', 'Help Level','CF (Workspace Progress Status)']].iterrows(): + + step = row["Step Name"] + progress = row["CF (Workspace Progress Status)"] + etalon = row["CF (Etalon)"] + + if not pd.isna(step): + if step in options.opt_step1 and not means_and_extremes: + try: + etalon = int(etalon) + except Exception as e: + try: + etalon = float(etalon) + means_and_extremes = True + # break + except Exception as e: + pass + + if (step in options.opt_step1 or step in options.opt_step2) and row["Time"] in time_stamps_list: + # if row["Time"] in time_stamps_list: + auto_complete = True + # print(row) + continue + + if not step_names_token or step != 
step_names_token[-1]: + step_names_token.append(step) + # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE'] + outcome.append(row['Outcome']) + help_level.append(str(row["Help Level"])) + else: + outcome[-1] = outcome[-1]+":"+row['Outcome'] + help_level[-1] = help_level[-1]+":"+str(row['Help Level']) + + unique_steps_len = len(set([s for s in step_names_token if not (s in options.opt_step1) and not(s in options.opt_step2)])) + + # 4 and more in sequence + if step_names_token and unique_steps_len > 4: + # and len(step_names_token) > 3 + # For information + # indices = [str(i) for i in prob_groups.index] + where_opt = [] + for stp in step_names_token: + if stp in options.opt_step1: + where_opt.append("1") + elif stp in options.opt_step2: + where_opt.append("2") + else: + where_opt.append("0") + proba = random.random() + + # if prob in first_prob_list: + if proba <= 0.8: + # writtenTrain = True + train_file.write("\t".join(step_names_token)) + train_file.write("\n") + # train_info.write(",".join([str(progress),str(prob), str(student), str(len(step_names_token)), + # "\t".join(map(str, outcome)), "\t".join(map(str, help_level))])) + # progress, problem name, student id, auto_complete, total steps length, er or me, outcome seq, help_level seq, encoding in steps length + train_info.write(",".join([str(progress),str(prob), str(student), str(auto_complete), str(len(step_names_token)), + f"{1 if means_and_extremes else 0}", "\t".join(map(str, outcome)), + "\t".join(map(str, help_level)), "\t".join(map(str, where_opt))])) + train_info.write("\n") + + elif proba > 0.9: + # elif prob in last_prob_list: + + # writtenTest = True + + test_file.write("\t".join(step_names_token)) + test_file.write("\n") + # test_info.write(",".join([str(progress),str(prob), str(student), str(auto_complete), str(len(step_names_token)), + # "\t".join(map(str, outcome)), "\t".join(map(str, help_level))])) + # progress, problem name, student id, total steps length, er or me, outcome seq, 
help_level seq, encoding in steps length + test_info.write(",".join([str(progress),str(prob), str(student), str(auto_complete), str(len(step_names_token)), + f"{1 if means_and_extremes else 0}", "\t".join(map(str, outcome)), + "\t".join(map(str, help_level)), "\t".join(map(str, where_opt))])) + test_info.write("\n") + else: + val_file.write("\t".join(step_names_token)) + val_file.write("\n") + # test_info.write(",".join([str(progress),str(prob), str(student), str(auto_complete), str(len(step_names_token)), + # "\t".join(map(str, outcome)), "\t".join(map(str, help_level))])) + # progress, problem name, student id, total steps length, er or me, outcome seq, help_level seq, encoding in steps length + val_info.write(",".join([str(progress),str(prob), str(student), str(auto_complete), str(len(step_names_token)), + f"{1 if means_and_extremes else 0}", "\t".join(map(str, outcome)), + "\t".join(map(str, help_level)), "\t".join(map(str, where_opt))])) + val_info.write("\n") + # Indicates actions of next student + # Indicates next problem + # if writtenTrain: + # train_file.write("\n") + # train_info.write("\n") + # if writtenTest: + # test_file.write("\n") + # test_info.write("\n") + # if not writtenTrain and not writtenTest: + # print(f"Student {student} is not involved in workspace : {options.workspace_name}.") + + + train_file.close() + train_info.close() + + val_file.close() + val_info.close() + + test_file.close() + test_info.close() + +def prepare_school_pretraining_files(data_processor, options): + + # steps = pickle.load(open(f"{options.dataset_folder}unique_steps_list.pkl","rb")) + + chunk_iterator = data_processor.load_file_iterator(sep=",") + + train_file = open(options.train_file_path, "w") + train_info = open(options.train_info_path, "w") + + val_file = open(options.val_file_path, "w") + val_info = open(options.val_info_path, "w") + + test_file = open(options.test_file_path, "w") + test_info = open(options.test_info_path, "w") + + + for chunk_data in 
chunk_iterator: + for school, school_group in chunk_data.groupby('CF (Anon School Id)'): + if not options.school or school in options.school: + for class_id, class_group in school_group.groupby('CF (Anon Class Id)'): + for student, student_groups in class_group.groupby("Anon Student Id"): + student_groups.sort_values(by="Time") + # prob_list = list(pd.unique(student_groups["Problem Name"])) + for prob, prob_groups in student_groups.groupby("Problem Name"): + # For first 3 and last 3 only + # if not prob in final_prob_list: + # continue + # print(prob) + step_names_token = [] + means_and_extremes = False + for index, row in prob_groups[['Time', 'Step Name', 'Action', 'Attempt At Step', 'CF (Is Autofilled)', 'CF (Workspace Progress Status)', 'CF (Etalon)', 'CF (Problem Scenario Tags)']].iterrows(): + progress = row["CF (Workspace Progress Status)"] + action = row["Action"] + attempt = row["Attempt At Step"] + autofilled = row["CF (Is Autofilled)"] + step = row["Step Name"] + scenario = row['CF (Problem Scenario Tags)'] + + if not pd.isna(step): + if step in options.opt_step1 and not means_and_extremes: + etalon = row["CF (Etalon)"] + if not pd.isna(etalon): + etalon = etalon.strip('{}') + key, value = etalon.split('=') + etalon = value + try: + etalon = int(etalon) + except Exception as e: + try: + etalon = float(etalon) + means_and_extremes = True + except Exception as e: + pass + + if not autofilled: + new_step = f"{step}:{action}:{attempt}" + step_names_token.append(new_step) + + if step_names_token: + where_opt = [] + step1 = False + step2 = False + strategy_data = False + for step_oh in step_names_token: + step = step_oh.split(":") + if len(step) == 3: + step = step[0] + else: + step = ":".join(step[:2]) + + # print(f"changed {step_oh} = ? 
{step}") + if step == options.opt_step1[0]: + where_opt.append("_1") + step1 = True + elif step == options.opt_step2[0]: + where_opt.append("_2") + step2 = True + elif step in options.opt_step1[1:]: + where_opt.append("1") + if step1: + strategy_data = True + elif step in options.opt_step2[1:]: + where_opt.append("2") + if step2: + strategy_data = True + else: + where_opt.append("0") + + if strategy_data and step_names_token[-1].split(":")[-2] != "Done": + strategy_data = False + + if strategy_data: + proba = random.random() + step_names_tokens = [":".join(s.split(":")[:-2]) for s in step_names_token] + step_names_token = [] + for s in step_names_tokens: + if s != "nan": + if not step_names_token or s != step_names_token[-1]: + step_names_token.append(s) + # if prob in first_prob_list: + if proba <= 0.8: + train_file.write("\t".join(step_names_token)) + train_file.write("\n") + # school, class, student id, progress, problem name, scenario, prefered ER or ME, total steps length, encoding in steps length + train_info.write(",".join([str(school), str(class_id), str(student), str(progress), str(prob), str(scenario), f"{1 if means_and_extremes else 0}", str(len(step_names_token)), "\t".join(map(str, where_opt))])) + train_info.write("\n") + + elif proba > 0.9: + # elif prob in last_prob_list: + test_file.write("\t".join(step_names_token)) + test_file.write("\n") + # school, class, student id, progress, problem name, scenario, prefered ER or ME, total steps length, encoding in steps length + test_info.write(",".join([str(school), str(class_id), str(student), str(progress), str(prob), str(scenario), f"{1 if means_and_extremes else 0}", str(len(step_names_token)), "\t".join(map(str, where_opt))])) + test_info.write("\n") + + else: + val_file.write("\t".join(step_names_token)) + val_file.write("\n") + # school, class, student id, progress, problem name, scenario, prefered ER or ME, total steps length, encoding in steps length + val_info.write(",".join([str(school), 
str(class_id), str(student), str(progress), str(prob), str(scenario), f"{1 if means_and_extremes else 0}", str(len(step_names_token)), "\t".join(map(str, where_opt))])) + val_info.write("\n") + # break + # break + # break + # break + # break + + + + train_file.close() + train_info.close() + + val_file.close() + val_info.close() + + test_file.close() + test_info.close() + +def prepare_school_coded_pretraining_files(data_processor, options): + + # steps = pickle.load(open(f"{options.dataset_folder}unique_steps_list.pkl","rb")) + + chunk_iterator = data_processor.load_file_iterator(sep=",") + + train_file = open(options.train_file_path, "w") + train_info = open(options.train_info_path, "w") + + val_file = open(options.val_file_path, "w") + val_info = open(options.val_info_path, "w") + + test_file = open(options.test_file_path, "w") + test_info = open(options.test_info_path, "w") + + + for chunk_data in chunk_iterator: + for school, school_group in chunk_data.groupby('CF (Anon School Id)'): + if not options.school or school in options.school: + print(f"{school} : {school_group.shape}") + school_group = school_group[(school_group['CF (Is StepByStep)'] == False) & + (school_group['CF (Encounter)'] == 0) & + (school_group['CF (Is Review Mode)'] == -1) ] + print(f"{school} : {school_group.shape}") + # for class_id, class_group in school_groups.groupby('CF (Anon Class Id)'): + for student, student_groups in school_group.groupby("Anon Student Id"): + student_groups.sort_values(by="Time", inplace=True) + # At least 3 last problems are selected + prob_list= list(student_groups[student_groups["CF (Workspace Progress Status)"]=="GRADUATED"]["Problem Name"]) + prob_list = prob_list[-int(len(prob_list)/2):] + for prob, prob_groups in student_groups.groupby("Problem Name"): + if not prob in prob_list: + continue + progress = list(pd.unique(prob_groups["CF (Workspace Progress Status)"]))[0] + if progress != "GRADUATED": + continue + actions = list(prob_groups["Action"]) + # A 
problem should be completed by a student clicking Done button. + if not "Done" in actions: + continue + unique_steps = list(pd.unique(prob_groups[prob_groups["CF (Is Autofilled)"] == False]["Step Name"])) + unique_steps_len = len([s for s in unique_steps if not pd.isna(s) and not (s in options.opt_step1) and not (s in options.opt_step2)]) + if unique_steps_len < 4: + continue + class_id = list(pd.unique(prob_groups["CF (Anon Class Id)"])) + step_names_token = [] + original_steps_actions_attempts_help_levels_outcomes = [] + original_steps = [] + means_and_extremes = False + for index, row in prob_groups[['Step Name', 'Action', 'Attempt At Step', 'CF (Is Autofilled)', + 'Outcome', 'Help Level', 'CF (Workspace Progress Status)', + 'CF (Etalon)', 'CF (Problem Scenario Tags)']].iterrows(): + step = row["Step Name"] + action = row["Action"] # ['Attempt', 'Hint Request', 'Hint Level Change', 'Done'] + attempt = row["Attempt At Step"] # number + outcome = row["Outcome"] # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE'] + help_level = row["Help Level"] # number + # progress = row["CF (Workspace Progress Status)"] + scenario = row['CF (Problem Scenario Tags)'] + + if not pd.isna(step): + if step in options.opt_step1 and not means_and_extremes: + etalon = row["CF (Etalon)"] + if not pd.isna(etalon): + etalon = etalon.strip('{}') + key, value = etalon.split('=') + etalon = value + try: + etalon = int(etalon) + except Exception as e: + try: + etalon = float(etalon) + means_and_extremes = True + except Exception as e: + pass + if row['CF (Is Autofilled)'] == True: + continue + prev = step_names_token[-1] if step_names_token else "" + prev_step = step_names_token[-1].split("-")[0] if step_names_token else "" + + if not step_names_token or step != prev_step: + if step in options.opt_step1 or step in options.opt_step2: + new_step = step + else: + if action == "Attempt" and outcome != "OK": + new_step = step+"-2" + elif "Hint" in action: + new_step = step+"-1" + else: + 
new_step = step+"-0" + step_names_token.append(new_step) + else: + if not (step in options.opt_step1 or step in options.opt_step2): + if action == "Attempt" and outcome != "OK": + new_step = step+"-2" + elif "Hint" in action: + new_step = step+"-1" + else: + new_step = step+"-0" + if prev < new_step: + step_names_token[-1] = new_step + original_steps_actions_attempts_help_levels_outcomes.append(f"{step}-{action}-{attempt}-{help_level}-{outcome}") + original_steps.append(step) + + unique_steps_len = len([s for s in original_steps if not (s in options.opt_step1) and not(s in options.opt_step2)]) + if step_names_token and unique_steps_len > 4: + proba = random.random() + # if prob in first_prob_list: + if proba <= 0.8: + train_file.write("\t".join(step_names_token)) + train_file.write("\n") + # school, class, student id, progress, problem name, scenario, + # prefered ER or ME, total steps length, + # original seq-action-attempt-help_level-outcome + train_info.write(",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario), + f"{1 if means_and_extremes else 0}", str(len(step_names_token)), + "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))])) + train_info.write("\n") + + elif proba > 0.9: + # elif prob in last_prob_list: + test_file.write("\t".join(step_names_token)) + test_file.write("\n") + # school, class, student id, progress, problem name, scenario, + # prefered ER or ME, total steps length, + # original seq-action-attempt-help_level-outcome + test_info.write(",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario), + f"{1 if means_and_extremes else 0}", str(len(step_names_token)), + "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))])) + test_info.write("\n") + + else: + val_file.write("\t".join(step_names_token)) + val_file.write("\n") + # school, class, student id, progress, problem name, scenario, + # prefered ER or ME, total steps 
length, + # original seq-action-attempt-help_level-outcome + val_info.write(",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario), + f"{1 if means_and_extremes else 0}", str(len(step_names_token)), + "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))])) + val_info.write("\n") + # break + # break + # break + # break + # break + + + + train_file.close() + train_info.close() + + val_file.close() + val_info.close() + + test_file.close() + test_info.close() + + +def prepare_school_attention_files(data_processor, options): + + # steps = pickle.load(open(f"{options.dataset_folder}unique_steps_list.pkl","rb")) + + chunk_iterator = data_processor.load_file_iterator(sep=",") + + train_file = open(options.train_file_path, "w") + train_info = open(options.train_info_path, "w") + + val_file = open(options.val_file_path, "w") + val_info = open(options.val_info_path, "w") + + test_file = open(options.test_file_path, "w") + test_info = open(options.test_info_path, "w") + + + for chunk_data in chunk_iterator: + for school, school_group in chunk_data.groupby('CF (Anon School Id)'): + if not options.school or school in options.school: + for class_id, class_group in school_group.groupby('CF (Anon Class Id)'): + for student, student_groups in class_group.groupby("Anon Student Id"): + student_groups.sort_values(by="Time") +# prob_list = list(pd.unique(student_groups["Problem Name"])) +# if len(prob_list) > 0 : +# first_fews = int(len(prob_list)/2) +# last_fews = len(prob_list) - first_fews +# first_prob_list = prob_list[:first_fews] +# last_prob_list = prob_list[-last_fews:] + + # final_prob_list = first_prob_list + last_prob_list + for prob, prob_groups in student_groups.groupby("Problem Name"): + step_names_token = [] + means_and_extremes = False + for index, row in prob_groups[['Time', 'Step Name', 'Action', 'Attempt At Step', 'CF (Is Autofilled)', 'CF (Workspace Progress Status)', 'CF (Etalon)', 'CF (Problem Scenario 
Tags)']].iterrows(): + progress = row["CF (Workspace Progress Status)"] + action = row["Action"] + attempt = row["Attempt At Step"] + autofilled = row["CF (Is Autofilled)"] + step = row["Step Name"] + scenario = row['CF (Problem Scenario Tags)'] + + if not pd.isna(step): + if step in options.opt_step1 and not means_and_extremes: + etalon = row["CF (Etalon)"] + if not pd.isna(etalon): + etalon = etalon.strip('{}') + key, value = etalon.split('=') + etalon = value + try: + etalon = int(etalon) + except Exception as e: + try: + etalon = float(etalon) + means_and_extremes = True + except Exception as e: + pass + + if not autofilled: + new_step = f"{step}:{action}:{attempt}" + step_names_token.append(new_step) + + if step_names_token: + where_opt = [] + step1 = False + step2 = False + strategy_data = False + for step_oh in step_names_token: + step = step_oh.split(":") + if len(step) == 3: + step = step[0] + else: + step = ":".join(step[:2]) + + # print(f"changed {step_oh} = ? {step}") + if step == options.opt_step1[0]: + where_opt.append("_1") + step1 = True + elif step == options.opt_step2[0]: + where_opt.append("_2") + step2 = True + elif step in options.opt_step1[1:]: + where_opt.append("1") + if step1: + strategy_data = True + elif step in options.opt_step2[1:]: + where_opt.append("2") + if step2: + strategy_data = True + else: + where_opt.append("0") + + if strategy_data and step_names_token[-1].split(":")[-2] != "Done": + strategy_data = False + + if strategy_data: + # proba = random.random() + step_names_tokens = [":".join(s.split(":")[:-2]) for s in step_names_token] + step_names_token = [] + for s in step_names_tokens: + if s != "nan": + if not step_names_token or s != step_names_token[-1]: + step_names_token.append(s) + # if prob in first_prob_list: + if progress == "GRADUATED":# and means_and_extremes:# and prob in first_prob_list: + train_file.write("\t".join(step_names_token)) + train_file.write("\n") + # school, class, student id, progress, problem name, 
scenario, prefered ER or ME, total steps length, encoding in steps length + train_info.write(",".join([str(school), str(class_id), str(student), str(progress), str(prob), str(scenario), f"{1 if means_and_extremes else 0}", str(len(step_names_token)), "\t".join(map(str, where_opt))])) + train_info.write("\n") + + elif progress == "PROMOTED":# and means_and_extremes:# and prob in last_prob_list: + # elif prob in last_prob_list: + test_file.write("\t".join(step_names_token)) + test_file.write("\n") + # school, class, student id, progress, problem name, scenario, prefered ER or ME, total steps length, encoding in steps length + test_info.write(",".join([str(school), str(class_id), str(student), str(progress), str(prob), str(scenario), f"{1 if means_and_extremes else 0}", str(len(step_names_token)), "\t".join(map(str, where_opt))])) + test_info.write("\n") + + # else: + # val_file.write("\t".join(step_names_token)) + # val_file.write("\n") + # # school, class, student id, progress, problem name, scenario, prefered ER or ME, total steps length, encoding in steps length + # val_info.write(",".join([str(school), str(class_id), str(student), str(progress), str(prob), str(scenario), f"{1 if means_and_extremes else 0}", str(len(step_names_token)), "\t".join(map(str, where_opt))])) + # val_info.write("\n") + # break + # break + # break + # break + # break + + + + train_file.close() + train_info.close() + + val_file.close() + val_info.close() + + test_file.close() + test_info.close() + +def prepare_finetuning_10per_files(data_processor, options): + ''' + Used for L@S paper. 
+ Only two strategies were defined as: + 0: non-opt strategy + 1: opt used strategy + ''' + chunk_iterator = data_processor.load_file_iterator() + + train_file = open(options.train_file_path, "w") + train_info = open(options.train_info_path, "w") + train_label = open(options.train_label_path, "w") + +# val_file = open(options.val_file_path, "w") +# val_info = open(options.val_info_path, "w") +# val_label = open(options.val_label_path, "w") + + test_file = open(options.test_file_path, "w") + test_info = open(options.test_info_path, "w") + test_label = open(options.test_label_path, "w") + + overall_data = [] + overall_labels = [] + for chunk_data in chunk_iterator: + for section, section_groups in chunk_data.groupby("Level (Workspace Id)"): + if "ratio_proportion_change3" == section: + for student, student_groups in section_groups.groupby("Anon Student Id"): + student_groups.sort_values(by="Time", inplace=True) + for prob, prob_groups in student_groups.groupby("Problem Name"): + + actions = list(prob_groups["Action"]) + # A problem should be completed by a student clicking Done button. 
+ if not "Done" in actions: + continue + unique_steps = list(pd.unique(prob_groups["Step Name"])) + unique_steps_len = len(set([s for s in unique_steps if not pd.isna(s) and not (s in options.opt_step1) and not(s in options.opt_step2)])) + if unique_steps_len < 4: + continue + + step_names_token = [] + + time_stamps = list(prob_groups["Time"]) + time_stamps_list = set() + for i in range(len(time_stamps)-1): + if (time_stamps[i+1] - time_stamps[i]) < 1800: + time_stamps_list.add(time_stamps[i+1]) + + outcome = [] + help_level = [] + auto_complete = False + means_and_extremes = False + + for index, row in prob_groups[['Time', 'Step Name', 'Outcome', 'Help Level','CF (Workspace Progress Status)', 'CF (Etalon)']].iterrows(): + step = row["Step Name"] + etalon = row["CF (Etalon)"] + progress = row["CF (Workspace Progress Status)"] + if not pd.isna(step): + if step in options.opt_step1: + try: + etalon = int(etalon) + except Exception as e: + try: + etalon = float(etalon) + means_and_extremes = True + # break + except Exception as e: + pass + if (step in options.opt_step1 or step in options.opt_step2) and row["Time"] in time_stamps_list: + auto_complete = True + continue + # if not step_names_token or step != step_names_token[-1]: + # step_names_token.append(step) + + if not step_names_token or step != step_names_token[-1]: + step_names_token.append(step) + # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE'] + outcome.append(row['Outcome']) + help_level.append(str(row["Help Level"])) + else: + outcome[-1] = outcome[-1]+":"+row['Outcome'] + help_level[-1] = help_level[-1]+":"+str(row['Help Level']) + + unique_steps_len = len(set([s for s in step_names_token if not (s in options.opt_step1) and not(s in options.opt_step2)])) + # 4 and more in sequence + if step_names_token and unique_steps_len > 4: + where_opt = [] + for stp in step_names_token: + if stp in options.opt_step1: + where_opt.append("1") + elif stp in options.opt_step2: + where_opt.append("2") + else: 
+ where_opt.append("0") + + label_opt = "0" + if options.opt_step1: + any_opt1 = any(any(opt in step for step in step_names_token) for opt in options.opt_step1[1:]) + if any_opt1: + label_opt = "1" + + if options.opt_step2: + any_opt2 = any(any(opt in step for step in step_names_token) for opt in options.opt_step2[1:]) + if any_opt2: + label_opt = "1" + + # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length + info = ",".join([str(progress),str(prob), str(student), str(auto_complete), str(len(step_names_token)), + "\t".join(map(str, outcome)), "\t".join(map(str, help_level)), "\t".join(map(str, where_opt)), + f"{1 if means_and_extremes else 0}"]) + overall_data.append(["\t".join(step_names_token), info]) + overall_labels.append(label_opt) + + # overall_data.append('') + # overall_labels.append('') + + overall_labels = np.array(overall_labels) + indices_of_zeros = list(np.where(overall_labels == '0')[0]) + indices_of_ones = list(np.where(overall_labels == '1')[0]) + + train_len = int(len(overall_labels) * 0.10) + sample_size = int(train_len/2) + print(f"sample_size: {sample_size}") + sampled_instances = random.sample(indices_of_zeros, sample_size) + sampled_instances.extend(random.sample(indices_of_ones, sample_size)) + + balanced_test = min(len(indices_of_zeros), len(indices_of_ones)) + test_sampled_instances = random.sample(indices_of_zeros, balanced_test) + test_sampled_instances.extend(random.sample(indices_of_ones, balanced_test)) + + # writtenTrain = False + # writtenTest = False + for index, (all_data, label) in enumerate(zip(overall_data, overall_labels)): + + steps_seq = all_data[0] + info = all_data[1] + + if index in sampled_instances: + train_file.write(steps_seq) + train_file.write("\n") + + train_info.write(info) + train_info.write("\n") + + train_label.write(label) + train_label.write("\n") + elif index in test_sampled_instances: + # proba = random.random() + # if proba <0.5: + 
test_file.write(steps_seq) + test_file.write("\n") + + test_info.write(info) + test_info.write("\n") + + test_label.write(label) + test_label.write("\n") +# else: +# val_file.write(steps_seq) +# val_file.write("\n") + +# val_info.write(info) +# val_info.write("\n") + +# val_label.write(label) +# val_label.write("\n") + + train_file.close() + train_info.close() + train_label.close() + + # val_file.close() + # val_info.close() + # val_label.close() + + test_file.close() + test_info.close() + test_label.close() + +def prepare_finetuning_IS_FS_files(data_processor, options): + ''' + Used for L@S paper. This function gathers first three problems of each student. + Only two strategies were defined as: + 0: non-opt strategy + 1: opt used strategy + train: IS + test: FS + ''' + chunk_iterator = data_processor.load_file_iterator() + + train_file = open(options.train_file_path, "w") + train_info = open(options.train_info_path, "w") + train_label = open(options.train_label_path, "w") + +# val_file = open(options.val_file_path, "w") +# val_info = open(options.val_info_path, "w") +# val_label = open(options.val_label_path, "w") + + test_file = open(options.test_file_path, "w") + test_info = open(options.test_info_path, "w") + test_label = open(options.test_label_path, "w") + + for chunk_data in chunk_iterator: + for section, section_groups in chunk_data.groupby("Level (Workspace Id)"): + if "ratio_proportion_change3" == section: + for student, student_groups in section_groups.groupby("Anon Student Id"): + student_groups.sort_values(by="Time", inplace=True) + + prob_list = list(pd.unique(student_groups["Problem Name"])) + if len(prob_list) < 3: + continue + selected = 3 #1. int(len(prob_list)/2) + #2. 3 & <6 + #3. 3 & <3 + first_prob_list = prob_list[:selected] + last_prob_list = prob_list[-selected:] + + for prob, prob_groups in student_groups.groupby("Problem Name"): + + actions = list(prob_groups["Action"]) + # A problem should be completed by a student clicking Done button. 
+ if not "Done" in actions: + continue + unique_steps = list(pd.unique(prob_groups["Step Name"])) + unique_steps_len = len(set([s for s in unique_steps if not pd.isna(s) and not (s in options.opt_step1) and not(s in options.opt_step2)])) + if unique_steps_len < 4: + continue + + step_names_token = [] + + time_stamps = list(prob_groups["Time"]) + time_stamps_list = set() + for i in range(len(time_stamps)-1): + if (time_stamps[i+1] - time_stamps[i]) < 1800: + time_stamps_list.add(time_stamps[i+1]) + + outcome = [] + help_level = [] + auto_complete = False + means_and_extremes = False + + for index, row in prob_groups[['Time', 'Step Name', 'Outcome', 'Help Level','CF (Workspace Progress Status)', 'CF (Etalon)']].iterrows(): + step = row["Step Name"] + etalon = row["CF (Etalon)"] + progress = row["CF (Workspace Progress Status)"] + if not pd.isna(step): + if step in options.opt_step1: + try: + etalon = int(etalon) + except Exception as e: + try: + etalon = float(etalon) + means_and_extremes = True + # break + except Exception as e: + pass + if (step in options.opt_step1 or step in options.opt_step2) and row["Time"] in time_stamps_list: + auto_complete = True + continue + # if not step_names_token or step != step_names_token[-1]: + # step_names_token.append(step) + + if not step_names_token or step != step_names_token[-1]: + step_names_token.append(step) + # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE'] + outcome.append(row['Outcome']) + help_level.append(str(row["Help Level"])) + else: + outcome[-1] = outcome[-1]+":"+row['Outcome'] + help_level[-1] = help_level[-1]+":"+str(row['Help Level']) + + unique_steps_len = len(set([s for s in step_names_token if not (s in options.opt_step1) and not(s in options.opt_step2)])) + # 4 and more in sequence + if step_names_token and unique_steps_len > 4: + where_opt = [] + for stp in step_names_token: + if stp in options.opt_step1: + where_opt.append("1") + elif stp in options.opt_step2: + where_opt.append("2") + else: 
+ where_opt.append("0") + + label_opt = "0" + if options.opt_step1: + any_opt1 = any(any(opt in step for step in step_names_token) for opt in options.opt_step1[1:]) + if any_opt1: + label_opt = "1" + + if options.opt_step2: + any_opt2 = any(any(opt in step for step in step_names_token) for opt in options.opt_step2[1:]) + if any_opt2: + label_opt = "1" + + # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length + info = ",".join([str(progress),str(prob), str(student), str(auto_complete), str(len(step_names_token)), + "\t".join(map(str, outcome)), "\t".join(map(str, help_level)), "\t".join(map(str, where_opt)), + f"{1 if means_and_extremes else 0}"]) + if prob in first_prob_list: + train_file.write("\t".join(step_names_token)) + train_file.write("\n") + + train_info.write(info) + train_info.write("\n") + + train_label.write(label_opt) + train_label.write("\n") + elif prob in last_prob_list: + test_file.write("\t".join(step_names_token)) + test_file.write("\n") + + test_info.write(info) + test_info.write("\n") + + test_label.write(label_opt) + test_label.write("\n") + + train_file.close() + train_info.close() + train_label.close() + + # val_file.close() + # val_info.close() + # val_label.close() + + test_file.close() + test_info.close() + test_label.close() + +def prepare_finetuning_IS_files_old(data_processor, opts): + ''' + Used for L@S paper. This function gathers first three problems of each student. 
+ Only two strategies were defined as: + 0: non-opt strategy + 1: opt used strategy + ''' + + options = copy.deepcopy(opts) + for k,v in vars(opts).items(): + if k.startswith("train") or k.startswith("test"): + if v: + f_path = v.split("/") + f_path = f_path[0]+"/"+f_path[1]+"/IS/"+f_path[2] + setattr(options, f"{k}", f_path) + print(f"options.{k} : {getattr(options, f'{k}')}") + + steps = pickle.load(open(f"{options.dataset_folder}unique_steps_list.pkl","rb")) + chunk_iterator = data_processor.load_file_iterator() + + train_file = open(options.train_file_path, "w") + train_info = open(options.train_info_path, "w") + train_label = open(options.train_label_path, "w") + trainr_label = open(options.trainr_label_path, "w") + train_gt_label = open(options.train_gt_label_path, "w") + + # test_file = open(options.test_file_path, "w") + # test_info = open(options.test_info_path, "w") + # test_label = open(options.test_label_path, "w") + # testr_label = open(options.testr_label_path, "w") + # test_gt_label = open(options.test_gt_label_path, "w") + + overall_data = [] + overall_labels = [] + for chunk_data in chunk_iterator: + for section, section_groups in chunk_data.groupby("Level (Workspace Id)"): + if options.workspace_name == section: + for student, student_groups in section_groups.groupby("Anon Student Id"): + + student_groups.sort_values(by="Time") + prob_list = list(pd.unique(student_groups["Problem Name"])) + + if len(prob_list) < 3: + continue + + first_prob_list = prob_list[:3] +# last_prob_list = prob_list[-3:] +# # print(len(first_prob_list), len(last_prob_list)) + +# final_prob_list = first_prob_list + last_prob_list + # print(len(prob_list), len(final_prob_list)) #, final_prob_list) + + for prob, prob_groups in student_groups.groupby("Problem Name"): + # For first 3 and last 3 only + if not prob in first_prob_list: + continue + + step_names_token = [] + + time_stamps = list(prob_groups["Time"]) + time_stamps_list = set() + for i in range(len(time_stamps)-1): + 
if (time_stamps[i+1] - time_stamps[i]) < 2000: + time_stamps_list.add(time_stamps[i+1]) + + progress = "" + outcome = [] + help_level = [] + auto_complete = False + means_and_extremes = False + finals = len(options.final_step) + totals = 0 + + for index, row in prob_groups[['Time', 'Step Name', 'Outcome', 'Help Level','CF (Workspace Progress Status)', 'CF (Etalon)']].iterrows(): + step = row["Step Name"] + etalon = row["CF (Etalon)"] + progress = row["CF (Workspace Progress Status)"] + if not pd.isna(step): + if step in options.opt_step1: + try: + etalon = int(etalon) + except Exception as e: + try: + etalon = float(etalon) + means_and_extremes = True + # break + except Exception as e: + pass + if (step in options.opt_step1 or step in options.opt_step2) and row["Time"] in time_stamps_list: + # if row["Time"] in time_stamps_list: + auto_complete = True + # print(row) + continue + # if not step_names_token or step != step_names_token[-1]: + # step_names_token.append(step) + + if not step_names_token or step != step_names_token[-1]: + step_names_token.append(step) + # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE'] + outcome.append(row['Outcome']) + help_level.append(str(row["Help Level"])) + if finals == 0: + totals += 1 + else: + outcome[-1] = outcome[-1]+":"+row['Outcome'] + help_level[-1] = help_level[-1]+":"+str(row['Help Level']) + + error_ind = ['BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE'] + errors = 0 + for step, out in zip(step_names_token, outcome): + if (finals and step in options.final_step) or totals > 0: + out = out.split(":") + if any(any(ind in o for o in out) for ind in error_ind): + errors +=1 + + if finals: + totals = finals + # 4 and more in sequence + if step_names_token: # and len(step_names_token) > 3 + + where_opt = [] + for stp in step_names_token: + if stp in options.opt_step1: + where_opt.append("1") + elif stp in options.opt_step2: + where_opt.append("2") + else: + where_opt.append("0") + + label_opt = "0" + if 
options.opt_step1: + any_opt1 = any(any(opt in step for step in step_names_token) for opt in options.opt_step1[1:]) + if any_opt1: + label_opt = "1" + + + if options.opt_step2: + any_opt2 = any(any(opt in step for step in step_names_token) for opt in options.opt_step2[1:]) + if any_opt2: + label_opt = "1" + + correctness = 1 - errors/totals + strat_correct = "0" + if correctness > 0.75: + strat_correct = "1" + + # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length + info = ",".join([str(progress),str(prob), str(student), str(auto_complete), str(len(step_names_token)),"\t".join(map(str, outcome)), "\t".join(map(str, help_level)), "\t".join(map(str, where_opt)), str(correctness)]) + + overall_data.append(["\t".join(step_names_token), strat_correct, info, f"{1 if means_and_extremes else 0}"]) + overall_labels.append(label_opt) + + overall_data.append('') + overall_labels.append('') + +# overall_labels = np.array(overall_labels) +# indices_of_zeros = list(np.where(overall_labels == '0')[0]) +# indices_of_ones = list(np.where(overall_labels == '1')[0]) + +# zeros_instances_size = int(1 * len(indices_of_zeros)) +# ones_instances_size = int(1 * len(indices_of_ones)) +# sample_size = min(zeros_instances_size, ones_instances_size) +# sampled_instances = random.sample(indices_of_zeros, sample_size) +# sampled_instances.extend(random.sample(indices_of_ones, sample_size)) + + writtenTrain = False + # writtenTest = False + for index, (all_data, label) in enumerate(zip(overall_data, overall_labels)): + if all_data: + steps_seq = all_data[0] + strat_correct = all_data[1] + info = all_data[2] + me_opt = all_data[3] + + # if index in sampled_instances: + writtenTrain = True + train_file.write(steps_seq) + train_file.write("\n") + train_label.write(label) + train_label.write("\n") + trainr_label.write(strat_correct) + trainr_label.write("\n") + train_info.write(info) + train_info.write("\n") + train_gt_label.write(me_opt) + 
train_gt_label.write("\n") + # else: + # writtenTest = True + # test_file.write(steps_seq) + # test_file.write("\n") + # # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length + # test_label.write(label) + # test_label.write("\n") + # # testr_label.write(str(correctness)) + # testr_label.write(strat_correct) + # testr_label.write("\n") + # test_info.write(info) + # test_info.write("\n") + # test_gt_label.write(me_opt) + # test_gt_label.write("\n") + else: + # Indicates actions of next student + # Indicates next problem + if writtenTrain: + writtenTrain = False + train_file.write("\n") + train_info.write("\n") + train_label.write("\n") + trainr_label.write("\n") + train_gt_label.write("\n") + # if writtenTest: + # writtenTest = False + # test_file.write("\n") + # test_info.write("\n") + # test_label.write("\n") + # testr_label.write("\n") + # test_gt_label.write("\n") + + train_file.close() + train_info.close() + train_label.close() + trainr_label.close() + train_gt_label.close() + + # test_file.close() + # test_info.close() + # test_label.close() + # testr_label.close() + # test_gt_label.close() + +def prepare_finetuning_FS_files_old(data_processor, opts): + ''' + Used for L@S paper. This function gathers last three problems of each student. 
+ Only two strategies were defined as: + 0: non-opt strategy + 1: opt used strategy + ''' + + options = copy.deepcopy(opts) + for k,v in vars(opts).items(): + if k.startswith("train") or k.startswith("test"): + if v: + f_path = v.split("/") + f_path = f_path[0]+"/"+f_path[1]+"/FS/"+f_path[2] + setattr(options, f"{k}", f_path) + print(f"options.{k} : {getattr(options, f'{k}')}") + + steps = pickle.load(open(f"{options.dataset_folder}unique_steps_list.pkl","rb")) + chunk_iterator = data_processor.load_file_iterator() + + train_file = open(options.train_file_path, "w") + train_info = open(options.train_info_path, "w") + train_label = open(options.train_label_path, "w") + trainr_label = open(options.trainr_label_path, "w") + train_gt_label = open(options.train_gt_label_path, "w") + + # test_file = open(options.test_file_path, "w") + # test_info = open(options.test_info_path, "w") + # test_label = open(options.test_label_path, "w") + # testr_label = open(options.testr_label_path, "w") + # test_gt_label = open(options.test_gt_label_path, "w") + + overall_data = [] + overall_labels = [] + for chunk_data in chunk_iterator: + for section, section_groups in chunk_data.groupby("Level (Workspace Id)"): + if options.workspace_name == section: + for student, student_groups in section_groups.groupby("Anon Student Id"): + + student_groups.sort_values(by="Time") + prob_list = list(pd.unique(student_groups["Problem Name"])) + + if len(prob_list) < 3: + continue + + # first_prob_list = prob_list[:3] + last_prob_list = prob_list[-3:] +# # print(len(first_prob_list), len(last_prob_list)) + +# final_prob_list = first_prob_list + last_prob_list + # print(len(prob_list), len(final_prob_list)) #, final_prob_list) + + for prob, prob_groups in student_groups.groupby("Problem Name"): + # For first 3 and last 3 only + if not prob in last_prob_list: + continue + + step_names_token = [] + + time_stamps = list(prob_groups["Time"]) + time_stamps_list = set() + for i in range(len(time_stamps)-1): + 
if (time_stamps[i+1] - time_stamps[i]) < 2000: + time_stamps_list.add(time_stamps[i+1]) + + progress = "" + outcome = [] + help_level = [] + auto_complete = False + means_and_extremes = False + finals = len(options.final_step) + totals = 0 + + for index, row in prob_groups[['Time', 'Step Name', 'Outcome', 'Help Level','CF (Workspace Progress Status)', 'CF (Etalon)']].iterrows(): + step = row["Step Name"] + etalon = row["CF (Etalon)"] + progress = row["CF (Workspace Progress Status)"] + if not pd.isna(step): + if step in options.opt_step1: + try: + etalon = int(etalon) + except Exception as e: + try: + etalon = float(etalon) + means_and_extremes = True + # break + except Exception as e: + pass + if (step in options.opt_step1 or step in options.opt_step2) and row["Time"] in time_stamps_list: + # if row["Time"] in time_stamps_list: + auto_complete = True + # print(row) + continue + # if not step_names_token or step != step_names_token[-1]: + # step_names_token.append(step) + + if not step_names_token or step != step_names_token[-1]: + step_names_token.append(step) + # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE'] + outcome.append(row['Outcome']) + help_level.append(str(row["Help Level"])) + if finals == 0: + totals += 1 + else: + outcome[-1] = outcome[-1]+":"+row['Outcome'] + help_level[-1] = help_level[-1]+":"+str(row['Help Level']) + + error_ind = ['BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE'] + errors = 0 + for step, out in zip(step_names_token, outcome): + if (finals and step in options.final_step) or totals > 0: + out = out.split(":") + if any(any(ind in o for o in out) for ind in error_ind): + errors +=1 + + if finals: + totals = finals + # 4 and more in sequence + if step_names_token: # and len(step_names_token) > 3 + + where_opt = [] + for stp in step_names_token: + if stp in options.opt_step1: + where_opt.append("1") + elif stp in options.opt_step2: + where_opt.append("2") + else: + where_opt.append("0") + + label_opt = "0" + if 
options.opt_step1: + any_opt1 = any(any(opt in step for step in step_names_token) for opt in options.opt_step1[1:]) + if any_opt1: + label_opt = "1" + + + if options.opt_step2: + any_opt2 = any(any(opt in step for step in step_names_token) for opt in options.opt_step2[1:]) + if any_opt2: + label_opt = "1" + + correctness = 1 - errors/totals + strat_correct = "0" + if correctness > 0.75: + strat_correct = "1" + + # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length + info = ",".join([str(progress),str(prob), str(student), str(auto_complete), str(len(step_names_token)),"\t".join(map(str, outcome)), "\t".join(map(str, help_level)), "\t".join(map(str, where_opt)), str(correctness)]) + + overall_data.append(["\t".join(step_names_token), strat_correct, info, f"{1 if means_and_extremes else 0}"]) + overall_labels.append(label_opt) + + overall_data.append('') + overall_labels.append('') + +# overall_labels = np.array(overall_labels) +# indices_of_zeros = list(np.where(overall_labels == '0')[0]) +# indices_of_ones = list(np.where(overall_labels == '1')[0]) + +# zeros_instances_size = int(0.10 * len(indices_of_zeros)) +# ones_instances_size = int(0.10 * len(indices_of_ones)) +# sample_size = min(zeros_instances_size, ones_instances_size) +# sampled_instances = random.sample(indices_of_zeros, sample_size) +# sampled_instances.extend(random.sample(indices_of_ones, sample_size)) + + writtenTrain = False + # writtenTest = False + for index, (all_data, label) in enumerate(zip(overall_data, overall_labels)): + if all_data: + steps_seq = all_data[0] + strat_correct = all_data[1] + info = all_data[2] + me_opt = all_data[3] + + # if index in sampled_instances: + writtenTrain = True + train_file.write(steps_seq) + train_file.write("\n") + train_label.write(label) + train_label.write("\n") + trainr_label.write(strat_correct) + trainr_label.write("\n") + train_info.write(info) + train_info.write("\n") + 
train_gt_label.write(me_opt) + train_gt_label.write("\n") + # else: + # writtenTest = True + # test_file.write(steps_seq) + # test_file.write("\n") + # # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length + # test_label.write(label) + # test_label.write("\n") + # # testr_label.write(str(correctness)) + # testr_label.write(strat_correct) + # testr_label.write("\n") + # test_info.write(info) + # test_info.write("\n") + # test_gt_label.write(me_opt) + # test_gt_label.write("\n") + else: + # Indicates actions of next student + # Indicates next problem + if writtenTrain: + writtenTrain = False + train_file.write("\n") + train_info.write("\n") + train_label.write("\n") + trainr_label.write("\n") + train_gt_label.write("\n") + # if writtenTest: + # writtenTest = False + # test_file.write("\n") + # test_info.write("\n") + # test_label.write("\n") + # testr_label.write("\n") + # test_gt_label.write("\n") + + train_file.close() + train_info.close() + train_label.close() + trainr_label.close() + train_gt_label.close() + + # test_file.close() + # test_info.close() + # test_label.close() + # testr_label.close() + # test_gt_label.close() + + +def prepare_finetuning_correctness_files(data_processor, options): + ''' + Ongoing research. Student strategy learning/predicting. 
+ FinalAnswer step + Correct: 1 , correctness of final strategy > 0.75 + Incorrect: 0 , else < 0.75 + ''' + chunk_iterator = data_processor.load_file_iterator() + + train_file = open(options.train_file_path, "w") + train_info = open(options.train_info_path, "w") + train_label = open(options.train_label_path, "w") + +# val_file = open(options.val_file_path, "w") +# val_info = open(options.val_info_path, "w") +# val_label = open(options.val_label_path, "w") + + test_file = open(options.test_file_path, "w") + test_info = open(options.test_info_path, "w") + test_label = open(options.test_label_path, "w") + + overall_data = [] + overall_labels = [] + for chunk_data in chunk_iterator: + for section, section_groups in chunk_data.groupby("Level (Workspace Id)"): + if "ratio_proportion_change3" == section: + for student, student_groups in section_groups.groupby("Anon Student Id"): + student_groups.sort_values(by="Time", inplace=True) + for prob, prob_groups in student_groups.groupby("Problem Name"): + + actions = list(prob_groups["Action"]) + # A problem should be completed by a student clicking Done button. 
+ if not "Done" in actions: + continue + unique_steps = list(pd.unique(prob_groups["Step Name"])) + unique_steps_len = len(set([s for s in unique_steps if not pd.isna(s) and not (s in options.opt_step1) and not(s in options.opt_step2)])) + if unique_steps_len < 4: + continue + + step_names_token = [] + + time_stamps = list(prob_groups["Time"]) + time_stamps_list = set() + for i in range(len(time_stamps)-1): + if (time_stamps[i+1] - time_stamps[i]) < 1800: + time_stamps_list.add(time_stamps[i+1]) + + outcome = [] + help_level = [] + auto_complete = False + means_and_extremes = False + final_correct = 0 + for index, row in prob_groups[['Time', 'Step Name', 'Outcome', 'Help Level','CF (Workspace Progress Status)', 'CF (Etalon)']].iterrows(): + step = row["Step Name"] + etalon = row["CF (Etalon)"] + progress = row["CF (Workspace Progress Status)"] + if not pd.isna(step): + if step in options.opt_step1: + try: + etalon = int(etalon) + except Exception as e: + try: + etalon = float(etalon) + means_and_extremes = True + # break + except Exception as e: + pass + if (step in options.opt_step1 or step in options.opt_step2) and row["Time"] in time_stamps_list: + auto_complete = True + continue + # if not step_names_token or step != step_names_token[-1]: + # step_names_token.append(step) + + if not step_names_token or step != step_names_token[-1]: + step_names_token.append(step) + # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE'] + outcome.append(row['Outcome']) + help_level.append(str(row["Help Level"])) + else: + outcome[-1] = outcome[-1]+":"+row['Outcome'] + help_level[-1] = help_level[-1]+":"+str(row['Help Level']) + if step == "FinalAnswer": + final_correct += 1 + unique_steps_len = len(set([s for s in step_names_token if not (s in options.opt_step1) and not(s in options.opt_step2)])) + # 4 and more in sequence + if step_names_token and unique_steps_len > 4: + where_opt = [] + for stp in step_names_token: + if stp in options.opt_step1: + where_opt.append("1") 
+ elif stp in options.opt_step2: + where_opt.append("2") + else: + where_opt.append("0") + + label_opt = "0" + if final_correct == 1: + label_opt = "1" + + # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length + info = ",".join([str(progress),str(prob), str(student), str(auto_complete), str(len(step_names_token)), + "\t".join(map(str, outcome)), "\t".join(map(str, help_level)), "\t".join(map(str, where_opt)), + f"{1 if means_and_extremes else 0}"]) + overall_data.append(["\t".join(step_names_token), info]) + overall_labels.append(label_opt) + + # overall_data.append('') + # overall_labels.append('') + + overall_labels = np.array(overall_labels) + indices_of_zeros = list(np.where(overall_labels == '0')[0]) + indices_of_ones = list(np.where(overall_labels == '1')[0]) + + train_len = int(len(overall_labels) * 0.10) + sample_size = int(train_len/2) + print(f"sample_size: {sample_size}") + sampled_instances = random.sample(indices_of_zeros, sample_size) + sampled_instances.extend(random.sample(indices_of_ones, sample_size)) + + # writtenTrain = False + # writtenTest = False + for index, (all_data, label) in enumerate(zip(overall_data, overall_labels)): + + steps_seq = all_data[0] + info = all_data[1] + + if index in sampled_instances: + train_file.write(steps_seq) + train_file.write("\n") + + train_info.write(info) + train_info.write("\n") + + train_label.write(label) + train_label.write("\n") + else: + # proba = random.random() + # if proba <0.5: + test_file.write(steps_seq) + test_file.write("\n") + + test_info.write(info) + test_info.write("\n") + + test_label.write(label) + test_label.write("\n") +# else: +# val_file.write(steps_seq) +# val_file.write("\n") + +# val_info.write(info) +# val_info.write("\n") + +# val_label.write(label) +# val_label.write("\n") + + train_file.close() + train_info.close() + train_label.close() + + # val_file.close() + # val_info.close() + # val_label.close() + + test_file.close() 
+ test_info.close() + test_label.close() + +def prepare_finetuning_correctness_files_old(data_processor, opts): + ''' + Ongoing research. Student strategy learning/predicting. + Correct, 1: correctness of final strategy > 0.75 + Incorrect, 0: else < 0.75 + ''' + options = copy.deepcopy(opts) + for k,v in vars(opts).items(): + if k.startswith("train") or k.startswith("test"): + if v: + f_path = v.split("/") + f_path = f_path[0]+"/"+f_path[1]+"/fa_correctness/"+f_path[2] + # f_path = f_path[0]+"/"+f_path[1]+"/check2/"+f_path[2] + setattr(options, f"{k}", f_path) + print(f"options.{k} : {getattr(options, f'{k}')}") + + steps = pickle.load(open(f"{options.dataset_folder}unique_steps_list.pkl","rb")) + chunk_iterator = data_processor.load_file_iterator() + + train_file = open(options.train_file_path, "w") + train_info = open(options.train_info_path, "w") + train_label = open(options.train_label_path, "w") + # trainr_label = open(options.trainr_label_path, "w") + # train_gt_label = open(options.train_gt_label_path, "w") + + test_file = open(options.test_file_path, "w") + test_info = open(options.test_info_path, "w") + test_label = open(options.test_label_path, "w") + # testr_label = open(options.testr_label_path, "w") + # test_gt_label = open(options.test_gt_label_path, "w") + ws = "_".join(options.workspace_name.split("_")[:-1]) + print("Workspace: ", ws) + overall_data = [] + overall_labels = [] + for chunk_data in chunk_iterator: + for section, section_groups in chunk_data.groupby("Level (Workspace Id)"): + if ws == section: + for student, student_groups in section_groups.groupby("Anon Student Id"): + + student_groups.sort_values(by="Time") + prob_list = list(pd.unique(student_groups["Problem Name"])) + + # if len(prob_list) < 3: + # continue + +# first_prob_list = prob_list[:3] + # last_prob_list = prob_list[-3:] +# # print(len(first_prob_list), len(last_prob_list)) + +# final_prob_list = first_prob_list + last_prob_list + # print(len(prob_list), 
len(final_prob_list)) #, final_prob_list) + + for prob, prob_groups in student_groups.groupby("Problem Name"): + # For first 3 and last 3 only +# if not prob in last_prob_list: +# continue + # print(options.final_step in list(prob_groups["Step Name"])) + # if not (options.final_step in list(prob_groups["Step Name"])): + # continue + step_names_token = [] + + time_stamps = list(prob_groups["Time"]) + time_stamps_list = set() + for i in range(len(time_stamps)-1): + if (time_stamps[i+1] - time_stamps[i]) < 2000: + time_stamps_list.add(time_stamps[i+1]) + + progress = "" + outcome = [] + help_level = [] + auto_complete = False + means_and_extremes = False + # finals = len(options.final_step) + + + totals = 0 + + for index, row in prob_groups[['Time', 'Step Name', 'Outcome', 'Help Level','CF (Workspace Progress Status)', 'CF (Etalon)']].iterrows(): + step = row["Step Name"] + etalon = row["CF (Etalon)"] + progress = row["CF (Workspace Progress Status)"] + if not pd.isna(step): + if step in options.opt_step1: + try: + etalon = int(etalon) + except Exception as e: + try: + etalon = float(etalon) + means_and_extremes = True + # break + except Exception as e: + pass + if (step in options.opt_step1 or step in options.opt_step2) and row["Time"] in time_stamps_list: + # if row["Time"] in time_stamps_list: + auto_complete = True + # print(row) + continue + # if not step_names_token or step != step_names_token[-1]: + # step_names_token.append(step) + + if not step_names_token or step != step_names_token[-1]: + step_names_token.append(step) + # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE'] + outcome.append(row['Outcome']) + help_level.append(str(row["Help Level"])) + # if finals == 0: + # totals += 1 + else: + outcome[-1] = outcome[-1]+":"+row['Outcome'] + help_level[-1] = help_level[-1]+":"+str(row['Help Level']) + + error_ind = ['BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE'] + errors = 0 + for step, out in zip(step_names_token, outcome): + if (step in 
options.final_step):# or totals > 0: + out = out.split(":") + totals = len(out) + # print(totals) + for ind in error_ind: + if ind in out: + errors +=1 + + # if finals: + # totals = finals + # 4 and more in sequence + if step_names_token and totals>0: # and len(step_names_token) > 3 + + where_opt = [] + for stp in step_names_token: + if stp in options.opt_step1: + where_opt.append("1") + elif stp in options.opt_step2: + where_opt.append("2") + else: + where_opt.append("0") + + label_opt = "0" + if options.opt_step1: + all_opt1 = all(any(opt in step for step in step_names_token) for opt in options.opt_step1) + any_opt1 = any(any(opt in step for step in step_names_token) for opt in options.opt_step1[1:]) + + if any_opt1: + label_opt = "2" + if all_opt1: + label_opt = "1" + + + if options.opt_step2: + all_opt2 = all(any(opt in step for step in step_names_token) for opt in options.opt_step2) + any_opt2 = any(any(opt in step for step in step_names_token) for opt in options.opt_step2[1:]) + if any_opt2: + label_opt = "4" + if all_opt2: + label_opt = "3" + if any_opt1 and any_opt2: + label_opt = "5" + if any_opt1 and all_opt2: + label_opt = "6" + if all_opt1 and any_opt2: + label_opt = "7" + if all_opt1 and all_opt2: + label_opt = "8" + + + correctness = 1 - errors/totals + strat_correct = "0" + if correctness > 0.75: + strat_correct = "1" + + # if not means_and_extremes and label_opt == "2": + # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length + info = ",".join([str(progress),str(prob), str(student), str(auto_complete), str(len(step_names_token)),"\t".join(map(str, outcome)), "\t".join(map(str, help_level)), "\t".join(map(str, where_opt)), str(correctness), f"{1 if means_and_extremes else 0}"]) + + overall_data.append(["\t".join(step_names_token), label_opt, info]) + overall_labels.append(strat_correct) + + overall_data.append('') + overall_labels.append('') + + overall_labels = np.array(overall_labels, 
dtype=str) + indices_of_zeros = list(np.where(overall_labels == '0')[0]) + indices_of_ones = list(np.where(overall_labels == '1')[0]) + + per = 0.20 + zeros_instances_size = int(per * len(indices_of_zeros)) + ones_instances_size = int(per * len(indices_of_ones)) + + sample_size = min(zeros_instances_size, ones_instances_size) + sampled_instances = random.sample(indices_of_zeros, sample_size) + sampled_instances.extend(random.sample(indices_of_ones, sample_size)) + + writtenTrain = False + writtenTest = False + for index, (all_data, label) in enumerate(zip(overall_data, overall_labels)): + if all_data: + steps_seq = all_data[0] + label_opt = all_data[1] + info = all_data[2] + # me_opt = all_data[3] + + if index in sampled_instances: + writtenTrain = True + train_file.write(steps_seq) + train_file.write("\n") + train_label.write(label) + train_label.write("\n") + # trainr_label.write(label_opt) + # trainr_label.write("\n") + train_info.write(info) + train_info.write("\n") + # train_gt_label.write(me_opt) + # train_gt_label.write("\n") + else: + writtenTest = True + test_file.write(steps_seq) + test_file.write("\n") + # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length + test_label.write(label) + test_label.write("\n") + # testr_label.write(str(correctness)) + # testr_label.write(label_opt) + # testr_label.write("\n") + test_info.write(info) + test_info.write("\n") + # test_gt_label.write(me_opt) + # test_gt_label.write("\n") + else: + # Indicates actions of next student + # Indicates next problem + if writtenTrain: + writtenTrain = False + train_file.write("\n") + train_info.write("\n") + train_label.write("\n") + # trainr_label.write("\n") + # train_gt_label.write("\n") + if writtenTest: + writtenTest = False + test_file.write("\n") + test_info.write("\n") + test_label.write("\n") + # testr_label.write("\n") + # test_gt_label.write("\n") + + + train_file.close() + train_info.close() + train_label.close() + 
def prepare_finetuning_correctness_aaai_files(data_processor, opts):
    '''
    Ongoing research. Student strategy learning/predicting.
    Correct, 1: correctness of final strategy > 0.75
    Incorrect, 0: else < 0.75

    Builds AAAI fine-tuning files: for each (student, problem) pair whose
    student appears in the pre-pickled high/mid/low performer lists and whose
    problem is in the pickled problem list, extracts a de-duplicated step-name
    sequence, labels it "1"/"0" by final-step correctness, and writes the
    sequences/labels/info to train (high performers), val (mid) and test (low)
    files.

    data_processor: provides load_file_iterator() yielding DataFrame chunks
                    of the raw interaction log.
    opts:           namespace with file paths, workspace_name, opt_step1,
                    opt_step2 and final_step; not mutated (deep-copied).
    '''
    options = copy.deepcopy(opts)
    # Redirect every train*/val*/test* output path into the ".../aaai/" sub-folder.
    for k,v in vars(opts).items():
        if k.startswith("train") or k.startswith("test") or k.startswith("val"):
            if v:
                f_path = v.split("/")
                # f_path = f_path[0]+"/"+f_path[1]+"/correctness/"+f_path[2]
                f_path = f_path[0]+"/"+f_path[1]+"/aaai/"+f_path[2]
                setattr(options, f"{k}", f_path)
                print(f"options.{k} : {getattr(options, f'{k}')}")

    # steps = pickle.load(open(f"{options.dataset_folder}unique_steps_list.pkl","rb"))
    chunk_iterator = data_processor.load_file_iterator()

    # NOTE(review): output handles are opened without `with`; they are only
    # closed at the end, so an exception mid-way leaks them — confirm intent.
    train_file = open(options.train_file_path, "w")
    train_info = open(options.train_info_path, "w")
    train_label = open(options.train_label_path, "w")

    val_file = open(options.val_file_path, "w")
    val_info = open(options.val_info_path, "w")
    val_label = open(options.val_label_path, "w")

    test_file = open(options.test_file_path, "w")
    test_info = open(options.test_info_path, "w")
    test_label = open(options.test_label_path, "w")

    # Pre-computed student cohorts and problem selection (pickled elsewhere).
    high_performer = pickle.load(open(f"{options.workspace_name}/aaai/change3_high_performers.pkl", "rb"))
    mid_performer = pickle.load(open(f"{options.workspace_name}/aaai/change3_mid_performers.pkl", "rb"))
    low_performer = pickle.load(open(f"{options.workspace_name}/aaai/change3_low_performers.pkl", "rb"))
    prob_sel_list = pickle.load(open(f"{options.workspace_name}/aaai/change3_problem_list.pkl", "rb"))

    # Workspace name with its last "_"-separated token dropped; used to match
    # the "Level (Workspace Id)" column instead of the full workspace name.
    ws = "_".join(options.workspace_name.split("_")[:-1])

    print(ws, len(high_performer), len(mid_performer), len(low_performer), len(prob_sel_list))
    overall_data = []
    overall_labels = []
    for chunk_data in chunk_iterator:
        for section, section_groups in chunk_data.groupby("Level (Workspace Id)"):
            # if options.workspace_name == section:
            if ws == section:
                for student, student_groups in section_groups.groupby("Anon Student Id"):
                    if student in high_performer or student in mid_performer or student in low_performer:
                        # NOTE(review): sort_values returns a new DataFrame and
                        # the result is discarded, so rows are NOT actually
                        # re-ordered by Time here — confirm intent.
                        student_groups.sort_values(by="Time")
                        prob_list = list(pd.unique(student_groups["Problem Name"]))

                        for prob, prob_groups in student_groups.groupby("Problem Name"):
                            # For first 3 and last 3 only
                            if not prob in prob_sel_list:
                                continue

                            step_names_token = []

                            # Timestamps that follow the previous one within
                            # 2000ms are treated as auto-completed events.
                            # presumably Time is in milliseconds — TODO confirm.
                            time_stamps = list(prob_groups["Time"])
                            time_stamps_list = set()
                            for i in range(len(time_stamps)-1):
                                if (time_stamps[i+1] - time_stamps[i]) < 2000:
                                    time_stamps_list.add(time_stamps[i+1])

                            progress = ""
                            outcome = []
                            help_level = []
                            auto_complete = False
                            means_and_extremes = False
                            totals = 0

                            for index, row in prob_groups[['Time', 'Step Name', 'Outcome', 'Help Level','CF (Workspace Progress Status)', 'CF (Etalon)']].iterrows():
                                step = row["Step Name"]
                                etalon = row["CF (Etalon)"]
                                progress = row["CF (Workspace Progress Status)"]
                                if not pd.isna(step):
                                    if step in options.opt_step1:
                                        # If the etalon parses as float but not
                                        # int, the student used the
                                        # means-and-extremes variant.
                                        try:
                                            etalon = int(etalon)
                                        except Exception as e:
                                            try:
                                                etalon = float(etalon)
                                                means_and_extremes = True
                                                # break
                                            except Exception as e:
                                                pass
                                    # Optional steps landing inside the rapid-fire
                                    # window are assumed auto-completed and skipped.
                                    if (step in options.opt_step1 or step in options.opt_step2) and row["Time"] in time_stamps_list:
                                        # if row["Time"] in time_stamps_list:
                                        auto_complete = True
                                        # print(row)
                                        continue
                                    # if not step_names_token or step != step_names_token[-1]:
                                    #     step_names_token.append(step)

                                    # Collapse consecutive repeats of the same step;
                                    # their outcomes/help levels are ":"-joined.
                                    if not step_names_token or step != step_names_token[-1]:
                                        step_names_token.append(step)
                                        # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE']
                                        outcome.append(row['Outcome'])
                                        help_level.append(str(row["Help Level"]))
                                    else:
                                        outcome[-1] = outcome[-1]+":"+row['Outcome']
                                        help_level[-1] = help_level[-1]+":"+str(row['Help Level'])

                            # Count distinct error indicators among the final
                            # step's attempts; totals is the attempt count of
                            # the final step.
                            # NOTE(review): if several steps match final_step,
                            # totals keeps only the LAST one's attempt count
                            # while errors accumulates across all — confirm.
                            error_ind = ['BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE']
                            errors = 0
                            for step, out in zip(step_names_token, outcome):
                                if (step in options.final_step):
                                    out = out.split(":")
                                    totals = len(out)
                                    # print(totals)
                                    for ind in error_ind:
                                        if ind in out:
                                            errors +=1

                            # 4 and more in sequence
                            if step_names_token and totals>0: # and len(step_names_token) > 3

                                # Per-step marker: "1" = opt_step1, "2" = opt_step2,
                                # "0" = ordinary step.
                                where_opt = []
                                for stp in step_names_token:
                                    if stp in options.opt_step1:
                                        where_opt.append("1")
                                    elif stp in options.opt_step2:
                                        where_opt.append("2")
                                    else:
                                        where_opt.append("0")



                                correctness = 1 - errors/totals
                                strat_correct = "0"
                                if correctness > 0.75:
                                    strat_correct = "1"

                                # if not means_and_extremes and label_opt == "2":
                                # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length
                                info = ",".join([str(progress), str(correctness), f"{1 if means_and_extremes else 0}",str(prob), str(student), str(auto_complete), str(len(step_names_token)),"\t".join(map(str, outcome)), "\t".join(map(str, help_level)), "\t".join(map(str, where_opt))])

                                overall_data.append(["\t".join(step_names_token), info])
                                overall_labels.append(strat_correct)

                                # overall_data.append('')
                                # overall_labels.append('')

    overall_labels = np.array(overall_labels)

    # Route each instance to train/val/test by the student's cohort.
    for index, (all_data, label) in enumerate(zip(overall_data, overall_labels)):
        if all_data:
            steps_seq = all_data[0]
            info = all_data[1]
            # Field 4 of info is the student id (see the join above).
            student = info.split(",")[4]

            if student in high_performer:
                train_file.write(steps_seq)
                train_file.write("\n")
                train_label.write(label)
                train_label.write("\n")
                train_info.write(info)
                train_info.write("\n")
            elif student in mid_performer:
                val_file.write(steps_seq)
                val_file.write("\n")
                val_label.write(label)
                val_label.write("\n")
                val_info.write(info)
                val_info.write("\n")
            elif student in low_performer:
                test_file.write(steps_seq)
                test_file.write("\n")
                test_label.write(label)
                test_label.write("\n")
                test_info.write(info)
                test_info.write("\n")



    train_file.close()
    train_info.close()
    train_label.close()

    val_file.close()
    val_info.close()
    val_label.close()

    test_file.close()
    test_info.close()
    test_label.close()
def prepare_finetuning_SL_files(data_processor, opts):
    '''
    Ongoing research. Student strategy learning/predicting.
    We have defined 9 strategy as:
    Notation; Label
    UU; 0
    CU; 1
    PU; 2
    UC; 3
    UP; 4
    PP; 5
    PC; 6
    CP; 7
    CC; 8

    Extracts one step-name sequence per (student, problem), assigns it one of
    the 9 strategy labels above from which optional steps (opt_step1/opt_step2)
    were touched, then class-balances 20% of the rarest class into train and
    writes the remainder to test. Per instance it also writes the strategy
    correctness (trainr/testr) and the ER/ME ground truth (gt) labels.
    '''
    options = copy.deepcopy(opts)
    # Redirect every train*/test* output path into the ".../SL/" sub-folder.
    for k,v in vars(opts).items():
        if k.startswith("train") or k.startswith("test"):
            if v:
                f_path = v.split("/")
                f_path = f_path[0]+"/"+f_path[1]+"/SL/"+f_path[2]
                setattr(options, f"{k}", f_path)
                print(f"options.{k} : {getattr(options, f'{k}')}")

    # NOTE(review): `steps` is loaded but never used in this function.
    steps = pickle.load(open(f"{options.dataset_folder}unique_steps_list.pkl","rb"))
    chunk_iterator = data_processor.load_file_iterator()

    # NOTE(review): handles opened without `with`; an exception mid-way leaks them.
    train_file = open(options.train_file_path, "w")
    train_info = open(options.train_info_path, "w")
    train_label = open(options.train_label_path, "w")
    trainr_label = open(options.trainr_label_path, "w")
    train_gt_label = open(options.train_gt_label_path, "w")

    test_file = open(options.test_file_path, "w")
    test_info = open(options.test_info_path, "w")
    test_label = open(options.test_label_path, "w")
    testr_label = open(options.testr_label_path, "w")
    test_gt_label = open(options.test_gt_label_path, "w")

    overall_data = []
    overall_labels = []
    for chunk_data in chunk_iterator:
        for section, section_groups in chunk_data.groupby("Level (Workspace Id)"):
            if options.workspace_name == section:
                for student, student_groups in section_groups.groupby("Anon Student Id"):

                    # NOTE(review): sort_values result is discarded — rows are
                    # NOT actually re-ordered by Time; confirm intent.
                    student_groups.sort_values(by="Time")
                    prob_list = list(pd.unique(student_groups["Problem Name"]))

                    # if len(prob_list) < 3:
                    #     continue

                    # first_prob_list = prob_list[:3]
                    # last_prob_list = prob_list[-3:]
                    # # print(len(first_prob_list), len(last_prob_list))

                    # final_prob_list = first_prob_list + last_prob_list
                    # print(len(prob_list), len(final_prob_list)) #, final_prob_list)

                    for prob, prob_groups in student_groups.groupby("Problem Name"):
                        # For first 3 and last 3 only
                        # if not prob in last_prob_list:
                        #     continue

                        step_names_token = []

                        # Timestamps within 2000ms of the previous event mark
                        # auto-completed steps (presumably Time is in ms — TODO confirm).
                        time_stamps = list(prob_groups["Time"])
                        time_stamps_list = set()
                        for i in range(len(time_stamps)-1):
                            if (time_stamps[i+1] - time_stamps[i]) < 2000:
                                time_stamps_list.add(time_stamps[i+1])

                        progress = ""
                        outcome = []
                        help_level = []
                        auto_complete = False
                        means_and_extremes = False
                        # finals > 0: correctness is computed over final_step only;
                        # finals == 0: over every collapsed step (totals counts them).
                        finals = len(options.final_step)
                        totals = 0

                        for index, row in prob_groups[['Time', 'Step Name', 'Outcome', 'Help Level','CF (Workspace Progress Status)', 'CF (Etalon)']].iterrows():
                            step = row["Step Name"]
                            etalon = row["CF (Etalon)"]
                            progress = row["CF (Workspace Progress Status)"]
                            if not pd.isna(step):
                                if step in options.opt_step1:
                                    # Etalon parsing as float-but-not-int signals
                                    # the means-and-extremes (ME) variant.
                                    try:
                                        etalon = int(etalon)
                                    except Exception as e:
                                        try:
                                            etalon = float(etalon)
                                            means_and_extremes = True
                                            # break
                                        except Exception as e:
                                            pass
                                # Optional steps in the rapid-fire window are
                                # treated as auto-completed and skipped.
                                if (step in options.opt_step1 or step in options.opt_step2) and row["Time"] in time_stamps_list:
                                    # if row["Time"] in time_stamps_list:
                                    auto_complete = True
                                    # print(row)
                                    continue
                                # if not step_names_token or step != step_names_token[-1]:
                                #     step_names_token.append(step)

                                # Collapse consecutive repeats; outcomes and help
                                # levels of repeats are ":"-joined.
                                if not step_names_token or step != step_names_token[-1]:
                                    step_names_token.append(step)
                                    # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE']
                                    outcome.append(row['Outcome'])
                                    help_level.append(str(row["Help Level"]))
                                    if finals == 0:
                                        totals += 1
                                else:
                                    outcome[-1] = outcome[-1]+":"+row['Outcome']
                                    help_level[-1] = help_level[-1]+":"+str(row['Help Level'])

                        # A step counts as an error if any error indicator occurs
                        # in any of its attempts.
                        error_ind = ['BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE']
                        errors = 0
                        for step, out in zip(step_names_token, outcome):
                            if (finals and step in options.final_step) or totals > 0:
                                out = out.split(":")
                                if any(any(ind in o for o in out) for ind in error_ind):
                                    errors +=1

                        if finals:
                            totals = finals
                        # 4 and more in sequence
                        if step_names_token: # and len(step_names_token) > 3

                            # Per-step marker: "1"=opt_step1, "2"=opt_step2, "0"=other.
                            where_opt = []
                            for stp in step_names_token:
                                if stp in options.opt_step1:
                                    where_opt.append("1")
                                elif stp in options.opt_step2:
                                    where_opt.append("2")
                                else:
                                    where_opt.append("0")

                            # Strategy label: 1/2 = all/some of opt_step1 used,
                            # 3/4 = all/some of opt_step2, 5-8 = combinations,
                            # 0 = neither (see docstring table).
                            label_opt = "0"
                            if options.opt_step1:
                                all_opt1 = all(any(opt in step for step in step_names_token) for opt in options.opt_step1)
                                any_opt1 = any(any(opt in step for step in step_names_token) for opt in options.opt_step1[1:])

                                if any_opt1:
                                    label_opt = "2"
                                if all_opt1:
                                    label_opt = "1"


                            # NOTE(review): if opt_step1 is empty while opt_step2 is
                            # not, any_opt1/all_opt1 are referenced below without
                            # ever being assigned (NameError) — confirm opt_step1
                            # is always non-empty.
                            if options.opt_step2:
                                all_opt2 = all(any(opt in step for step in step_names_token) for opt in options.opt_step2)
                                any_opt2 = any(any(opt in step for step in step_names_token) for opt in options.opt_step2[1:])
                                if any_opt2:
                                    label_opt = "4"
                                if all_opt2:
                                    label_opt = "3"
                                if any_opt1 and any_opt2:
                                    label_opt = "5"
                                if any_opt1 and all_opt2:
                                    label_opt = "6"
                                if all_opt1 and any_opt2:
                                    label_opt = "7"
                                if all_opt1 and all_opt2:
                                    label_opt = "8"


                            correctness = 1 - errors/totals
                            strat_correct = "0"
                            if correctness > 0.75:
                                strat_correct = "1"

                            # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length
                            info = ",".join([str(progress),str(prob), str(student), str(auto_complete), str(len(step_names_token)),"\t".join(map(str, outcome)), "\t".join(map(str, help_level)), "\t".join(map(str, where_opt)), str(correctness)])

                            overall_data.append(["\t".join(step_names_token), strat_correct, info, f"{1 if means_and_extremes else 0}"])
                            overall_labels.append(label_opt)

                    # Empty sentinel marks the boundary between students.
                    overall_data.append('')
                    overall_labels.append('')

    overall_labels = np.array(overall_labels)
    indices_of_zeros = list(np.where(overall_labels == '0')[0])
    indices_of_ones = list(np.where(overall_labels == '1')[0])
    indices_of_twos = list(np.where(overall_labels == '2')[0])
    indices_of_threes = list(np.where(overall_labels == '3')[0])
    indices_of_fours = list(np.where(overall_labels == '4')[0])
    indices_of_fives = list(np.where(overall_labels == '5')[0])
    indices_of_sixes = list(np.where(overall_labels == '6')[0])
    indices_of_sevens = list(np.where(overall_labels == '7')[0])
    indices_of_eights = list(np.where(overall_labels == '8')[0])

    # Balanced train sample: 20% of each class, capped by the rarest class.
    per = 0.20
    zeros_instances_size = int(per * len(indices_of_zeros))
    ones_instances_size = int(per * len(indices_of_ones))
    twos_instances_size = int(per * len(indices_of_twos))
    threes_instances_size = int(per * len(indices_of_threes))
    fours_instances_size = int(per * len(indices_of_fours))
    fives_instances_size = int(per * len(indices_of_fives))
    sixes_instances_size = int(per * len(indices_of_sixes))
    sevens_instances_size = int(per * len(indices_of_sevens))
    eights_instances_size = int(per * len(indices_of_eights))

    sample_size = min(zeros_instances_size, ones_instances_size, twos_instances_size, threes_instances_size, fours_instances_size, fives_instances_size, sixes_instances_size, sevens_instances_size, eights_instances_size)
    print(f"Sample size.... {sample_size}")
    # NOTE(review): `index in sampled_instances` below is an O(n) list scan per
    # instance; a set would be O(1) — left as-is (behavior identical).
    sampled_instances = random.sample(indices_of_zeros, sample_size)
    sampled_instances.extend(random.sample(indices_of_ones, sample_size))
    sampled_instances.extend(random.sample(indices_of_twos, sample_size))
    sampled_instances.extend(random.sample(indices_of_threes, sample_size))
    sampled_instances.extend(random.sample(indices_of_fours, sample_size))
    sampled_instances.extend(random.sample(indices_of_fives, sample_size))
    sampled_instances.extend(random.sample(indices_of_sixes, sample_size))
    sampled_instances.extend(random.sample(indices_of_sevens, sample_size))
    sampled_instances.extend(random.sample(indices_of_eights, sample_size))

    # writtenTrain/writtenTest ensure the blank student-separator line is only
    # emitted into a file that actually received instances for that student.
    writtenTrain = False
    writtenTest = False
    for index, (all_data, label) in enumerate(zip(overall_data, overall_labels)):
        if all_data:
            steps_seq = all_data[0]
            strat_correct = all_data[1]
            info = all_data[2]
            me_opt = all_data[3]

            if index in sampled_instances:
                writtenTrain = True
                train_file.write(steps_seq)
                train_file.write("\n")
                train_label.write(label)
                train_label.write("\n")
                trainr_label.write(strat_correct)
                trainr_label.write("\n")
                train_info.write(info)
                train_info.write("\n")
                train_gt_label.write(me_opt)
                train_gt_label.write("\n")
            else:
                writtenTest = True
                test_file.write(steps_seq)
                test_file.write("\n")
                # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length
                test_label.write(label)
                test_label.write("\n")
                # testr_label.write(str(correctness))
                testr_label.write(strat_correct)
                testr_label.write("\n")
                test_info.write(info)
                test_info.write("\n")
                test_gt_label.write(me_opt)
                test_gt_label.write("\n")
        else:
            # Indicates actions of next student
            # Indicates next problem
            if writtenTrain:
                writtenTrain = False
                train_file.write("\n")
                train_info.write("\n")
                train_label.write("\n")
                trainr_label.write("\n")
                train_gt_label.write("\n")
            if writtenTest:
                writtenTest = False
                test_file.write("\n")
                test_info.write("\n")
                test_label.write("\n")
                testr_label.write("\n")
                test_gt_label.write("\n")


    train_file.close()
    train_info.close()
    train_label.close()
    trainr_label.close()
    train_gt_label.close()

    test_file.close()
    test_info.close()
    test_label.close()
    testr_label.close()
    test_gt_label.close()
def prepare_finetuning_effectiveness_files(data_processor, opts):
    '''
    Ongoing research. Student strategy learning/predicting.
    We have defined 9 strategy as:
    Notation; Label
    UU; 0
    CU; 1
    PU; 2
    UC; 3
    UP; 4
    PP; 5
    PC; 6
    CP; 7
    CC; 8

    if UU and CU and PU and gt = ER and correct, a positive instance
    if UU and UC and UP and gt = ME and correct, a positive instance
    else a strategy PP, PC, CP, CC and gt = ER/ME or incorrect, a negative instance

    Same extraction pipeline as prepare_finetuning_SL_files, but the training
    target is a binary "effectiveness" label derived from the strategy label,
    the ER/ME ground truth and the strategy correctness; instances are
    class-balanced (20% of the rarer class) into train, remainder into test.
    '''
    options = copy.deepcopy(opts)
    # Redirect every train*/test* output path into the ".../effectiveness/" sub-folder.
    for k,v in vars(opts).items():
        if k.startswith("train") or k.startswith("test"):
            if v:
                f_path = v.split("/")
                f_path = f_path[0]+"/"+f_path[1]+"/effectiveness/"+f_path[2]
                setattr(options, f"{k}", f_path)
                print(f"options.{k} : {getattr(options, f'{k}')}")

    # NOTE(review): `steps` is loaded but never used in this function.
    steps = pickle.load(open(f"{options.dataset_folder}unique_steps_list.pkl","rb"))
    chunk_iterator = data_processor.load_file_iterator()

    train_file = open(options.train_file_path, "w")
    train_info = open(options.train_info_path, "w")
    train_label = open(options.train_label_path, "w")
    trainr_label = open(options.trainr_label_path, "w")
    train_gt_label = open(options.train_gt_label_path, "w")

    test_file = open(options.test_file_path, "w")
    test_info = open(options.test_info_path, "w")
    test_label = open(options.test_label_path, "w")
    testr_label = open(options.testr_label_path, "w")
    test_gt_label = open(options.test_gt_label_path, "w")

    overall_data = []
    overall_labels = []
    for chunk_data in chunk_iterator:
        for section, section_groups in chunk_data.groupby("Level (Workspace Id)"):
            if options.workspace_name == section:
                for student, student_groups in section_groups.groupby("Anon Student Id"):
                    # NOTE(review): sort_values result is discarded — rows are
                    # NOT actually re-ordered by Time; confirm intent.
                    student_groups.sort_values(by="Time")
                    prob_list = list(pd.unique(student_groups["Problem Name"]))

                    # if len(prob_list) < 3:
                    #     continue

                    # first_prob_list = prob_list[:3]
                    # last_prob_list = prob_list[-3:]
                    # # print(len(first_prob_list), len(last_prob_list))

                    # final_prob_list = first_prob_list + last_prob_list
                    # print(len(prob_list), len(final_prob_list)) #, final_prob_list)

                    for prob, prob_groups in student_groups.groupby("Problem Name"):
                        # For first 3 and last 3 only
                        # if not prob in last_prob_list:
                        #     continue

                        step_names_token = []

                        # Timestamps within 2000ms of the previous event mark
                        # auto-completed steps.
                        time_stamps = list(prob_groups["Time"])
                        time_stamps_list = set()
                        for i in range(len(time_stamps)-1):
                            if (time_stamps[i+1] - time_stamps[i]) < 2000:
                                time_stamps_list.add(time_stamps[i+1])

                        progress = ""
                        outcome = []
                        help_level = []
                        auto_complete = False
                        means_and_extremes = False
                        # finals > 0: correctness over final_step only;
                        # finals == 0: over every collapsed step.
                        finals = len(options.final_step)
                        totals = 0

                        for index, row in prob_groups[['Time', 'Step Name', 'Outcome', 'Help Level','CF (Workspace Progress Status)', 'CF (Etalon)']].iterrows():
                            step = row["Step Name"]
                            etalon = row["CF (Etalon)"]
                            progress = row["CF (Workspace Progress Status)"]
                            if not pd.isna(step):
                                if step in options.opt_step1:
                                    # Etalon parsing as float-but-not-int signals
                                    # the means-and-extremes (ME) variant.
                                    try:
                                        etalon = int(etalon)
                                    except Exception as e:
                                        try:
                                            etalon = float(etalon)
                                            means_and_extremes = True
                                            # break
                                        except Exception as e:
                                            pass
                                # Optional steps in the rapid-fire window count
                                # as auto-completed and are skipped.
                                if (step in options.opt_step1 or step in options.opt_step2) and row["Time"] in time_stamps_list:
                                    # if row["Time"] in time_stamps_list:
                                    auto_complete = True
                                    # print(row)
                                    continue
                                # if not step_names_token or step != step_names_token[-1]:
                                #     step_names_token.append(step)

                                # Collapse consecutive repeats; outcomes/help
                                # levels of repeats are ":"-joined.
                                if not step_names_token or step != step_names_token[-1]:
                                    step_names_token.append(step)
                                    # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE']
                                    outcome.append(row['Outcome'])
                                    help_level.append(str(row["Help Level"]))
                                    if finals == 0:
                                        totals += 1
                                else:
                                    outcome[-1] = outcome[-1]+":"+row['Outcome']
                                    help_level[-1] = help_level[-1]+":"+str(row['Help Level'])

                        error_ind = ['BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE']
                        errors = 0
                        for step, out in zip(step_names_token, outcome):
                            if (finals and step in options.final_step) or totals > 0:
                                out = out.split(":")
                                if any(any(ind in o for o in out) for ind in error_ind):
                                    errors +=1

                        if finals:
                            totals = finals
                        # 4 and more in sequence
                        if step_names_token: # and len(step_names_token) > 3

                            # Per-step marker: "1"=opt_step1, "2"=opt_step2, "0"=other.
                            where_opt = []
                            for stp in step_names_token:
                                if stp in options.opt_step1:
                                    where_opt.append("1")
                                elif stp in options.opt_step2:
                                    where_opt.append("2")
                                else:
                                    where_opt.append("0")

                            # Strategy label 0-8 (see docstring table).
                            label_opt = "0"
                            if options.opt_step1:
                                all_opt1 = all(any(opt in step for step in step_names_token) for opt in options.opt_step1)
                                any_opt1 = any(any(opt in step for step in step_names_token) for opt in options.opt_step1[1:])

                                if any_opt1:
                                    label_opt = "2"
                                if all_opt1:
                                    label_opt = "1"


                            # NOTE(review): any_opt1/all_opt1 are referenced here
                            # without assignment if opt_step1 is empty while
                            # opt_step2 is not (NameError) — confirm opt_step1
                            # is always non-empty.
                            if options.opt_step2:
                                all_opt2 = all(any(opt in step for step in step_names_token) for opt in options.opt_step2)
                                any_opt2 = any(any(opt in step for step in step_names_token) for opt in options.opt_step2[1:])
                                if any_opt2:
                                    label_opt = "4"
                                if all_opt2:
                                    label_opt = "3"
                                if any_opt1 and any_opt2:
                                    label_opt = "5"
                                if any_opt1 and all_opt2:
                                    label_opt = "6"
                                if all_opt1 and any_opt2:
                                    label_opt = "7"
                                if all_opt1 and all_opt2:
                                    label_opt = "8"


                            correctness = 1 - errors/totals
                            strat_correct = "0"
                            if correctness > 0.75:
                                strat_correct = "1"

                            # Positive iff the strategy family matches the ground
                            # truth (ER: labels 0/1/2, ME: labels 0/3/4) AND the
                            # strategy was executed correctly.
                            label_effectiveness = "0"
                            if label_opt in ["0", "1", "2"] and not means_and_extremes and strat_correct == "1":
                                label_effectiveness = "1"
                            elif label_opt in ["0", "3", "4"] and means_and_extremes and strat_correct == "1":
                                label_effectiveness = "1"
                            # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length
                            info = ",".join([str(progress),str(prob), str(student), str(auto_complete), str(len(step_names_token)),"\t".join(map(str, outcome)), "\t".join(map(str, help_level)), "\t".join(map(str, where_opt)), str(correctness), label_opt, f"{1 if means_and_extremes else 0}"])

                            overall_data.append(["\t".join(step_names_token), strat_correct, info, f"{1 if means_and_extremes else 0}"])
                            overall_labels.append(label_effectiveness)

                    # Empty sentinel marks the boundary between students.
                    overall_data.append('')
                    overall_labels.append('')

    overall_labels = np.array(overall_labels)
    indices_of_zeros = list(np.where(overall_labels == '0')[0])
    indices_of_ones = list(np.where(overall_labels == '1')[0])

    # Balanced train sample: 20% of each class, capped by the rarer class.
    per = 0.20
    zeros_instances_size = int(per * len(indices_of_zeros))
    ones_instances_size = int(per * len(indices_of_ones))

    sample_size = min(zeros_instances_size, ones_instances_size)
    sampled_instances = random.sample(indices_of_zeros, sample_size)
    sampled_instances.extend(random.sample(indices_of_ones, sample_size))

    # writtenTrain/writtenTest ensure the blank student-separator line only goes
    # into a file that actually received instances for that student.
    writtenTrain = False
    writtenTest = False
    for index, (all_data, label) in enumerate(zip(overall_data, overall_labels)):
        if all_data:
            steps_seq = all_data[0]
            strat_correct = all_data[1]
            info = all_data[2]
            me_opt = all_data[3]

            if index in sampled_instances:
                writtenTrain = True
                train_file.write(steps_seq)
                train_file.write("\n")
                train_label.write(label)
                train_label.write("\n")
                trainr_label.write(strat_correct)
                trainr_label.write("\n")
                train_info.write(info)
                train_info.write("\n")
                train_gt_label.write(me_opt)
                train_gt_label.write("\n")
            else:
                writtenTest = True
                test_file.write(steps_seq)
                test_file.write("\n")
                # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length
                test_label.write(label)
                test_label.write("\n")
                # testr_label.write(str(correctness))
                testr_label.write(strat_correct)
                testr_label.write("\n")
                test_info.write(info)
                test_info.write("\n")
                test_gt_label.write(me_opt)
                test_gt_label.write("\n")
        else:
            # Indicates actions of next student
            # Indicates next problem
            if writtenTrain:
                writtenTrain = False
                train_file.write("\n")
                train_info.write("\n")
                train_label.write("\n")
                trainr_label.write("\n")
                train_gt_label.write("\n")
            if writtenTest:
                writtenTest = False
                test_file.write("\n")
                test_info.write("\n")
                test_label.write("\n")
                testr_label.write("\n")
                test_gt_label.write("\n")


    train_file.close()
    train_info.close()
    train_label.close()
    trainr_label.close()
    train_gt_label.close()

    test_file.close()
    test_info.close()
    test_label.close()
    testr_label.close()
    test_gt_label.close()
def prepare_attn_test_files(data_processor, opts):
    '''
    Builds attention-analysis files under a per-code sub-folder.

    options.code selects which instances go to the train file:
      "full"     — everything (no test files are opened or written),
      "gt"       — only ER ground-truth instances (not means-and-extremes),
      "correct"  — only instances with strategy correctness > 0.75,
      "progress" — only instances whose workspace progress is "GRADUATED";
    everything not matching the selector is written to the test files.
    Instances with strategy label "0" are skipped in the selected branch.
    '''
    options = copy.deepcopy(opts)

    if options.code:
        new_folder = f"{options.workspace_name}/{options.code}"
        if not os.path.exists(new_folder):
            os.makedirs(new_folder)


    for k,v in vars(opts).items():
        if k.startswith("train") or k.startswith("test"):
            if v:
                # NOTE(review): this inserts "/{code}/" between EVERY path
                # component ("a/b/c" -> "a/<code>/b/<code>/c"), not just once —
                # confirm the paths have exactly two separators as expected.
                f_path = (f"/{options.code}/").join(v.split("/"))
                setattr(options, f"{k}", f_path)
                print(f"options.{k} : {getattr(options, f'{k}')}")

    # NOTE(review): `steps` is loaded but never used in this function.
    steps = pickle.load(open(f"{options.dataset_folder}unique_steps_list.pkl","rb"))
    chunk_iterator = data_processor.load_file_iterator()

    train_file = open(options.train_file_path, "w")
    train_info = open(options.train_info_path, "w")

    # In "full" mode every instance is selected, so no test files are needed.
    if options.code != "full":
        test_file = open(options.test_file_path, "w")
        test_info = open(options.test_info_path, "w")

    for chunk_data in chunk_iterator:
        for section, section_groups in chunk_data.groupby("Level (Workspace Id)"):
            if options.workspace_name == section:
                for student, student_groups in section_groups.groupby("Anon Student Id"):

                    # NOTE(review): sort_values result is discarded — rows are
                    # NOT actually re-ordered by Time; confirm intent.
                    student_groups.sort_values(by="Time")
                    prob_list = list(pd.unique(student_groups["Problem Name"]))


                    for prob, prob_groups in student_groups.groupby("Problem Name"):

                        step_names_token = []

                        # Timestamps within 2000ms of the previous event mark
                        # auto-completed steps.
                        time_stamps = list(prob_groups["Time"])
                        time_stamps_list = set()
                        for i in range(len(time_stamps)-1):
                            if (time_stamps[i+1] - time_stamps[i]) < 2000:
                                time_stamps_list.add(time_stamps[i+1])

                        progress = ""
                        outcome = []
                        help_level = []
                        auto_complete = False
                        means_and_extremes = False
                        # finals > 0: correctness over final_step only;
                        # finals == 0: over every collapsed step.
                        finals = len(options.final_step)
                        totals = 0

                        for index, row in prob_groups[['Time', 'Step Name', 'Outcome', 'Help Level','CF (Workspace Progress Status)', 'CF (Etalon)']].iterrows():
                            step = row["Step Name"]
                            etalon = row["CF (Etalon)"]
                            progress = row["CF (Workspace Progress Status)"]
                            if not pd.isna(step):
                                if step in options.opt_step1:
                                    # Etalon parsing as float-but-not-int signals
                                    # the means-and-extremes (ME) variant.
                                    try:
                                        etalon = int(etalon)
                                    except Exception as e:
                                        try:
                                            etalon = float(etalon)
                                            means_and_extremes = True
                                            # break
                                        except Exception as e:
                                            pass
                                # Optional steps in the rapid-fire window count
                                # as auto-completed and are skipped.
                                if (step in options.opt_step1 or step in options.opt_step2) and row["Time"] in time_stamps_list:
                                    # if row["Time"] in time_stamps_list:
                                    auto_complete = True
                                    # print(row)
                                    continue
                                # if not step_names_token or step != step_names_token[-1]:
                                #     step_names_token.append(step)

                                # Collapse consecutive repeats; outcomes/help
                                # levels of repeats are ":"-joined.
                                if not step_names_token or step != step_names_token[-1]:
                                    step_names_token.append(step)
                                    # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE']
                                    outcome.append(row['Outcome'])
                                    help_level.append(str(row["Help Level"]))
                                    if finals == 0:
                                        totals += 1
                                else:
                                    outcome[-1] = outcome[-1]+":"+row['Outcome']
                                    help_level[-1] = help_level[-1]+":"+str(row['Help Level'])

                        error_ind = ['BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE']
                        errors = 0
                        for step, out in zip(step_names_token, outcome):
                            if (finals and step in options.final_step) or totals > 0:
                                out = out.split(":")
                                if any(any(ind in o for o in out) for ind in error_ind):
                                    errors +=1

                        if finals:
                            totals = finals
                        # 4 and more in sequence
                        if step_names_token: # and len(step_names_token) > 3

                            # Per-step marker: "1"=opt_step1, "2"=opt_step2, "0"=other.
                            where_opt = []
                            for stp in step_names_token:
                                if stp in options.opt_step1:
                                    where_opt.append("1")
                                elif stp in options.opt_step2:
                                    where_opt.append("2")
                                else:
                                    where_opt.append("0")

                            # Strategy label 0-8 from which optional steps were touched.
                            label_opt = "0"
                            if options.opt_step1:
                                all_opt1 = all(any(opt in step for step in step_names_token) for opt in options.opt_step1)
                                any_opt1 = any(any(opt in step for step in step_names_token) for opt in options.opt_step1[1:])

                                if any_opt1:
                                    label_opt = "2"
                                if all_opt1:
                                    label_opt = "1"


                            # NOTE(review): any_opt1/all_opt1 are referenced here
                            # without assignment if opt_step1 is empty while
                            # opt_step2 is not (NameError) — confirm.
                            if options.opt_step2:
                                all_opt2 = all(any(opt in step for step in step_names_token) for opt in options.opt_step2)
                                any_opt2 = any(any(opt in step for step in step_names_token) for opt in options.opt_step2[1:])
                                if any_opt2:
                                    label_opt = "4"
                                if all_opt2:
                                    label_opt = "3"
                                if any_opt1 and any_opt2:
                                    label_opt = "5"
                                if any_opt1 and all_opt2:
                                    label_opt = "6"
                                if all_opt1 and any_opt2:
                                    label_opt = "7"
                                if all_opt1 and all_opt2:
                                    label_opt = "8"


                            correctness = 1 - errors/totals
                            opt_correct = "0"
                            if correctness > 0.75:
                                opt_correct = "1"

                            # NOTE(review): `proba` is computed but never used.
                            proba = random.random()

                            # if proba <= 0.1:
                            # if not means_and_extremes:
                            # if prob in first_prob_list:
                            if options.code == "full" or (options.code == "gt" and not means_and_extremes) or (options.code == "correct" and opt_correct == "1") or (options.code == "progress" and progress == "GRADUATED"):
                                # Strategy-less instances are dropped entirely
                                # (continue, not routed to the test branch).
                                if label_opt == "0":
                                    continue
                                train_file.write("\t".join(step_names_token))
                                train_file.write("\n")
                                # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length
                                train_info.write(",".join([str(progress),str(prob), str(student), str(auto_complete), str(len(step_names_token)),
                                                           "\t".join(map(str, outcome)), "\t".join(map(str, help_level)), "\t".join(map(str, where_opt)),
                                                           str(correctness), f"{1 if means_and_extremes else 0}", label_opt]))
                                train_info.write("\n")
                            # if means_and_extremes:
                            # if prob in last_prob_list:
                            else:
                                # Unreachable when code == "full" (first disjunct
                                # above is always true), so test_file is always
                                # open here.
                                test_file.write("\t".join(step_names_token))
                                test_file.write("\n")
                                # progress, problem name, student id, total steps length, outcome seq, help_level seq, encoding in steps length
                                test_info.write(",".join([str(progress),str(prob), str(student), str(auto_complete), str(len(step_names_token)),
                                                          "\t".join(map(str, outcome)), "\t".join(map(str, help_level)), "\t".join(map(str, where_opt)),
                                                          str(correctness), f"{1 if means_and_extremes else 0}", label_opt]))
                                test_info.write("\n")

    train_file.close()
    train_info.close()

    if options.code != "full":
        test_file.close()
        test_info.close()
def prepare_finetuning_future_files(data_processor, opts):
    """Write fine-tuning train/test files split by strategy preference.

    For each student/problem in the configured workspace, builds the
    de-duplicated step-name sequence plus per-step outcome and help-level
    traces, then writes the sequence to the *train* files when the problem
    was solved without a float etalon ("means and extremes" not used) and to
    the *test* files otherwise.  Also emits a multi-class opt-usage label,
    a quartile-binned correctness label, and a per-problem info line.

    Args:
        data_processor: project object exposing ``load_file_iterator()``
            yielding pandas DataFrame chunks of the interaction log.
        opts: options namespace carrying file paths, ``workspace_name``,
            ``dataset_folder`` and the ``opt_step1`` / ``opt_step2``
            step-name lists.  ``opts`` itself is not mutated (deep-copied).
    """
    options = copy.deepcopy(opts)
    # Redirect every train*/test* file path into an "effectiveness" folder.
    # NOTE(review): str.join inserts "/effectiveness/" between EVERY path
    # component, not just once — assumes paths contain a single separator;
    # confirm against how the paths are configured.
    for k, v in vars(opts).items():
        if k.startswith("train") or k.startswith("test"):
            if v:
                f_path = ("/effectiveness/").join(v.split("/"))
                setattr(options, f"{k}", f_path)
                print(f"options.{k} : {getattr(options, f'{k}')}")

    # Loaded for parity with the sibling prepare_* helpers (and to fail fast
    # if the dataset folder is wrong); close the handle instead of leaking it.
    with open(f"{options.dataset_folder}unique_steps_list.pkl", "rb") as pkl_fp:
        steps = pickle.load(pkl_fp)
    chunk_iterator = data_processor.load_file_iterator()

    train_file = open(options.train_file_path, "w")
    train_info = open(options.train_info_path, "w")
    train_label = open(options.train_label_path, "w")
    trainr_label = open(options.trainr_label_path, "w")

    test_file = open(options.test_file_path, "w")
    test_info = open(options.test_info_path, "w")
    test_label = open(options.test_label_path, "w")
    testr_label = open(options.testr_label_path, "w")

    for chunk_data in chunk_iterator:
        for section, section_groups in chunk_data.groupby("Level (Workspace Id)"):
            if options.workspace_name == section:
                for student, student_groups in section_groups.groupby("Anon Student Id"):
                    writtenTrain = False
                    writtenTest = False

                    # BUG FIX: sort_values() returns a copy; without
                    # inplace=True the rows were iterated unsorted (the
                    # sibling prepare_* functions already use inplace=True).
                    student_groups.sort_values(by="Time", inplace=True)

                    for prob, prob_groups in student_groups.groupby("Problem Name"):
                        step_names_token = []

                        # Rows stamped <2s after their predecessor are treated
                        # as auto-completed, not genuine student actions.
                        time_stamps = list(prob_groups["Time"])
                        time_stamps_list = set()
                        for i in range(len(time_stamps) - 1):
                            if (time_stamps[i + 1] - time_stamps[i]) < 2000:
                                time_stamps_list.add(time_stamps[i + 1])

                        progress = ""
                        outcome = []
                        help_level = []
                        auto_complete = False
                        errors = 0
                        totals = 0
                        means_and_extremes = False

                        for index, row in prob_groups[['Time', 'Step Name', 'Outcome', 'Help Level',
                                                       'CF (Workspace Progress Status)', 'CF (Etalon)']].iterrows():
                            step = row["Step Name"]
                            etalon = row["CF (Etalon)"]
                            progress = row["CF (Workspace Progress Status)"]
                            if not pd.isna(step):
                                if step in options.opt_step1:
                                    # A non-integer (float) etalon marks the
                                    # problem as "means and extremes".
                                    try:
                                        etalon = int(etalon)
                                    except Exception:
                                        try:
                                            etalon = float(etalon)
                                            means_and_extremes = True
                                        except Exception:
                                            pass
                                if (step in options.opt_step1 or step in options.opt_step2) and row["Time"] in time_stamps_list:
                                    # Skip auto-completed optional steps.
                                    auto_complete = True
                                    continue

                                if not step_names_token or step != step_names_token[-1]:
                                    step_names_token.append(step)
                                    # Outcomes: ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE']
                                    outcome.append(row['Outcome'])
                                    help_level.append(str(row["Help Level"]))
                                    totals += 1
                                else:
                                    # Same step repeated: fold outcome/help
                                    # level into the previous entry.
                                    outcome[-1] = outcome[-1] + ":" + row['Outcome']
                                    help_level[-1] = help_level[-1] + ":" + str(row['Help Level'])

                        # A step counts as erroneous if any of its folded
                        # outcomes contains one of these markers.
                        error_ind = ['BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE']
                        for out in outcome:
                            out = out.split(":")
                            if any(any(ind in o for o in out) for ind in error_ind):
                                errors += 1

                        if step_names_token:
                            # Per-step location code: "1"=opt_step1, "2"=opt_step2, "0"=other.
                            where_opt = []
                            for stp in step_names_token:
                                if stp in options.opt_step1:
                                    where_opt.append("1")
                                elif stp in options.opt_step2:
                                    where_opt.append("2")
                                else:
                                    where_opt.append("0")

                            # BUG FIX: initialise all four flags up front so
                            # the combined checks below cannot raise
                            # NameError when opt_step1 is empty while
                            # opt_step2 is not (any_opt1 was previously only
                            # bound inside the opt_step1 branch).
                            all_opt1 = any_opt1 = all_opt2 = any_opt2 = False
                            label_opt = "0"
                            if options.opt_step1:
                                all_opt1 = all(any(opt in step for step in step_names_token) for opt in options.opt_step1)
                                any_opt1 = any(any(opt in step for step in step_names_token) for opt in options.opt_step1[1:])
                                if any_opt1:
                                    label_opt = "2"
                                if all_opt1:
                                    label_opt = "1"

                            if options.opt_step2:
                                all_opt2 = all(any(opt in step for step in step_names_token) for opt in options.opt_step2)
                                any_opt2 = any(any(opt in step for step in step_names_token) for opt in options.opt_step2[1:])
                                if any_opt2:
                                    label_opt = "4"
                                if all_opt2:
                                    label_opt = "3"
                                if any_opt1 and any_opt2:
                                    label_opt = "5"
                                if any_opt1 and all_opt2:
                                    label_opt = "6"
                                if all_opt1 and any_opt2:
                                    label_opt = "7"
                                if all_opt1 and all_opt2:
                                    label_opt = "8"

                            # Bin the error-free ratio into quartile labels "0"-"3".
                            correctness = 1 - errors / totals
                            if correctness < 0.25:
                                opt_correct = "0"
                            elif correctness < 0.5:
                                opt_correct = "1"
                            elif correctness < 0.75:
                                opt_correct = "2"
                            else:
                                opt_correct = "3"

                            if not means_and_extremes:
                                writtenTrain = True
                                train_file.write("\t".join(step_names_token))
                                train_file.write("\n")
                                train_label.write(label_opt)
                                train_label.write("\n")
                                trainr_label.write(opt_correct)
                                trainr_label.write("\n")
                                # info: progress, problem, student, auto-completed?,
                                # steps length, outcome seq, help-level seq, opt encoding
                                train_info.write(",".join([str(progress), str(prob), str(student), str(auto_complete), str(len(step_names_token)),
                                                           "\t".join(map(str, outcome)), "\t".join(map(str, help_level)), "\t".join(map(str, where_opt))]))
                                train_info.write("\n")
                            else:
                                writtenTest = True
                                test_file.write("\t".join(step_names_token))
                                test_file.write("\n")
                                test_label.write(label_opt)
                                test_label.write("\n")
                                testr_label.write(opt_correct)
                                testr_label.write("\n")
                                test_info.write(",".join([str(progress), str(prob), str(student), str(auto_complete), str(len(step_names_token)),
                                                          "\t".join(map(str, outcome)), "\t".join(map(str, help_level)), "\t".join(map(str, where_opt))]))
                                test_info.write("\n")

                    # Blank line separates one student's problems from the next.
                    if writtenTrain:
                        train_file.write("\n")
                        train_info.write("\n")
                        train_label.write("\n")
                        trainr_label.write("\n")
                    if writtenTest:
                        test_file.write("\n")
                        test_info.write("\n")
                        test_label.write("\n")
                        testr_label.write("\n")

    train_file.close()
    train_info.close()
    train_label.close()
    trainr_label.close()

    test_file.close()
    test_info.close()
    test_label.close()
    testr_label.close()
def prepare_school_coded_finetuning_partial_seq_files(data_processor, options):
    '''
    Ongoing research.

    FinalAnswer step correctness:
        "0" if attempt at step > 1 (or outcome not OK)
        "1" if attempt at step == 1 and outcome OK

    Collects one encoded step sequence per (school, student, problem) that
    passed the filters below, then draws a balanced ~10% train sample and a
    balanced test sample over the binary labels and writes them to the
    train/test file triples (sequence, info, label) from `options`.
    '''
    chunk_iterator = data_processor.load_file_iterator(sep=",")

    train_file = open(options.train_file_path, "w")
    train_info = open(options.train_info_path, "w")
    train_label = open(options.train_label_path, "w")

    test_file = open(options.test_file_path, "w")
    test_info = open(options.test_info_path, "w")
    test_label = open(options.test_label_path, "w")

    overall_data = []
    overall_labels = []
    for chunk_data in chunk_iterator:
        for school, school_group in chunk_data.groupby('CF (Anon School Id)'):
            if not options.school or school in options.school:
                print(f"{school} : {school_group.shape}")
                # Keep only non-step-by-step, first-encounter, non-review rows.
                school_group = school_group[(school_group['CF (Is StepByStep)'] == False) &
                                            (school_group['CF (Encounter)'] == 0) &
                                            (school_group['CF (Is Review Mode)'] == -1)]
                print(f"{school} : {school_group.shape}")
                for student, student_groups in school_group.groupby("Anon Student Id"):
                    student_groups.sort_values(by="Time", inplace=True)
                    for prob, prob_groups in student_groups.groupby("Problem Name"):
                        actions = list(prob_groups["Action"])
                        # A problem counts as completed only if the student
                        # clicked the Done button.
                        if not "Done" in actions:
                            continue
                        # Require >= 4 distinct non-optional, non-autofilled
                        # steps and >= 2 distinct optional steps.
                        unique_steps = list(pd.unique(prob_groups[prob_groups["CF (Is Autofilled)"] == False]["Step Name"]))
                        unique_steps_len = len([s for s in unique_steps if not pd.isna(s) and not (s in options.opt_step1) and not (s in options.opt_step2)])
                        if unique_steps_len < 4:
                            continue
                        unique_opt_steps_len = len([s for s in unique_steps if not pd.isna(s) and (s in options.opt_step1[1:] or s in options.opt_step2[1:])])
                        if unique_opt_steps_len < 2:
                            continue

                        class_id = list(pd.unique(prob_groups["CF (Anon Class Id)"]))
                        step_names_token = []
                        original_steps_actions_attempts_help_levels_outcomes = []
                        original_steps = []
                        means_and_extremes = False
                        correctness = "0"
                        opt_used = False
                        for index, row in prob_groups[['Step Name', 'Action', 'Attempt At Step', 'CF (Is Autofilled)',
                                                       'Outcome', 'Help Level', 'CF (Workspace Progress Status)',
                                                       'CF (Etalon)', 'CF (Problem Scenario Tags)']].iterrows():
                            step = row["Step Name"]
                            action = row["Action"]        # ['Attempt', 'Hint Request', 'Hint Level Change', 'Done']
                            attempt = row["Attempt At Step"]   # number
                            outcome = row["Outcome"]      # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE']
                            help_level = row["Help Level"]     # number
                            progress = row["CF (Workspace Progress Status)"]
                            scenario = row['CF (Problem Scenario Tags)']

                            if not pd.isna(step):
                                if step in options.opt_step1 and not means_and_extremes:
                                    # Etalon arrives as "{key=value}"; a float
                                    # value flags a means-and-extremes problem.
                                    etalon = row["CF (Etalon)"]
                                    if not pd.isna(etalon):
                                        etalon = etalon.strip('{}')
                                        key, value = etalon.split('=')
                                        etalon = value
                                        try:
                                            etalon = int(etalon)
                                        except Exception as e:
                                            try:
                                                etalon = float(etalon)
                                                means_and_extremes = True
                                            except Exception as e:
                                                pass
                                # Autofilled rows are not student actions.
                                if row['CF (Is Autofilled)'] == True:
                                    continue
                                prev = step_names_token[-1] if step_names_token else ""
                                prev_step = step_names_token[-1].split("-")[0] if step_names_token else ""

                                if not step_names_token or step != prev_step:
                                    # New step: optional steps keep their bare
                                    # name; others get a suffix
                                    # -2 failed attempt / -1 hint / -0 OK.
                                    if step in options.opt_step1 or step in options.opt_step2:
                                        new_step = step
                                        opt_used = True
                                    else:
                                        if action == "Attempt" and outcome != "OK":
                                            new_step = step + "-2"
                                        elif "Hint" in action:
                                            new_step = step + "-1"
                                        else:
                                            new_step = step + "-0"

                                    if step != "FinalAnswer":
                                        step_names_token.append(new_step)
                                    else:
                                        # FinalAnswer is tokenised bare.
                                        step_names_token.append("FinalAnswer")
                                else:
                                    # Repeated step: escalate the suffix of the
                                    # last token ("-0" < "-1" < "-2" lexically).
                                    if not (step in options.opt_step1 or step in options.opt_step2 or step == "FinalAnswer"):
                                        if action == "Attempt" and outcome != "OK":
                                            new_step = step + "-2"
                                        elif "Hint" in action:
                                            new_step = step + "-1"
                                        else:
                                            new_step = step + "-0"

                                        if prev < new_step:
                                            step_names_token[-1] = new_step

                                # Label: correct iff FinalAnswer solved OK on
                                # the first attempt (after any optional step).
                                # NOTE(review): later FinalAnswer rows
                                # overwrite earlier ones — last row wins.
                                if step == "FinalAnswer" and opt_used:
                                    if attempt == 1 and outcome == "OK":
                                        correctness = "1"
                                    else:
                                        correctness = "0"
                                original_steps_actions_attempts_help_levels_outcomes.append(f"{step}-{action}-{attempt}-{help_level}-{outcome}")
                                original_steps.append(step)

                        # Re-check length on the post-filter step list.
                        unique_steps_len = len([s for s in original_steps if not (s in options.opt_step1) and not(s in options.opt_step2)])
                        if step_names_token and unique_steps_len > 4:
                            # info: school, classes, student, progress, problem,
                            # scenario, ER/ME flag, length, original trace.
                            info = ",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario),
                                             f"{1 if means_and_extremes else 0}", str(len(step_names_token)),
                                             "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))])
                            overall_data.append(["\t".join(step_names_token), info])
                            overall_labels.append(correctness)

    # Balanced sampling: ~10% of all instances for train (half per class),
    # then the largest possible balanced test set from the remainder.
    # NOTE(review): random.sample raises ValueError if either class has fewer
    # than sample_size instances — assumed not to happen on this dataset.
    overall_labels = np.array(overall_labels)
    indices_of_zeros = list(np.where(overall_labels == '0')[0])
    indices_of_ones = list(np.where(overall_labels == '1')[0])

    train_len = int(len(overall_labels) * 0.10)
    sample_size = int(train_len/2)
    print(f"sample_size: {sample_size}")
    sampled_instances = random.sample(indices_of_zeros, sample_size)
    sampled_instances.extend(random.sample(indices_of_ones, sample_size))

    indices_of_zeros = [i for i in indices_of_zeros if not i in sampled_instances ]
    indices_of_ones = [i for i in indices_of_ones if not i in sampled_instances ]

    balanced_test = min(len(indices_of_zeros), len(indices_of_ones))
    print(f"balanced_test: {balanced_test}")
    test_sampled_instances = random.sample(indices_of_zeros, balanced_test)
    test_sampled_instances.extend(random.sample(indices_of_ones, balanced_test))

    for index, (all_data, label) in enumerate(zip(overall_data, overall_labels)):
        steps_seq = all_data[0]
        info = all_data[1]

        if index in sampled_instances:
            train_file.write(steps_seq)
            train_file.write("\n")

            train_info.write(info)
            train_info.write("\n")

            train_label.write(label)
            train_label.write("\n")
        elif index in test_sampled_instances:
            test_file.write(steps_seq)
            test_file.write("\n")

            test_info.write(info)
            test_info.write("\n")

            test_label.write(label)
            test_label.write("\n")

    train_file.close()
    train_info.close()
    train_label.close()

    test_file.close()
    test_info.close()
    test_label.close()
def prepare_school_coded_finetuning_opts_files(data_processor, options):
    '''
    Ongoing research.

    Labels (which optional-strategy steps the student used):
        "0" - Opt 1 only
        "1" - Opt 2 only
        "2" - Both Opt

    Collects one encoded step sequence per (school, student, problem) that
    passed the filters, then draws a balanced ~10% train sample and a
    balanced test sample over the three labels and writes them to the
    train/test file triples (sequence, info, label) from `options`.
    '''
    chunk_iterator = data_processor.load_file_iterator(sep=",")

    train_file = open(options.train_file_path, "w")
    train_info = open(options.train_info_path, "w")
    train_label = open(options.train_label_path, "w")

    test_file = open(options.test_file_path, "w")
    test_info = open(options.test_info_path, "w")
    test_label = open(options.test_label_path, "w")

    overall_data = []
    overall_labels = []
    for chunk_data in chunk_iterator:
        for school, school_group in chunk_data.groupby('CF (Anon School Id)'):
            if not options.school or school in options.school:
                print(f"{school} : {school_group.shape}")
                # Keep only non-step-by-step, first-encounter, non-review rows.
                school_group = school_group[(school_group['CF (Is StepByStep)'] == False) &
                                            (school_group['CF (Encounter)'] == 0) &
                                            (school_group['CF (Is Review Mode)'] == -1)]
                print(f"{school} : {school_group.shape}")
                for student, student_groups in school_group.groupby("Anon Student Id"):
                    student_groups.sort_values(by="Time", inplace=True)
                    for prob, prob_groups in student_groups.groupby("Problem Name"):
                        actions = list(prob_groups["Action"])
                        # A problem counts as completed only if the student
                        # clicked the Done button.
                        if not "Done" in actions:
                            continue
                        # Require >= 4 distinct non-optional, non-autofilled
                        # steps and >= 2 distinct optional steps.
                        unique_steps = list(pd.unique(prob_groups[prob_groups["CF (Is Autofilled)"] == False]["Step Name"]))
                        unique_steps_len = len([s for s in unique_steps if not pd.isna(s) and not (s in options.opt_step1) and not (s in options.opt_step2)])
                        if unique_steps_len < 4:
                            continue
                        unique_opt_steps_len = len([s for s in unique_steps if not pd.isna(s) and (s in options.opt_step1[1:] or s in options.opt_step2[1:])])
                        if unique_opt_steps_len < 2:
                            continue
                        print(unique_steps, unique_opt_steps_len)
                        class_id = list(pd.unique(prob_groups["CF (Anon Class Id)"]))
                        step_names_token = []
                        original_steps_actions_attempts_help_levels_outcomes = []
                        original_steps = []
                        means_and_extremes = False
                        opt1_used = False
                        opt2_used = False
                        for index, row in prob_groups[['Step Name', 'Action', 'Attempt At Step', 'CF (Is Autofilled)',
                                                       'Outcome', 'Help Level', 'CF (Workspace Progress Status)',
                                                       'CF (Etalon)', 'CF (Problem Scenario Tags)']].iterrows():
                            step = row["Step Name"]
                            action = row["Action"]        # ['Attempt', 'Hint Request', 'Hint Level Change', 'Done']
                            attempt = row["Attempt At Step"]   # number
                            outcome = row["Outcome"]      # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE']
                            help_level = row["Help Level"]     # number
                            progress = row["CF (Workspace Progress Status)"]
                            scenario = row['CF (Problem Scenario Tags)']

                            if not pd.isna(step):
                                if step in options.opt_step1 and not means_and_extremes:
                                    # Etalon arrives as "{key=value}"; a float
                                    # value flags a means-and-extremes problem.
                                    etalon = row["CF (Etalon)"]
                                    if not pd.isna(etalon):
                                        etalon = etalon.strip('{}')
                                        key, value = etalon.split('=')
                                        etalon = value
                                        try:
                                            etalon = int(etalon)
                                        except Exception as e:
                                            try:
                                                etalon = float(etalon)
                                                means_and_extremes = True
                                            except Exception as e:
                                                pass
                                # Autofilled rows are not student actions.
                                if row['CF (Is Autofilled)'] == True:
                                    continue
                                prev = step_names_token[-1] if step_names_token else ""
                                prev_step = step_names_token[-1].split("-")[0] if step_names_token else ""

                                if not step_names_token or step != prev_step:
                                    # New step: optional steps keep their bare
                                    # name; others get a suffix
                                    # -2 failed attempt / -1 hint / -0 OK.
                                    if step in options.opt_step1 or step in options.opt_step2:
                                        new_step = step
                                        if step in options.opt_step1[1:]:
                                            opt1_used = True
                                        # NOTE(review): [2:] is inconsistent
                                        # with opt_step1[1:] above and with the
                                        # opt_step2[1:] filter earlier — looks
                                        # like an off-by-one; confirm intent.
                                        elif step in options.opt_step2[2:]:
                                            opt2_used = True
                                    else:
                                        if action == "Attempt" and outcome != "OK":
                                            new_step = step+"-2"
                                        elif "Hint" in action:
                                            new_step = step+"-1"
                                        else:
                                            new_step = step+"-0"

                                    step_names_token.append(new_step)

                                else:
                                    # Repeated step: escalate the suffix of the
                                    # last token ("-0" < "-1" < "-2" lexically).
                                    if not (step in options.opt_step1 or step in options.opt_step2 or step == "FinalAnswer"):
                                        if action == "Attempt" and outcome != "OK":
                                            new_step = step+"-2"
                                        elif "Hint" in action:
                                            new_step = step+"-1"
                                        else:
                                            new_step = step+"-0"

                                        if prev < new_step:
                                            step_names_token[-1] = new_step

                                original_steps_actions_attempts_help_levels_outcomes.append(f"{step}-{action}-{attempt}-{help_level}-{outcome}")
                                original_steps.append(step)
                        # Only keep problems where at least one optional
                        # strategy was actually used.
                        if (not opt1_used) and (not opt2_used):
                            continue
                        # Re-check length on the post-filter step list.
                        unique_steps_len = len([s for s in original_steps if not (s in options.opt_step1) and not(s in options.opt_step2)])
                        if step_names_token and unique_steps_len > 4:
                            # info: school, classes, student, progress, problem,
                            # scenario, ER/ME flag, length, original trace.
                            info = ",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario),
                                             f"{1 if means_and_extremes else 0}", str(len(step_names_token)),
                                             "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))])
                            overall_data.append(["\t".join(step_names_token), info])
                            label = None
                            if opt1_used and opt2_used:
                                label = "2"
                            if (not opt1_used) and opt2_used:
                                label = "1"
                            if opt1_used and (not opt2_used):
                                label = "0"
                            print(f"opt1_used: {opt1_used}, opt2_used: {opt2_used} label : {label}")
                            overall_labels.append(label)

    # Balanced sampling: ~10% of all instances for train (a third per class),
    # then the largest possible balanced test set from the remainder.
    # NOTE(review): random.sample raises ValueError if any class has fewer
    # than sample_size instances — assumed not to happen on this dataset.
    overall_labels = np.array(overall_labels)
    indices_of_zeros = list(np.where(overall_labels == '0')[0])
    indices_of_ones = list(np.where(overall_labels == '1')[0])
    indices_of_twos = list(np.where(overall_labels == '2')[0])

    train_len = int(len(overall_labels) * 0.10)
    sample_size = int(train_len/3)
    print(f"sample_size: {sample_size}")
    sampled_instances = random.sample(indices_of_zeros, sample_size)
    sampled_instances.extend(random.sample(indices_of_ones, sample_size))
    sampled_instances.extend(random.sample(indices_of_twos, sample_size))

    indices_of_zeros = [i for i in indices_of_zeros if not i in sampled_instances ]
    indices_of_ones = [i for i in indices_of_ones if not i in sampled_instances ]
    indices_of_twos = [i for i in indices_of_twos if not i in sampled_instances ]

    balanced_test = min(len(indices_of_zeros), len(indices_of_ones), len(indices_of_twos))
    print(f"balanced_test: {balanced_test}")
    test_sampled_instances = random.sample(indices_of_zeros, balanced_test)
    test_sampled_instances.extend(random.sample(indices_of_ones, balanced_test))
    test_sampled_instances.extend(random.sample(indices_of_twos, balanced_test))

    for index, (all_data, label) in enumerate(zip(overall_data, overall_labels)):
        steps_seq = all_data[0]
        info = all_data[1]

        if index in sampled_instances:
            train_file.write(steps_seq)
            train_file.write("\n")

            train_info.write(info)
            train_info.write("\n")

            train_label.write(label)
            train_label.write("\n")
        elif index in test_sampled_instances:
            test_file.write(steps_seq)
            test_file.write("\n")

            test_info.write(info)
            test_info.write("\n")

            test_label.write(label)
            test_label.write("\n")

    train_file.close()
    train_info.close()
    train_label.close()

    test_file.close()
    test_info.close()
    test_label.close()
def prepare_school_coded_finetuning_opts_intentional_files(data_processor, options):
    '''
    Ongoing research.

    Labels:
        0 - Opt 1
        1 - Opt 2
        2 - Both Opt

    Unlike the non-"intentional" variant, this writes sequences directly with
    a random 80/10/10 train/test/val split (no balanced label sampling).
    NOTE(review): the *_label files are opened and closed but never written
    here, and `prob_list` (GRADUATED problems) only gates students with at
    least one graduated problem — individual problems are not filtered by it.
    '''
    chunk_iterator = data_processor.load_file_iterator(sep=",")

    train_file = open(options.train_file_path, "w")
    train_info = open(options.train_info_path, "w")
    train_label = open(options.train_label_path, "w")

    val_file = open(options.val_file_path, "w")
    val_info = open(options.val_info_path, "w")
    val_label = open(options.val_label_path, "w")

    test_file = open(options.test_file_path, "w")
    test_info = open(options.test_info_path, "w")
    test_label = open(options.test_label_path, "w")

    for chunk_data in chunk_iterator:
        for school, school_group in chunk_data.groupby('CF (Anon School Id)'):
            if not options.school or school in options.school:
                print(f"{school} : {school_group.shape}")
                # Keep only non-step-by-step, first-encounter, non-review rows.
                school_group = school_group[(school_group['CF (Is StepByStep)'] == False) &
                                            (school_group['CF (Encounter)'] == 0) &
                                            (school_group['CF (Is Review Mode)'] == -1)]
                print(f"{school} : {school_group.shape}")
                for student, student_groups in school_group.groupby("Anon Student Id"):
                    student_groups.sort_values(by="Time", inplace=True)
                    # Skip students with no GRADUATED problem at all.
                    prob_list= list(pd.unique(student_groups[student_groups["CF (Workspace Progress Status)"]=="GRADUATED"]["Problem Name"]))
                    if len(prob_list) == 0:
                        continue
                    for prob, prob_groups in student_groups.groupby("Problem Name"):
                        actions = list(prob_groups["Action"])
                        # A problem counts as completed only if the student
                        # clicked the Done button.
                        if not "Done" in actions:
                            continue
                        # Require >= 4 distinct non-optional, non-autofilled
                        # steps and >= 2 distinct optional steps.
                        unique_steps = list(pd.unique(prob_groups[prob_groups["CF (Is Autofilled)"] == False]["Step Name"]))
                        unique_steps_len = len([s for s in unique_steps if not pd.isna(s) and not (s in options.opt_step1) and not (s in options.opt_step2)])
                        if unique_steps_len < 4:
                            continue
                        unique_opt_steps_len = len([s for s in unique_steps if not pd.isna(s) and (s in options.opt_step1[1:] or s in options.opt_step2[1:])])
                        if unique_opt_steps_len < 2:
                            continue
                        class_id = list(pd.unique(prob_groups["CF (Anon Class Id)"]))
                        step_names_token = []
                        original_steps_actions_attempts_help_levels_outcomes = []
                        original_steps = []
                        means_and_extremes = False
                        opt1_used = False
                        opt2_used = False
                        for index, row in prob_groups[['Step Name', 'Action', 'Attempt At Step', 'CF (Is Autofilled)',
                                                       'Outcome', 'Help Level', 'CF (Workspace Progress Status)',
                                                       'CF (Etalon)', 'CF (Problem Scenario Tags)']].iterrows():
                            step = row["Step Name"]
                            action = row["Action"]        # ['Attempt', 'Hint Request', 'Hint Level Change', 'Done']
                            attempt = row["Attempt At Step"]   # number
                            outcome = row["Outcome"]      # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE']
                            help_level = row["Help Level"]     # number
                            progress = row["CF (Workspace Progress Status)"]
                            scenario = row['CF (Problem Scenario Tags)']

                            if not pd.isna(step):
                                if step in options.opt_step1 and not means_and_extremes:
                                    # Etalon arrives as "{key=value}"; a float
                                    # value flags a means-and-extremes problem.
                                    etalon = row["CF (Etalon)"]
                                    if not pd.isna(etalon):
                                        etalon = etalon.strip('{}')
                                        key, value = etalon.split('=')
                                        etalon = value
                                        try:
                                            etalon = int(etalon)
                                        except Exception as e:
                                            try:
                                                etalon = float(etalon)
                                                means_and_extremes = True
                                            except Exception as e:
                                                pass
                                # Autofilled rows are not student actions.
                                if row['CF (Is Autofilled)'] == True:
                                    continue
                                prev = step_names_token[-1] if step_names_token else ""
                                prev_step = step_names_token[-1].split("-")[0] if step_names_token else ""

                                if not step_names_token or step != prev_step:
                                    # New step: optional steps keep their bare
                                    # name; others get a suffix
                                    # -2 failed attempt / -1 hint / -0 OK.
                                    if step in options.opt_step1 or step in options.opt_step2:
                                        new_step = step
                                        if step in options.opt_step1[1:]:
                                            opt1_used = True
                                        # NOTE(review): [2:] is inconsistent
                                        # with opt_step1[1:] above and with the
                                        # opt_step2[1:] filter earlier — looks
                                        # like an off-by-one; confirm intent.
                                        elif step in options.opt_step2[2:]:
                                            opt2_used = True
                                    else:
                                        if action == "Attempt" and outcome != "OK":
                                            new_step = step+"-2"
                                        elif "Hint" in action:
                                            new_step = step+"-1"
                                        else:
                                            new_step = step+"-0"

                                    step_names_token.append(new_step)

                                else:
                                    # Repeated step: escalate the suffix of the
                                    # last token ("-0" < "-1" < "-2" lexically).
                                    if not (step in options.opt_step1 or step in options.opt_step2 or step == "FinalAnswer"):
                                        if action == "Attempt" and outcome != "OK":
                                            new_step = step+"-2"
                                        elif "Hint" in action:
                                            new_step = step+"-1"
                                        else:
                                            new_step = step+"-0"

                                        if prev < new_step:
                                            step_names_token[-1] = new_step

                                original_steps_actions_attempts_help_levels_outcomes.append(f"{step}-{action}-{attempt}-{help_level}-{outcome}")
                                original_steps.append(step)
                        # Re-check length on the post-filter step list.
                        unique_steps_len = len([s for s in original_steps if not (s in options.opt_step1) and not(s in options.opt_step2)])
                        if step_names_token and unique_steps_len > 4:
                            # NOTE(review): `info` is computed but the writes
                            # below rebuild the same join inline — redundant.
                            info = ",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario),
                                             f"{1 if means_and_extremes else 0}", str(len(step_names_token)),
                                             "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))])

                            # Random 80/10/10 split: <=0.8 train, >0.9 test,
                            # otherwise val.
                            proba = random.random()
                            if proba <= 0.8:
                                train_file.write("\t".join(step_names_token))
                                train_file.write("\n")
                                # school, class, student id, progress, problem name, scenario,
                                # preferred ER or ME, total steps length,
                                # original seq-action-attempt-help_level-outcome
                                train_info.write(",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario),
                                                           f"{1 if means_and_extremes else 0}", str(len(step_names_token)),
                                                           "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))]))
                                train_info.write("\n")

                            elif proba > 0.9:
                                test_file.write("\t".join(step_names_token))
                                test_file.write("\n")
                                # school, class, student id, progress, problem name, scenario,
                                # preferred ER or ME, total steps length,
                                # original seq-action-attempt-help_level-outcome
                                test_info.write(",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario),
                                                          f"{1 if means_and_extremes else 0}", str(len(step_names_token)),
                                                          "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))]))
                                test_info.write("\n")

                            else:
                                val_file.write("\t".join(step_names_token))
                                val_file.write("\n")
                                # school, class, student id, progress, problem name, scenario,
                                # preferred ER or ME, total steps length,
                                # original seq-action-attempt-help_level-outcome
                                val_info.write(",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario),
                                                         f"{1 if means_and_extremes else 0}", str(len(step_names_token)),
                                                         "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))]))
                                val_info.write("\n")

    train_file.close()
    train_info.close()
    train_label.close()

    val_file.close()
    val_info.close()
    val_label.close()

    test_file.close()
    test_info.close()
    test_label.close()
+ FinalAnswer step correctness + Correctness after opts: + 0 if attempt at step>1 + 1 if attempt at step==1 + ''' + kcs = pickle.load(open("dataset/CL_2223/ratio_proportion_change3_2223/unique_kcs_list.pkl", "rb")) + kcs = [kc for kc in kcs if not pd.isna(kc)] + kcs = np.array(sorted(list(kcs))) + print(kcs, type(kcs)) + print(f"KCs: {kcs}") + chunk_iterator = data_processor.load_file_iterator(sep=",") + + train_file = open(options.train_file_path, "w") + train_info = open(options.train_info_path, "w") + train_label = open(options.train_label_path, "w") + +# val_file = open(options.val_file_path, "w") +# val_info = open(options.val_info_path, "w") +# val_label = open(options.val_label_path, "w") + + test_file = open(options.test_file_path, "w") + test_info = open(options.test_info_path, "w") + test_label = open(options.test_label_path, "w") + + overall_data = [] + overall_labels = [] + # kcs = pickle.load(open("dataset/CL_2223/ratio_proportion_change3_2223/unique_kcs_list.pkl", "rb")) + # kcs = [kc if not pd.isna(kc) for kc in kcs] + for chunk_data in chunk_iterator: + for school, school_group in chunk_data.groupby('CF (Anon School Id)'): + if not options.school or school in options.school: + print(f"{school} : {school_group.shape}") + school_group = school_group[(school_group['CF (Is StepByStep)'] == False) & + (school_group['CF (Encounter)'] == 0) & + (school_group['CF (Is Review Mode)'] == -1) ] + print(f"{school} : {school_group.shape}") + # for class_id, class_group in school_groups.groupby('CF (Anon Class Id)'): + for student, student_groups in school_group.groupby("Anon Student Id"): + student_groups.sort_values(by="Time", inplace=True) + # prob_list = list(pd.unique(student_groups["Problem Name"])) + # prob_list= list(student_groups[student_groups["CF (Workspace Progress Status)"]=="GRADUATED"]["Problem Name"]) + # prob_list = prob_list[-int(len(prob_list)/2):] + for prob, prob_groups in student_groups.groupby("Problem Name"): + # if not prob in prob_list: 
+ # continue + actions = list(prob_groups["Action"]) + # A problem should be completed by a student clicking Done button. + if not "Done" in actions: + continue + unique_steps = list(pd.unique(prob_groups[prob_groups["CF (Is Autofilled)"] == False]["Step Name"])) + unique_steps_len = len([s for s in unique_steps if not pd.isna(s) and not (s in options.opt_step1) and not (s in options.opt_step2)]) + if unique_steps_len < 4: + continue + unique_opt_steps_len = len([s for s in unique_steps if not pd.isna(s) and (s in options.opt_step1[1:] or s in options.opt_step2[1:])]) + if unique_opt_steps_len < 2: + continue + # print(unique_steps, unique_opt_steps_len) + class_id = list(pd.unique(prob_groups["CF (Anon Class Id)"])) + step_names_token = [] + original_steps_actions_attempts_help_levels_outcomes = [] + original_steps = [] + means_and_extremes = False + opt1_used = False + opt2_used = False + final_after_opts = False + correctness = "0" + kcs_skills = [0 for i in kcs] + diff_skills = [0 for i in kcs] + finalanswer_skill = [0 for i in kcs] + for index, row in prob_groups[['Step Name', 'Action', 'Attempt At Step', 'CF (Is Autofilled)', + 'Outcome', 'Help Level', 'CF (Workspace Progress Status)', + 'CF (Skill Previous p-Known)', 'CF (Skill New p-Known)', 'KC Model(MATHia)', + 'CF (Etalon)', 'CF (Problem Scenario Tags)']].iterrows(): + step = row["Step Name"] + action = row["Action"] # ['Attempt', 'Hint Request', 'Hint Level Change', 'Done'] + attempt = row["Attempt At Step"] # number + outcome = row["Outcome"] # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE'] + help_level = row["Help Level"] # number + progress = row["CF (Workspace Progress Status)"] + scenario = row['CF (Problem Scenario Tags)'] + kc = row['KC Model(MATHia)'] + prev_skill = row['CF (Skill Previous p-Known)'] + curr_skill = row['CF (Skill New p-Known)'] + # print(kc, prev_skill) + if not pd.isna(step): + if step in options.opt_step1 and not means_and_extremes: + etalon = row["CF (Etalon)"] + 
if not pd.isna(etalon): + etalon = etalon.strip('{}') + key, value = etalon.split('=') + etalon = value + try: + etalon = int(etalon) + except Exception as e: + try: + etalon = float(etalon) + means_and_extremes = True + except Exception as e: + pass + if row['CF (Is Autofilled)'] == True: + continue + prev = step_names_token[-1] if step_names_token else "" + prev_step = step_names_token[-1].split("-")[0] if step_names_token else "" + + if not step_names_token or step != prev_step: + if step in options.opt_step1 or step in options.opt_step2: + new_step = step + if step in options.opt_step1[1:]: + opt1_used = True + elif step in options.opt_step2[2:]: + opt2_used = True + else: + if action == "Attempt" and outcome != "OK": + new_step = step+"-2" + elif "Hint" in action: + new_step = step+"-1" + else: + new_step = step+"-0" + + if step == "FinalAnswer" and (opt1_used or opt2_used) and not final_after_opts: + final_after_opts = True + if outcome == "OK": + correctness = "1" + step_names_token.append(new_step) + + else: + if not (step in options.opt_step1 or step in options.opt_step2 or step == "FinalAnswer"): + if action == "Attempt" and outcome != "OK": + new_step = step+"-2" + elif "Hint" in action: + new_step = step+"-1" + else: + new_step = step+"-0" + + if prev < new_step: + step_names_token[-1] = new_step + if not pd.isna(kc): + index = np.argwhere(kcs==kc).flatten()[0] + # print(index, type(index)) + kcs_skills[index] = prev_skill + diff_skills[index] = prev_skill - curr_skill + if step == "FinalAnswer": + finalanswer_skill[index] = prev_skill + + original_steps_actions_attempts_help_levels_outcomes.append(f"{step}-{action}-{attempt}-{help_level}-{outcome}") + original_steps.append(step) + if (not opt1_used) and (not opt2_used): + continue + unique_steps_len = len([s for s in original_steps if not (s in options.opt_step1) and not(s in options.opt_step2)]) + if step_names_token and unique_steps_len > 4: + label = None + if opt1_used and opt2_used: + label = "2" 
+ if (not opt1_used) and opt2_used: + label = "1" + if opt1_used and (not opt2_used): + label = "0" + # print(f"opt1_used: {opt1_ßused}, opt2_used: {opt2_used} label : {label}") + info = ",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario), + f"{1 if means_and_extremes else 0}", str(len(step_names_token)), + "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes)), label, + "\t".join(map(str, kcs_skills)), "\t".join(map(str, diff_skills)), + "\t".join(map(str, finalanswer_skill))])#str(finalanswer_skill)]) + overall_data.append(["\t".join(step_names_token), info]) + overall_labels.append(correctness) +# proba = random.random() +# # if prob in first_prob_list: +# if proba <= 0.8: +# train_file.write("\t".join(step_names_token)) +# train_file.write("\n") +# # school, class, student id, progress, problem name, scenario, +# # prefered ER or ME, total steps length, +# # original seq-action-attempt-help_level-outcome +# train_info.write(",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario), +# f"{1 if means_and_extremes else 0}", str(len(step_names_token)), +# "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))])) +# train_info.write("\n") + +# elif proba > 0.9: +# # elif prob in last_prob_list: +# test_file.write("\t".join(step_names_token)) +# test_file.write("\n") +# # school, class, student id, progress, problem name, scenario, +# # prefered ER or ME, total steps length, +# # original seq-action-attempt-help_level-outcome +# test_info.write(",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario), +# f"{1 if means_and_extremes else 0}", str(len(step_names_token)), +# "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))])) +# test_info.write("\n") + +# else: +# val_file.write("\t".join(step_names_token)) +# val_file.write("\n") +# # school, class, student id, progress, problem 
name, scenario, +# # prefered ER or ME, total steps length, +# # original seq-action-attempt-help_level-outcome +# val_info.write(",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario), +# f"{1 if means_and_extremes else 0}", str(len(step_names_token)), +# "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))])) +# val_info.write("\n") + # break + # break + # break + # break + # break + overall_labels = np.array(overall_labels) + indices_of_zeros = list(np.where(overall_labels == '0')[0]) + indices_of_ones = list(np.where(overall_labels == '1')[0]) + # indices_of_twos = list(np.where(overall_labels == '2')[0]) + + train_len = int(len(overall_labels) * 0.10) + sample_size = int(train_len/2) + print(f"sample_size: {sample_size}") + sampled_instances = random.sample(indices_of_zeros, sample_size) + sampled_instances.extend(random.sample(indices_of_ones, sample_size)) + # sampled_instances.extend(random.sample(indices_of_twos, sample_size)) + + indices_of_zeros = [i for i in indices_of_zeros if not i in sampled_instances ] + indices_of_ones = [i for i in indices_of_ones if not i in sampled_instances ] + # indices_of_twos = [i for i in indices_of_twos if not i in sampled_instances ] + + balanced_test = min(len(indices_of_zeros), len(indices_of_ones)) #, len(indices_of_twos)) + print(f"balanced_test: {balanced_test}") + test_sampled_instances = random.sample(indices_of_zeros, balanced_test) + test_sampled_instances.extend(random.sample(indices_of_ones, balanced_test)) + # test_sampled_instances.extend(random.sample(indices_of_twos, balanced_test)) + + for index, (all_data, label) in enumerate(zip(overall_data, overall_labels)): + + steps_seq = all_data[0] + info = all_data[1] + + if index in sampled_instances: + train_file.write(steps_seq) + train_file.write("\n") + + train_info.write(info) + train_info.write("\n") + + train_label.write(label) + train_label.write("\n") + elif index in test_sampled_instances: 
+ # proba = random.random() + # if proba <0.5: + test_file.write(steps_seq) + test_file.write("\n") + + test_info.write(info) + test_info.write("\n") + + test_label.write(label) + test_label.write("\n") +# else: +# val_file.write(steps_seq) +# val_file.write("\n") + +# val_info.write(info) +# val_info.write("\n") + +# val_label.write(label) +# val_label.write("\n") + + + train_file.close() + train_info.close() + train_label.close() + + # val_file.close() + # val_info.close() + # val_label.close() + + test_file.close() + test_info.close() + test_label.close() + +def prepare_school_coded_finetuning_correctness_after_opts_over_prob_files(data_processor, options): + ''' + Ongoing research. + FinalAnswer step correctness + Correctness after opts: + 0 if attempt at step>1 + 1 if attempt at step==1 + ''' + kcs = pickle.load(open("dataset/CL_2223/ratio_proportion_change3_2223/unique_kcs_list.pkl", "rb")) + kcs = [kc for kc in kcs if not pd.isna(kc)] + kcs = np.array(sorted(list(kcs))) + print(kcs, type(kcs)) + print(f"KCs: {kcs}") + chunk_iterator = data_processor.load_file_iterator(sep=",") + + train_file = open(options.train_file_path, "w") + train_info = open(options.train_info_path, "w") + train_label = open(options.train_label_path, "w") + +# val_file = open(options.val_file_path, "w") +# val_info = open(options.val_info_path, "w") +# val_label = open(options.val_label_path, "w") + + test_file = open(options.test_file_path, "w") + test_info = open(options.test_info_path, "w") + test_label = open(options.test_label_path, "w") + + train_data = [] + train_labels = [] + + test_data = [] + test_labels = [] + # kcs = pickle.load(open("dataset/CL_2223/ratio_proportion_change3_2223/unique_kcs_list.pkl", "rb")) + # kcs = [kc if not pd.isna(kc) for kc in kcs] + for chunk_data in chunk_iterator: + for school, school_group in chunk_data.groupby('CF (Anon School Id)'): + if not options.school or school in options.school: + print(f"{school} : {school_group.shape}") + school_group = 
school_group[(school_group['CF (Is StepByStep)'] == False) & + (school_group['CF (Encounter)'] == 0) & + (school_group['CF (Is Review Mode)'] == -1) ] + print(f"{school} : {school_group.shape}") + # for class_id, class_group in school_groups.groupby('CF (Anon Class Id)'): + for student, student_groups in school_group.groupby("Anon Student Id"): + student_groups.sort_values(by="Time", inplace=True) + train = True + proba = random.random() + if proba < 0.5: + train = False + # prob_list = list(pd.unique(student_groups["Problem Name"])) + # prob_list= list(student_groups[student_groups["CF (Workspace Progress Status)"]=="GRADUATED"]["Problem Name"]) + # prob_list = prob_list[-int(len(prob_list)/2):] + prev_kcs_skills = [0 for i in kcs] + for pi, (prob, prob_groups) in enumerate(student_groups.groupby("Problem Name")): + # if not prob in prob_list: + # continue + actions = list(prob_groups["Action"]) + # A problem should be completed by a student clicking Done button. + if not "Done" in actions: + continue + unique_steps = list(pd.unique(prob_groups[prob_groups["CF (Is Autofilled)"] == False]["Step Name"])) + unique_steps_len = len([s for s in unique_steps if not pd.isna(s) and not (s in options.opt_step1) and not (s in options.opt_step2)]) + if unique_steps_len < 4: + continue + unique_opt_steps_len = len([s for s in unique_steps if not pd.isna(s) and (s in options.opt_step1[1:] or s in options.opt_step2[1:])]) + if unique_opt_steps_len < 2: + continue + # print(unique_steps, unique_opt_steps_len) + class_id = list(pd.unique(prob_groups["CF (Anon Class Id)"])) + step_names_token = [] + original_steps_actions_attempts_help_levels_outcomes = [] + original_steps = [] + means_and_extremes = False + opt1_used = False + opt2_used = False + final_after_opts = False + correctness = "0" + kcs_skills = [0 for i in kcs] + diff_skills = [0 for i in kcs] + finalanswer_skill = [0 for i in kcs] + for index, row in prob_groups[['Step Name', 'Action', 'Attempt At Step', 'CF (Is 
Autofilled)', + 'Outcome', 'Help Level', 'CF (Workspace Progress Status)', + 'CF (Skill Previous p-Known)', 'CF (Skill New p-Known)', 'KC Model(MATHia)', + 'CF (Etalon)', 'CF (Problem Scenario Tags)']].iterrows(): + step = row["Step Name"] + action = row["Action"] # ['Attempt', 'Hint Request', 'Hint Level Change', 'Done'] + attempt = row["Attempt At Step"] # number + outcome = row["Outcome"] # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE'] + help_level = row["Help Level"] # number + progress = row["CF (Workspace Progress Status)"] + scenario = row['CF (Problem Scenario Tags)'] + kc = row['KC Model(MATHia)'] + prev_skill = row['CF (Skill Previous p-Known)'] + curr_skill = row['CF (Skill New p-Known)'] + # print(kc, prev_skill) + if not pd.isna(step): + if step in options.opt_step1 and not means_and_extremes: + etalon = row["CF (Etalon)"] + if not pd.isna(etalon): + etalon = etalon.strip('{}') + key, value = etalon.split('=') + etalon = value + try: + etalon = int(etalon) + except Exception as e: + try: + etalon = float(etalon) + means_and_extremes = True + except Exception as e: + pass + if row['CF (Is Autofilled)'] == True: + continue + prev = step_names_token[-1] if step_names_token else "" + prev_step = step_names_token[-1].split("-")[0] if step_names_token else "" + + if not step_names_token or step != prev_step: + if step in options.opt_step1 or step in options.opt_step2: + new_step = step + if step in options.opt_step1[1:]: + opt1_used = True + elif step in options.opt_step2[2:]: + opt2_used = True + else: + if action == "Attempt" and outcome != "OK": + new_step = step+"-2" + elif "Hint" in action: + new_step = step+"-1" + else: + new_step = step+"-0" + + if step == "FinalAnswer" and (opt1_used or opt2_used) and not final_after_opts: + final_after_opts = True + if outcome == "OK": + correctness = "1" + step_names_token.append(new_step) + + else: + if not (step in options.opt_step1 or step in options.opt_step2 or step == "FinalAnswer"): + if action 
== "Attempt" and outcome != "OK": + new_step = step+"-2" + elif "Hint" in action: + new_step = step+"-1" + else: + new_step = step+"-0" + + if prev < new_step: + step_names_token[-1] = new_step + if not pd.isna(kc): + index = np.argwhere(kcs==kc).flatten()[0] + # print(index, type(index)) + kcs_skills[index] = prev_skill + if pi != 0: + diff_skills[index] = prev_skill - prev_kcs_skills[index] + prev_kcs_skills[index] = prev_skill + if step == "FinalAnswer": + finalanswer_skill[index] = prev_skill + + original_steps_actions_attempts_help_levels_outcomes.append(f"{step}-{action}-{attempt}-{help_level}-{outcome}") + original_steps.append(step) + if (not opt1_used) and (not opt2_used): + continue + unique_steps_len = len([s for s in original_steps if not (s in options.opt_step1) and not(s in options.opt_step2)]) + if step_names_token and unique_steps_len > 4: + label = None + if opt1_used and opt2_used: + label = "2" + if (not opt1_used) and opt2_used: + label = "1" + if opt1_used and (not opt2_used): + label = "0" + # print(f"opt1_used: {opt1_ßused}, opt2_used: {opt2_used} label : {label}") + info = ",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario), + f"{1 if means_and_extremes else 0}", str(len(step_names_token)), + "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes)), label, + "\t".join(map(str, kcs_skills)), "\t".join(map(str, diff_skills)), + "\t".join(map(str, finalanswer_skill))])#str(finalanswer_skill)]) + if train: + train_data.append(["\t".join(step_names_token), info]) + train_labels.append(correctness) + else: + test_data.append(["\t".join(step_names_token), info]) + test_labels.append(correctness) +# proba = random.random() +# # if prob in first_prob_list: +# if proba <= 0.8: +# train_file.write("\t".join(step_names_token)) +# train_file.write("\n") +# # school, class, student id, progress, problem name, scenario, +# # prefered ER or ME, total steps length, +# # original 
seq-action-attempt-help_level-outcome +# train_info.write(",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario), +# f"{1 if means_and_extremes else 0}", str(len(step_names_token)), +# "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))])) +# train_info.write("\n") + +# elif proba > 0.9: +# # elif prob in last_prob_list: +# test_file.write("\t".join(step_names_token)) +# test_file.write("\n") +# # school, class, student id, progress, problem name, scenario, +# # prefered ER or ME, total steps length, +# # original seq-action-attempt-help_level-outcome +# test_info.write(",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario), +# f"{1 if means_and_extremes else 0}", str(len(step_names_token)), +# "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))])) +# test_info.write("\n") + +# else: +# val_file.write("\t".join(step_names_token)) +# val_file.write("\n") +# # school, class, student id, progress, problem name, scenario, +# # prefered ER or ME, total steps length, +# # original seq-action-attempt-help_level-outcome +# val_info.write(",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario), +# f"{1 if means_and_extremes else 0}", str(len(step_names_token)), +# "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))])) +# val_info.write("\n") + # break + # break + # break + # break + # break +# overall_labels = np.array(overall_labels) +# indices_of_zeros = list(np.where(overall_labels == '0')[0]) +# indices_of_ones = list(np.where(overall_labels == '1')[0]) +# # indices_of_twos = list(np.where(overall_labels == '2')[0]) + +# train_len = int(len(overall_labels) * 0.10) +# sample_size = int(train_len/2) +# print(f"sample_size: {sample_size}") +# sampled_instances = random.sample(indices_of_zeros, sample_size) +# sampled_instances.extend(random.sample(indices_of_ones, sample_size)) +# # 
sampled_instances.extend(random.sample(indices_of_twos, sample_size)) + +# indices_of_zeros = [i for i in indices_of_zeros if not i in sampled_instances ] +# indices_of_ones = [i for i in indices_of_ones if not i in sampled_instances ] +# # indices_of_twos = [i for i in indices_of_twos if not i in sampled_instances ] + +# balanced_test = min(len(indices_of_zeros), len(indices_of_ones)) #, len(indices_of_twos)) +# print(f"balanced_test: {balanced_test}") +# test_sampled_instances = random.sample(indices_of_zeros, balanced_test) +# test_sampled_instances.extend(random.sample(indices_of_ones, balanced_test)) +# # test_sampled_instances.extend(random.sample(indices_of_twos, balanced_test)) + + for index, (all_data, label) in enumerate(zip(train_data, train_labels)): + steps_seq = all_data[0] + info = all_data[1] + + train_file.write(steps_seq) + train_file.write("\n") + + train_info.write(info) + train_info.write("\n") + + train_label.write(label) + train_label.write("\n") + train_file.close() + train_info.close() + train_label.close() + + for index, (all_data, label) in enumerate(zip(test_data, test_labels)): + steps_seq = all_data[0] + info = all_data[1] + + test_file.write(steps_seq) + test_file.write("\n") + + test_info.write(info) + test_info.write("\n") + + test_label.write(label) + test_label.write("\n") + test_file.close() + test_info.close() + test_label.close() + +def prepare_school_coded_finetuning_correctness_after_opts_per_files(data_processor, options): + ''' + Ongoing research. 
+ FinalAnswer step correctness + Correctness after opts: + 0 if attempt at step>1 + 1 if attempt at step==1 + ''' + kcs = pickle.load(open("dataset/CL_2223/ratio_proportion_change3_2223/unique_kcs_list.pkl", "rb")) + kcs = [kc for kc in kcs if not pd.isna(kc)] + kcs = np.array(sorted(list(kcs))) + print(kcs, type(kcs)) + print(f"KCs: {kcs}") + chunk_iterator = data_processor.load_file_iterator(sep=",") + + train_file = open(options.train_file_path, "w") + train_info = open(options.train_info_path, "w") + train_label = open(options.train_label_path, "w") + + val_file = open(options.val_file_path, "a") + val_info = open(options.val_info_path, "a") + val_label = open(options.val_label_path, "a") + + test_file = open(options.test_file_path, "w") + test_info = open(options.test_info_path, "w") + test_label = open(options.test_label_path, "w") + + overall_data = [] + overall_labels = [] + # kcs = pickle.load(open("dataset/CL_2223/ratio_proportion_change3_2223/unique_kcs_list.pkl", "rb")) + # kcs = [kc if not pd.isna(kc) for kc in kcs] + for chunk_data in chunk_iterator: + for school, school_group in chunk_data.groupby('CF (Anon School Id)'): + if not options.school or school in options.school: + print(f"{school} : {school_group.shape}") + school_group = school_group[(school_group['CF (Is StepByStep)'] == False) & + (school_group['CF (Encounter)'] == 0) & + (school_group['CF (Is Review Mode)'] == -1) ] + print(f"{school} : {school_group.shape}") + # for class_id, class_group in school_groups.groupby('CF (Anon Class Id)'): + for student, student_groups in school_group.groupby("Anon Student Id"): + student_groups.sort_values(by="Time", inplace=True) + # prob_list = list(pd.unique(student_groups["Problem Name"])) + # prob_list= list(student_groups[student_groups["CF (Workspace Progress Status)"]=="GRADUATED"]["Problem Name"]) + # prob_list = prob_list[-int(len(prob_list)/2):] + for prob, prob_groups in student_groups.groupby("Problem Name"): + # if not prob in prob_list: + # 
continue + actions = list(prob_groups["Action"]) + # A problem should be completed by a student clicking Done button. + if not "Done" in actions: + continue + unique_steps = list(pd.unique(prob_groups[prob_groups["CF (Is Autofilled)"] == False]["Step Name"])) + unique_steps_len = len([s for s in unique_steps if not pd.isna(s) and not (s in options.opt_step1) and not (s in options.opt_step2)]) + if unique_steps_len < 4: + continue + unique_opt_steps_len = len([s for s in unique_steps if not pd.isna(s) and (s in options.opt_step1[1:] or s in options.opt_step2[1:])]) + if unique_opt_steps_len < 2: + continue + # print(unique_steps, unique_opt_steps_len) + class_id = list(pd.unique(prob_groups["CF (Anon Class Id)"])) + step_names_token = [] + original_steps_actions_attempts_help_levels_outcomes = [] + original_steps = [] + means_and_extremes = False + opt1_used = False + opt2_used = False + final_after_opts = False + correctness = "0" + kcs_skills = [0 for i in kcs] + diff_skills = [0 for i in kcs] + finalanswer_skill = [0 for i in kcs] + for index, row in prob_groups[['Step Name', 'Action', 'Attempt At Step', 'CF (Is Autofilled)', + 'Outcome', 'Help Level', 'CF (Workspace Progress Status)', + 'CF (Skill Previous p-Known)', 'CF (Skill New p-Known)', 'KC Model(MATHia)', + 'CF (Etalon)', 'CF (Problem Scenario Tags)']].iterrows(): + step = row["Step Name"] + action = row["Action"] # ['Attempt', 'Hint Request', 'Hint Level Change', 'Done'] + attempt = row["Attempt At Step"] # number + outcome = row["Outcome"] # ['OK', 'BUG', 'ERROR', 'INITIAL_HINT', 'HINT_LEVEL_CHANGE'] + help_level = row["Help Level"] # number + progress = row["CF (Workspace Progress Status)"] + scenario = row['CF (Problem Scenario Tags)'] + kc = row['KC Model(MATHia)'] + prev_skill = row['CF (Skill Previous p-Known)'] + curr_skill = row['CF (Skill New p-Known)'] + # print(kc, prev_skill) + if not pd.isna(step): + if step in options.opt_step1 and not means_and_extremes: + etalon = row["CF (Etalon)"] + if 
not pd.isna(etalon): + etalon = etalon.strip('{}') + key, value = etalon.split('=') + etalon = value + try: + etalon = int(etalon) + except Exception as e: + try: + etalon = float(etalon) + means_and_extremes = True + except Exception as e: + pass + if row['CF (Is Autofilled)'] == True: + continue + prev = step_names_token[-1] if step_names_token else "" + prev_step = step_names_token[-1].split("-")[0] if step_names_token else "" + + if not step_names_token or step != prev_step: + if step in options.opt_step1 or step in options.opt_step2: + new_step = step + if step in options.opt_step1[1:]: + opt1_used = True + elif step in options.opt_step2[2:]: + opt2_used = True + else: + if action == "Attempt" and outcome != "OK": + new_step = step+"-2" + elif "Hint" in action: + new_step = step+"-1" + else: + new_step = step+"-0" + + if step == "FinalAnswer" and (opt1_used or opt2_used) and not final_after_opts: + final_after_opts = True + if outcome == "OK": + correctness = "1" + step_names_token.append(new_step) + + else: + if not (step in options.opt_step1 or step in options.opt_step2 or step == "FinalAnswer"): + if action == "Attempt" and outcome != "OK": + new_step = step+"-2" + elif "Hint" in action: + new_step = step+"-1" + else: + new_step = step+"-0" + + if prev < new_step: + step_names_token[-1] = new_step + if not pd.isna(kc): + index = np.argwhere(kcs==kc).flatten()[0] + # print(index, type(index)) + kcs_skills[index] = prev_skill + diff_skills[index] = prev_skill - curr_skill + if step == "FinalAnswer": + finalanswer_skill[index] = prev_skill + + original_steps_actions_attempts_help_levels_outcomes.append(f"{step}-{action}-{attempt}-{help_level}-{outcome}") + original_steps.append(step) + if (not opt1_used) and (not opt2_used): + continue + unique_steps_len = len([s for s in original_steps if not (s in options.opt_step1) and not(s in options.opt_step2)]) + if step_names_token and unique_steps_len > 4: + label = None + if opt1_used and opt2_used: + label = "2" + 
if (not opt1_used) and opt2_used: + label = "1" + if opt1_used and (not opt2_used): + label = "0" + # print(f"opt1_used: {opt1_ßused}, opt2_used: {opt2_used} label : {label}") + info = ",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario), + f"{1 if means_and_extremes else 0}", str(len(step_names_token)), + "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes)), label, + "\t".join(map(str, kcs_skills)), "\t".join(map(str, diff_skills)), + "\t".join(map(str, finalanswer_skill))])#str(finalanswer_skill)]) + overall_data.append(["\t".join(step_names_token), info]) + overall_labels.append(correctness) +# proba = random.random() +# # if prob in first_prob_list: +# if proba <= 0.8: +# train_file.write("\t".join(step_names_token)) +# train_file.write("\n") +# # school, class, student id, progress, problem name, scenario, +# # prefered ER or ME, total steps length, +# # original seq-action-attempt-help_level-outcome +# train_info.write(",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario), +# f"{1 if means_and_extremes else 0}", str(len(step_names_token)), +# "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))])) +# train_info.write("\n") + +# elif proba > 0.9: +# # elif prob in last_prob_list: +# test_file.write("\t".join(step_names_token)) +# test_file.write("\n") +# # school, class, student id, progress, problem name, scenario, +# # prefered ER or ME, total steps length, +# # original seq-action-attempt-help_level-outcome +# test_info.write(",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario), +# f"{1 if means_and_extremes else 0}", str(len(step_names_token)), +# "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))])) +# test_info.write("\n") + +# else: +# val_file.write("\t".join(step_names_token)) +# val_file.write("\n") +# # school, class, student id, progress, problem name, 
scenario, +# # prefered ER or ME, total steps length, +# # original seq-action-attempt-help_level-outcome +# val_info.write(",".join([str(school), "\t".join(class_id), str(student), str(progress), str(prob), str(scenario), +# f"{1 if means_and_extremes else 0}", str(len(step_names_token)), +# "\t".join(map(str, original_steps_actions_attempts_help_levels_outcomes))])) +# val_info.write("\n") + # break + # break + # break + # break + # break + overall_labels = np.array(overall_labels) + indices_of_zeros = list(np.where(overall_labels == '0')[0]) + indices_of_ones = list(np.where(overall_labels == '1')[0]) + # indices_of_twos = list(np.where(overall_labels == '2')[0]) + + # train_len = int(len(overall_labels) * 0.10) + train_len = int(len(overall_labels) * float(options.per)) + + sample_size = int(train_len/2) + if float(options.per) == 1: + sample_size = min(len(indices_of_zeros), len(indices_of_ones)) + elif float(options.per) > 1: + sample_size = int(options.per) + print(f"sample_size: {sample_size}") + sampled_instances = random.sample(indices_of_zeros, sample_size) + sampled_instances.extend(random.sample(indices_of_ones, sample_size)) + # sampled_instances.extend(random.sample(indices_of_twos, sample_size)) + + indices_of_zeros = [i for i in indices_of_zeros if not i in sampled_instances ] + indices_of_ones = [i for i in indices_of_ones if not i in sampled_instances ] + # indices_of_twos = [i for i in indices_of_twos if not i in sampled_instances ] + + balanced_test = min(len(indices_of_zeros), len(indices_of_ones)) #, len(indices_of_twos)) + print(f"balanced_test: {balanced_test}") + test_sampled_instances = random.sample(indices_of_zeros, balanced_test) + test_sampled_instances.extend(random.sample(indices_of_ones, balanced_test)) + # test_sampled_instances.extend(random.sample(indices_of_twos, balanced_test)) + + for index, (all_data, label) in enumerate(zip(overall_data, overall_labels)): + + steps_seq = all_data[0] + info = all_data[1] + + if index in 
def _dump_pickle(obj, path):
    """Serialize *obj* to *path* with pickle, closing the file handle.

    The original code used ``pickle.dump(obj, open(path, "wb"))`` which never
    closes the file object; this helper fixes that leak in one place.
    """
    with open(path, "wb") as fh:
        pickle.dump(obj, fh)


def _str2bool(value):
    """argparse-friendly boolean converter.

    The original CLI used ``type=bool``, which treats ANY non-empty string —
    including the literal text "False" — as True.  This converter keeps the
    same command-line shape (``-pretrain True`` / ``-pretrain False``) but
    parses the text correctly.

    Raises:
        argparse.ArgumentTypeError: if *value* is not a recognizable boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(f"Boolean value expected, got {value!r}")


def prepare_pretraining_vocab_file(options):
    """Write the BERT pre-training vocabulary file.

    Reads the pickled set of unique step names from
    ``{options.dataset_folder}unique_steps_list.pkl`` and writes one vocab
    token per line to ``options.vocab_file_path``: the five special tokens
    first, then each step in sorted order.  Steps listed in
    ``options.opt_step1`` / ``options.opt_step2`` get a single entry; every
    other step is expanded into three attempt-coded variants
    (``<step>-0`` .. ``<step>-2``).

    Args:
        options: parsed CLI namespace; uses ``dataset_folder``,
            ``vocab_file_path``, ``opt_step1``, ``opt_step2``.
    """
    # Load the unique steps produced by the dataset-analysis phase.
    # (Original leaked the file handle via pickle.load(open(...)).)
    with open(f"{options.dataset_folder}unique_steps_list.pkl", "rb") as fh:
        steps = pickle.load(fh)

    print("No of unique steps ", len(steps))

    ordered_steps = sorted(steps)

    with open(options.vocab_file_path, "w") as vb_file:
        # Special tokens first; their line numbers become their vocab ids.
        for special in ("[PAD]", "[UNK]", "[MASK]", "[CLS]", "[SEP]"):
            vb_file.write(f"{special}\n")
        for step in ordered_steps:
            if step in options.opt_step1 or step in options.opt_step2:
                # Optional steps keep a single vocabulary entry.
                vb_file.write(f"{step}\n")
            else:
                # Non-optional steps are expanded into three attempt-coded
                # variants: "<step>-0", "<step>-1", "<step>-2".
                for i in range(3):
                    vb_file.write(f"{step}-{i}\n")
        # (Original called vb_file.close() here, redundant inside `with`.)

    # Debug echo of the vocabulary that was just written (kept from original).
    with open(options.vocab_file_path, "r") as f:
        lines = f.readlines()
        print(lines, len(lines))


def _redirect_output_paths(opt, options):
    """Rewrite every ``*path`` option so output lands inside the workspace tree.

    For each CLI attribute whose name contains ``path`` and has a value, a
    workspace-relative prefix is built (workspace name, optional coded-school
    folder, pretraining/finetuning subfolder), the directory is created, and
    ``options`` is updated in place.

    NOTE(review): indentation was reconstructed from a mangled diff; the
    nesting of the ``opt.school_folder`` check (sibling of, vs. nested inside,
    the ``opt.school and opt.pretrain`` branch) is ambiguous in the source —
    confirm against the original file.
    """
    for key, value in vars(opt).items():
        if 'path' not in key or not value:
            continue
        redirect_path = opt.workspace_name + "/"
        if opt.school and opt.pretrain:
            # e.g. "sch_largest_100-coded" for a 100-school coded run.
            redirect_path += f"sch_largest_{len(opt.school)}-coded/"
        if opt.school_folder:
            redirect_path += opt.school_folder + "/"
        if key != "vocab_file_path":
            if opt.pretrain:
                redirect_path += "pretraining/"
            elif opt.code:
                redirect_path += f"{opt.code}/"
            elif opt.finetune_task:
                if opt.diff_val_folder and "val" in value:
                    # Validation files may live one level up when a shared
                    # val folder is requested.
                    redirect_path += "finetuning/"
                else:
                    redirect_path += f"finetuning/{opt.finetune_task}/"
            # exist_ok avoids the check-then-create race of the original.
            os.makedirs(redirect_path, exist_ok=True)
        else:
            # The vocab default already embeds "pretraining/" in its value.
            os.makedirs(redirect_path + "/pretraining/", exist_ok=True)
        setattr(options, key, redirect_path + value)
        print(f"options.{key} : {getattr(options, key)}")


def main(opt):
    """Drive dataset analysis and pre-training / fine-tuning file preparation.

    Deep-copies *opt* so path rewriting never mutates the caller's namespace,
    runs the requested dataset analyses (dumping their unique-* artifacts as
    pickles), redirects all output paths into the workspace tree, and finally
    invokes the pre-training or fine-tuning file preparation routine.

    Args:
        opt: parsed CLI namespace (see the argparse section below).
    """
    options = copy.deepcopy(opt)
    if opt.workspace_name:
        options.dataset_folder = opt.dataset_folder + opt.workspace_name + "/"

    data_processor = DataPreprocessor(input_file_path=opt.dataset)

    if opt.analyze_dataset_by_section:
        print(f"Analyzing dataset by section for workspace: {opt.workspace_name}")
        data_processor.analyze_dataset_by_section(opt.workspace_name)
        _dump_pickle(data_processor.unique_students,
                     f"{options.dataset_folder}unique_students_list.pkl")
        _dump_pickle(data_processor.unique_problems,
                     f"{options.dataset_folder}unique_problems_list.pkl")
        _dump_pickle(data_processor.unique_prob_hierarchy,
                     f"{options.dataset_folder}unique_hierarchy_list.pkl")
        _dump_pickle(data_processor.unique_kcs,
                     f"{options.dataset_folder}unique_kcs_list.pkl")
        _dump_pickle(data_processor.unique_steps,
                     f"{options.dataset_folder}unique_steps_list.pkl")

    if opt.analyze_dataset_by_school:
        print(f"Analyzing dataset of all school for workspace: {opt.workspace_name}")
        data_processor.analyze_dataset_by_school(opt.workspace_name)
        os.makedirs(options.dataset_folder, exist_ok=True)
        _dump_pickle(data_processor.unique_schools,
                     f"{options.dataset_folder}unique_schools_list.pkl")
        _dump_pickle(data_processor.unique_class,
                     f"{options.dataset_folder}unique_class_list.pkl")
        _dump_pickle(data_processor.unique_students,
                     f"{options.dataset_folder}unique_students_list.pkl")
        _dump_pickle(data_processor.unique_problems,
                     f"{options.dataset_folder}unique_problems_list.pkl")
        _dump_pickle(data_processor.unique_kcs,
                     f"{options.dataset_folder}unique_kcs_list.pkl")
        _dump_pickle(data_processor.unique_steps,
                     f"{options.dataset_folder}unique_steps_list.pkl")
        _dump_pickle(data_processor.unique_new_steps_w_action_attempt,
                     f"{options.dataset_folder}unique_new_steps_w_action_attempt_list.pkl")
        _dump_pickle(data_processor.unique_new_steps_w_action_attempt_kcs,
                     f"{options.dataset_folder}unique_new_steps_w_action_attempt_kcs.pkl")
        _dump_pickle(data_processor.unique_new_steps_w_kcs,
                     f"{options.dataset_folder}unique_new_steps_w_kcs_list.pkl")

    if opt.workspace_name:
        _redirect_output_paths(opt, options)

    if options.pretrain:
        print("Preparing vocab...")
        prepare_pretraining_vocab_file(options)
        print("Preparing pre-training dataset...")
        prepare_school_coded_finetuning_opts_intentional_files(data_processor, options)
    else:
        print("Preparing fine-tuning dataset...")
        prepare_school_coded_finetuning_correctness_after_opts_per_files(data_processor, options)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-dataset_folder', type=str, default="dataset/CL4999_1920/")

    # NOTE: originally type=bool — any non-empty string (even "False") parsed
    # as True.  _str2bool keeps the CLI shape but parses the text correctly.
    parser.add_argument('-analyze_dataset_by_section', type=_str2bool, default=False)
    parser.add_argument('-analyze_dataset_by_school', type=_str2bool, default=False)
    parser.add_argument('-workspace_name', type=str, default=None)
    parser.add_argument('-school', nargs='+', type=str, default=None)
    parser.add_argument('-school_folder', type=str, default=None)

    parser.add_argument('-code', type=str, default=None)
    parser.add_argument('-finetune_task', type=str, default=None)

    parser.add_argument('-per', type=float, default=None)
    parser.add_argument('-diff_val_folder', type=_str2bool, default=False,
                        help="use for different val folder")

    parser.add_argument('-opt_step1', nargs='+', type=str, help='List of optional steps 1')
    parser.add_argument('-opt_step2', nargs='+', type=str, help='List of optional steps 2')
    parser.add_argument('-final_step', nargs='+', type=str, help='List of final step')

    parser.add_argument('-dataset', type=str,
                        default="dataset/CL4999_1920/course2_1920_4999_students_datashop.txt")

    parser.add_argument('-pretrain', type=_str2bool, default=False)
    parser.add_argument('-vocab_file_path', type=str, default="pretraining/vocab.txt")

    # Pre-training / fine-tuning output files (redirected into the workspace
    # tree by _redirect_output_paths).
    parser.add_argument('-train_file_path', type=str, default="train.txt")
    parser.add_argument('-train_info_path', type=str, default="train_info.txt")
    parser.add_argument('-train_label_path', type=str, default="train_label.txt")

    parser.add_argument('-val_file_path', type=str, default="val.txt")
    parser.add_argument('-val_info_path', type=str, default="val_info.txt")
    parser.add_argument('-val_label_path', type=str, default="val_label.txt")

    parser.add_argument('-test_file_path', type=str, default="test.txt")
    parser.add_argument('-test_info_path', type=str, default="test_info.txt")
    parser.add_argument('-test_label_path', type=str, default="test_label.txt")

    options = parser.parse_args()

    # argparse leaves nargs='+' options as None when absent; normalise to []
    # so the membership tests in prepare_pretraining_vocab_file never see None.
    if not options.opt_step1:
        options.opt_step1 = []
    print("Optional steps 1: ", options.opt_step1)

    if not options.opt_step2:
        options.opt_step2 = []
    print("Optional steps 2: ", options.opt_step2)

    if not options.final_step:
        options.final_step = []
    print("Final steps: ", options.final_step)

    main(options)
b/ratio_proportion_change3/finetuning/test_in_info.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:044b445c06dbdecb8663e5db8d6f270799240f1b433a169c335c15e566dbba20 +size 1660506 diff --git a/ratio_proportion_change3/finetuning/test_in_label.txt b/ratio_proportion_change3/finetuning/test_in_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..b7cef17dbd096ae24dfb4124781db75c44e45741 --- /dev/null +++ b/ratio_proportion_change3/finetuning/test_in_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c035490a97515200f23348bf01bd3c16def88046a7c2215d9ef169ffc089d0d +size 17202 diff --git a/ratio_proportion_change3/finetuning/test_label.txt b/ratio_proportion_change3/finetuning/test_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..bba17f3e331933d5a4149d4796fd6693c79b0794 --- /dev/null +++ b/ratio_proportion_change3/finetuning/test_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6fee24daa1523d1a8d7615c415fac559d0bf85ace5ab18d9db1a8dff533ff68 +size 79424 diff --git a/ratio_proportion_change3/finetuning/testr_in_label.txt b/ratio_proportion_change3/finetuning/testr_in_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..fa5bc5c8cee26376b2884e2c0e9bb72715be9a17 --- /dev/null +++ b/ratio_proportion_change3/finetuning/testr_in_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b95faf33529a8cdbcedfca3853be88f917e730c79261731c4860f0d57909f13f +size 97701 diff --git a/ratio_proportion_change3/finetuning/testr_label.txt b/ratio_proportion_change3/finetuning/testr_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..8576c51173e90052744488d222f0685f941a85e5 --- /dev/null +++ b/ratio_proportion_change3/finetuning/testr_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:29519e69e1ec480ae0440e23dcb57f97bbb33cdd9b91d18e5e999d3e7e58288c +size 549160 diff --git a/ratio_proportion_change3/finetuning/train.txt b/ratio_proportion_change3/finetuning/train.txt new file mode 100644 index 0000000000000000000000000000000000000000..3277efdd326506140839438422c31477011d5277 --- /dev/null +++ b/ratio_proportion_change3/finetuning/train.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b397618386eb7cd21cf59251b4d48c1880330477c3186375a039047f181beae +size 775465 diff --git a/ratio_proportion_change3/finetuning/train_in.txt b/ratio_proportion_change3/finetuning/train_in.txt new file mode 100644 index 0000000000000000000000000000000000000000..2631be49d982d557c6fdde620de0e0d9e5f66d3a --- /dev/null +++ b/ratio_proportion_change3/finetuning/train_in.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b935dcf7dbbe3ad66c2616ae3e6c342d9d1b162c4931c7a291386c5ce609ce0 +size 1656785 diff --git a/ratio_proportion_change3/finetuning/train_in_info.txt b/ratio_proportion_change3/finetuning/train_in_info.txt new file mode 100644 index 0000000000000000000000000000000000000000..e18de77b52241dcb85529a4c417d0ec34aa4b111 --- /dev/null +++ b/ratio_proportion_change3/finetuning/train_in_info.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5663b5706417ba65ec10abecf405f5644dfa637683fe1198ea937b8838cba6a +size 2411977 diff --git a/ratio_proportion_change3/finetuning/train_in_label.txt b/ratio_proportion_change3/finetuning/train_in_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..11fe0e0c1327693b744b63abc79378705e537dc7 --- /dev/null +++ b/ratio_proportion_change3/finetuning/train_in_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e38fd99af6313174626b81cad3f5a6b6e88711f9f66f57cb5c3b0e6bc2e8b4c +size 17202 diff --git a/ratio_proportion_change3/finetuning/train_info.txt b/ratio_proportion_change3/finetuning/train_info.txt new 
file mode 100644 index 0000000000000000000000000000000000000000..0bc4c40110cd31abc4a8fc3d2b5c81de801e407c --- /dev/null +++ b/ratio_proportion_change3/finetuning/train_info.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9968e038b75a633b4957602e37d57b7c0cb561f9ae3c2b17ad0f9eb48b554c21 +size 1080190 diff --git a/ratio_proportion_change3/finetuning/train_label.txt b/ratio_proportion_change3/finetuning/train_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..07f9f38562fc30212486c05470c3a37d49a5d0d1 --- /dev/null +++ b/ratio_proportion_change3/finetuning/train_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de505197183cefe6a1c5ff4f5cd8e07dc14ed1b601951d7c3e02947d603e58c6 +size 8932 diff --git a/ratio_proportion_change3/finetuning/trainr_in_label.txt b/ratio_proportion_change3/finetuning/trainr_in_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..870fc330d350eee3fb99e9d94288de025debf133 --- /dev/null +++ b/ratio_proportion_change3/finetuning/trainr_in_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95e450636dcb476a258439c94249f1078e9186bfe00d8e70da7b9c339f4f728c +size 129011 diff --git a/ratio_proportion_change3/finetuning/trainr_label.txt b/ratio_proportion_change3/finetuning/trainr_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..0cda4a77f5b3e3d9cff3c27daa3dc17e2cebb24c --- /dev/null +++ b/ratio_proportion_change3/finetuning/trainr_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0294122c85237764e51d69d2efc5233d2c3a0d1027b31b4f510ca68bd6e46bc1 +size 61542 diff --git a/ratio_proportion_change3/logs/masked/log_test_10per_finetuned.txt b/ratio_proportion_change3/logs/masked/log_test_10per_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..ac28a633ae750ac5155af7b2623e2e088902d347 --- /dev/null +++ 
b/ratio_proportion_change3/logs/masked/log_test_10per_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d42d75c9a38be298f8ee1f022a544fe49804b72979a734b42aea08f7b31fb52 +size 671476 diff --git a/ratio_proportion_change3/logs/masked/log_test_FS_finetuned.txt b/ratio_proportion_change3/logs/masked/log_test_FS_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..c62ec350125cf6a4e8459b79c091e1b258d09847 --- /dev/null +++ b/ratio_proportion_change3/logs/masked/log_test_FS_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:858fc5350a9bf0c75d46b8af1dc3b0f310bab1a0afa92ca8bca1e829b57d0b73 +size 149839 diff --git a/ratio_proportion_change3/logs/masked/log_test_IS_finetuned.txt b/ratio_proportion_change3/logs/masked/log_test_IS_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..a030572e6f071ee2bf4c7061409ac4e35a2e1695 --- /dev/null +++ b/ratio_proportion_change3/logs/masked/log_test_IS_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c66fcb29fc9f3d92bed511d4a91530ad79a13860b93e418f0b8c6c1be0e54169 +size 149828 diff --git a/ratio_proportion_change3/logs/masked/log_test_pretrained.txt b/ratio_proportion_change3/logs/masked/log_test_pretrained.txt new file mode 100644 index 0000000000000000000000000000000000000000..96ef8ec7399cde5cfe851868a48bf1ad4ac84a16 --- /dev/null +++ b/ratio_proportion_change3/logs/masked/log_test_pretrained.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df1193ce0490717b442303f51da68869c6419f461ce5044b5a275b40e7bfb368 +size 1055582 diff --git a/ratio_proportion_change3/logs/masked/log_train_10per_finetuned.txt b/ratio_proportion_change3/logs/masked/log_train_10per_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..96adf73f12582133505eb98d3ad85b60d3258429 --- /dev/null +++ 
b/ratio_proportion_change3/logs/masked/log_train_10per_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf58b6b3ba0d0a9562cfd510ce1a7bff20a4bb0ee1faa907397314333d26dcd2 +size 88900 diff --git a/ratio_proportion_change3/logs/masked/log_train_FS_finetuned.txt b/ratio_proportion_change3/logs/masked/log_train_FS_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..80bdb14612c274a7ee0dcda45d38635b8e6af820 --- /dev/null +++ b/ratio_proportion_change3/logs/masked/log_train_FS_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e92c0a8722c7b21b36f5028493692ccf32b473c20d3f6027d54e5fd822960432 +size 167286 diff --git a/ratio_proportion_change3/logs/masked/log_train_IS_finetuned.txt b/ratio_proportion_change3/logs/masked/log_train_IS_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..1404f9aacd88fba4c6506a07d343b549db9c2457 --- /dev/null +++ b/ratio_proportion_change3/logs/masked/log_train_IS_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:644994580015b35979dce25d0e2b3be7b6ef6d02193a1b0ea6d10411412c5495 +size 167148 diff --git a/ratio_proportion_change3/logs/masked/log_train_pretrained.txt b/ratio_proportion_change3/logs/masked/log_train_pretrained.txt new file mode 100644 index 0000000000000000000000000000000000000000..9b96c790c473d1caafcff379c6b8b8a90a0e7dbc --- /dev/null +++ b/ratio_proportion_change3/logs/masked/log_train_pretrained.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6095cbd3be17925bc64b05902281c01c2c3255df63ea2e5cd48b5d402c06033b +size 4116343 diff --git a/ratio_proportion_change3/output/FS/train.txt b/ratio_proportion_change3/output/FS/train.txt new file mode 100644 index 0000000000000000000000000000000000000000..910bdec20152d98f19d6fbe7419132e273f8f7b3 --- /dev/null +++ b/ratio_proportion_change3/output/FS/train.txt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:367628f1b9aa5047a07d5eb6e574e166e12d533d18a1634045424736bff9cc42 +size 1699339 diff --git a/ratio_proportion_change3/output/FS/train_label.txt b/ratio_proportion_change3/output/FS/train_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..3c1853bff8458868504674088b69ce7c2b86019c --- /dev/null +++ b/ratio_proportion_change3/output/FS/train_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be1eeaf1d96b6010aec2db568d20170e79d5e53bb790e250074f877931ab23d3 +size 20636 diff --git a/ratio_proportion_change3/output/IS/train.txt b/ratio_proportion_change3/output/IS/train.txt new file mode 100644 index 0000000000000000000000000000000000000000..775ad566024d1bc61c35366918571a3003248a5d --- /dev/null +++ b/ratio_proportion_change3/output/IS/train.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc804d5d3a54d0cbe69b295464378609916a2c5b2a8c0696757d20be185e1427 +size 1361007 diff --git a/ratio_proportion_change3/output/IS/train_label.txt b/ratio_proportion_change3/output/IS/train_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..655fe4b6852b592f3fa90b353baa24596ef2b869 --- /dev/null +++ b/ratio_proportion_change3/output/IS/train_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a394ba9e86a56b82422fd9b7a7212bde72eae95fbd8d899e0e9fb9c21132a605 +size 20636 diff --git a/ratio_proportion_change3/output/correctness/bert_fine_tuned.model.ep48 b/ratio_proportion_change3/output/correctness/bert_fine_tuned.model.ep48 new file mode 100644 index 0000000000000000000000000000000000000000..f7d920b0d88a07b81a262a4a0c440270d9171d6e Binary files /dev/null and b/ratio_proportion_change3/output/correctness/bert_fine_tuned.model.ep48 differ diff --git a/ratio_proportion_change3/output/correctness/test.txt b/ratio_proportion_change3/output/correctness/test.txt new file mode 100644 index 
0000000000000000000000000000000000000000..2b3c77a02c95f814a6e81ca0fcdc5e80176c1a72 --- /dev/null +++ b/ratio_proportion_change3/output/correctness/test.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0faf7af6b63c26cb29b586e087c84881365a94b22d71f1a8587bfa979f2d5794 +size 6253326 diff --git a/ratio_proportion_change3/output/correctness/test_label.txt b/ratio_proportion_change3/output/correctness/test_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..19aa0e87c7f6405867b72b7ae2cc70de3538dfb2 --- /dev/null +++ b/ratio_proportion_change3/output/correctness/test_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:708d181754296d2bbbe56ce509eb896ca69bd2d7a418839c0a09836bf1c31541 +size 75023 diff --git a/ratio_proportion_change3/output/effectiveness/bert_fine_tuned.model.ep28 b/ratio_proportion_change3/output/effectiveness/bert_fine_tuned.model.ep28 new file mode 100644 index 0000000000000000000000000000000000000000..e2fbc7afeb7d7ca93d668cc00b1da0052ed2b994 Binary files /dev/null and b/ratio_proportion_change3/output/effectiveness/bert_fine_tuned.model.ep28 differ diff --git a/ratio_proportion_change3/output/effectiveness/test.txt b/ratio_proportion_change3/output/effectiveness/test.txt new file mode 100644 index 0000000000000000000000000000000000000000..b35cbd8dd6f390d507d02f476c57ea478faa3fa6 --- /dev/null +++ b/ratio_proportion_change3/output/effectiveness/test.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea029980699e1390948a7f6d7290d00d4aa20d0939110f901a92bd1837abc640 +size 6961290 diff --git a/ratio_proportion_change3/output/effectiveness/test_label.txt b/ratio_proportion_change3/output/effectiveness/test_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..76034c44b429cdf8d4afe9bfb0855994c2ccf6bd --- /dev/null +++ b/ratio_proportion_change3/output/effectiveness/test_label.txt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:b8b76d7d5a4866bb7cc67afca90c0483a95855c270d4f819b6da4b5061695bb7 +size 82767 diff --git a/ratio_proportion_change3/pretraining/pretrain.txt b/ratio_proportion_change3/pretraining/pretrain.txt new file mode 100644 index 0000000000000000000000000000000000000000..bf79e6e715bb591a7c55c3c25d726721f9449297 --- /dev/null +++ b/ratio_proportion_change3/pretraining/pretrain.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a46ab0c401e5b7ad4c4b4eae74aa0fb5de78971bc18a1343a0b9ab8c462174d5 +size 6129416 diff --git a/ratio_proportion_change3/pretraining/pretrain_info.txt b/ratio_proportion_change3/pretraining/pretrain_info.txt new file mode 100644 index 0000000000000000000000000000000000000000..2d7802734140a36947b4e1f06c6fe12d21a5fde4 --- /dev/null +++ b/ratio_proportion_change3/pretraining/pretrain_info.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f79a19d8b9deb82f47b1d186c233121f450fc6b2df02814089f5ebf32509235 +size 8482733 diff --git a/ratio_proportion_change3/pretraining/test.txt b/ratio_proportion_change3/pretraining/test.txt new file mode 100644 index 0000000000000000000000000000000000000000..e22b3ce3d8305033355c274b34dfa408c4ad9408 --- /dev/null +++ b/ratio_proportion_change3/pretraining/test.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9f7a7a591605b0a2b68c606c89056bf8cca80f18962c981d7eab6d7c1d62132 +size 1522727 diff --git a/ratio_proportion_change3/pretraining/test_info.txt b/ratio_proportion_change3/pretraining/test_info.txt new file mode 100644 index 0000000000000000000000000000000000000000..bf631ecd101cb5ecccf3c151a37da1c122e481a9 --- /dev/null +++ b/ratio_proportion_change3/pretraining/test_info.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbece856f236233d2ed08ce4fe4719403c78a984b7a88e9cc3a783ca57653f43 +size 2107116 diff --git a/ratio_proportion_change3/pretraining/vocab.txt 
b/ratio_proportion_change3/pretraining/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..49212739d9bbfdd52bcfecd5d4236339e042e932 --- /dev/null +++ b/ratio_proportion_change3/pretraining/vocab.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90138f456243cf445e9762a870f4534f61acd4f463206d6a56cf51fa83376202 +size 282 diff --git a/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/highGRschool10/asd.txt b/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/highGRschool10/asd.txt new file mode 100644 index 0000000000000000000000000000000000000000..1f6600da6d9595fdb2a10f5939db964037a05fe4 --- /dev/null +++ b/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/highGRschool10/asd.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2ca1bb6c7e907d06dafe4687e579fce76b37e4e93b7605022da52e6ccc26fd2 +size 5 diff --git a/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/highGRschool10/test.txt b/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/highGRschool10/test.txt new file mode 100644 index 0000000000000000000000000000000000000000..f8c5ce63294541993a644d33e76298784970c1b7 --- /dev/null +++ b/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/highGRschool10/test.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:972269732483bcdd558fb2666b7ff0a8ed8549c8a4356a4cb7fef0117802a63a +size 8672284 diff --git a/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/highGRschool10/test_info.txt b/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/highGRschool10/test_info.txt new file mode 100644 index 0000000000000000000000000000000000000000..754f4ee15adcd985db6da45ead4c5671184b7734 --- /dev/null +++ b/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/highGRschool10/test_info.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8b8c47b2bbfe3885cb6c3cac4b0bb51aea71aa7d7dfb6f11b6b6228ebb4d9868 +size 40747929 diff --git a/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/highGRschool10/test_label.txt b/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/highGRschool10/test_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..fd39de187be52ddb3cb91b6ab5df56c598007cc3 --- /dev/null +++ b/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/highGRschool10/test_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6649695097e9cfd58df18de69ba3e6a30a8cc961e8a8a0d3b7e0334c2f3adbe +size 82732 diff --git a/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/lowGRschoolAll/test.txt b/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/lowGRschoolAll/test.txt new file mode 100644 index 0000000000000000000000000000000000000000..99c8c7241334992c8db0ec82a04cf410e4d17ef1 --- /dev/null +++ b/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/lowGRschoolAll/test.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e738c87fcbcc3e0362199ea2b7f9ef06093fb3f9e7a5f8c5ab828602e52230f9 +size 16005023 diff --git a/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/lowGRschoolAll/test_info.txt b/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/lowGRschoolAll/test_info.txt new file mode 100644 index 0000000000000000000000000000000000000000..377c95685a35dcb2e4a8ce20efd8015d8d507db3 --- /dev/null +++ b/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/lowGRschoolAll/test_info.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef4862f5c282efdfa49e13ed0f6cb344abcb7ae07fdfba535d48193bb8a3c1ed +size 81939614 diff --git a/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/lowGRschoolAll/test_label.txt b/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/lowGRschoolAll/test_label.txt new 
file mode 100644 index 0000000000000000000000000000000000000000..5f60066ca0dee0d8e31b6cabbd03be9b41c9f2dd --- /dev/null +++ b/ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/lowGRschoolAll/test_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1030da905863033698eb6dc4e206eaccc058ee802c9d8975f8710d37c9926699 +size 145592 diff --git a/ratio_proportion_change3_2223/sch_largest_100-coded/logs/highGRschool10/asd.txt b/ratio_proportion_change3_2223/sch_largest_100-coded/logs/highGRschool10/asd.txt new file mode 100644 index 0000000000000000000000000000000000000000..1f6600da6d9595fdb2a10f5939db964037a05fe4 --- /dev/null +++ b/ratio_proportion_change3_2223/sch_largest_100-coded/logs/highGRschool10/asd.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2ca1bb6c7e907d06dafe4687e579fce76b37e4e93b7605022da52e6ccc26fd2 +size 5 diff --git a/ratio_proportion_change3_2223/sch_largest_100-coded/output/highGRschool10/asd.txt b/ratio_proportion_change3_2223/sch_largest_100-coded/output/highGRschool10/asd.txt new file mode 100644 index 0000000000000000000000000000000000000000..1f6600da6d9595fdb2a10f5939db964037a05fe4 --- /dev/null +++ b/ratio_proportion_change3_2223/sch_largest_100-coded/output/highGRschool10/asd.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2ca1bb6c7e907d06dafe4687e579fce76b37e4e93b7605022da52e6ccc26fd2 +size 5 diff --git a/ratio_proportion_change3_2223/sch_largest_100-coded/pretraining/vocab.txt b/ratio_proportion_change3_2223/sch_largest_100-coded/pretraining/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..6c27782cdd9d7972cc5424e7727aa5e0076e16b4 --- /dev/null +++ b/ratio_proportion_change3_2223/sch_largest_100-coded/pretraining/vocab.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ae557bc258b0706474509d0d2b17560e5e9eb2e3484a9035182d7debf8b044b +size 531 diff --git 
a/ratio_proportion_change4/finetuning/test.txt b/ratio_proportion_change4/finetuning/test.txt new file mode 100644 index 0000000000000000000000000000000000000000..48ffe8f89008dbea4c884860e6507ef3beffb036 --- /dev/null +++ b/ratio_proportion_change4/finetuning/test.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd4debb8796a74a6d49ffcc46854c1e22fb56200a7e79dcb08a4420a222a7ca3 +size 8841095 diff --git a/ratio_proportion_change4/finetuning/test_in.txt b/ratio_proportion_change4/finetuning/test_in.txt new file mode 100644 index 0000000000000000000000000000000000000000..19a3c05aa25432e43bbd6b3346f25e8abc0bc581 --- /dev/null +++ b/ratio_proportion_change4/finetuning/test_in.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e4bd4f4caa02941171c116f48e66c6e9a3ab21d65d799a725a9a9a17f1a795 +size 1213036 diff --git a/ratio_proportion_change4/finetuning/test_in_info.txt b/ratio_proportion_change4/finetuning/test_in_info.txt new file mode 100644 index 0000000000000000000000000000000000000000..e86cc380cff9e7b2eb907be4c64eaa1c710ca84e --- /dev/null +++ b/ratio_proportion_change4/finetuning/test_in_info.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76b161f08199cb65e3e8ff30d773c814f2cedb21c12cdc7c1cdb43827e4e87e8 +size 1426614 diff --git a/ratio_proportion_change4/finetuning/test_in_label.txt b/ratio_proportion_change4/finetuning/test_in_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..9fa96e7c4aad806fb1e8e6ac08d42076620ef63f --- /dev/null +++ b/ratio_proportion_change4/finetuning/test_in_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5747975ad58adc95334a603e808a117e53cb0ef87937143b4f73d76d5df525a4 +size 17226 diff --git a/ratio_proportion_change4/finetuning/test_label.txt b/ratio_proportion_change4/finetuning/test_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..c3cd2c9a47fe218aa8644e3e6d264febe3f39a43 
--- /dev/null +++ b/ratio_proportion_change4/finetuning/test_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbc6c897e212b8668a96331fde004e613ba07aca9cf637a5dc22b2c5d926425e +size 113806 diff --git a/ratio_proportion_change4/finetuning/testr_in_label.txt b/ratio_proportion_change4/finetuning/testr_in_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..be4e7a5f40da197cc75c9bdf3b1bfd8d8469b2ee --- /dev/null +++ b/ratio_proportion_change4/finetuning/testr_in_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dde678892c3e0c8c56a167c6e88e33cbbad4e7b5ea5efde2ea5dfe7290e58193 +size 86277 diff --git a/ratio_proportion_change4/finetuning/testr_label.txt b/ratio_proportion_change4/finetuning/testr_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..4e004418fb3504a98283b89420afb74a0416acbd --- /dev/null +++ b/ratio_proportion_change4/finetuning/testr_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c589ca5677c844bbfb1926ec01a755f01258369f70183ad50dc36027b3993a6c +size 709352 diff --git a/ratio_proportion_change4/finetuning/train.txt b/ratio_proportion_change4/finetuning/train.txt new file mode 100644 index 0000000000000000000000000000000000000000..41190f4879d1be2161ac3390967c391a8d2bfc47 --- /dev/null +++ b/ratio_proportion_change4/finetuning/train.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efd8e291eaba5c3010b4c37eb860cdbda3a4bac66feedeb3367bde194695ae99 +size 965211 diff --git a/ratio_proportion_change4/finetuning/train_in.txt b/ratio_proportion_change4/finetuning/train_in.txt new file mode 100644 index 0000000000000000000000000000000000000000..26bdac82423bde8845009dc3d3f0788ad58fc866 --- /dev/null +++ b/ratio_proportion_change4/finetuning/train_in.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baaf345285bae8e2e6c9c11798b92f9c8ef0d042850212015d030b7f50983f17 +size 
1314995 diff --git a/ratio_proportion_change4/finetuning/train_in_info.txt b/ratio_proportion_change4/finetuning/train_in_info.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b4bfc7c3e9794e8b2835f63434f51e3086ef4c2 --- /dev/null +++ b/ratio_proportion_change4/finetuning/train_in_info.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:698a1c91dcdcfd2bf058a0b64e991869b87a09e79964d5dcc2454fe384e56d36 +size 1571941 diff --git a/ratio_proportion_change4/finetuning/train_in_label.txt b/ratio_proportion_change4/finetuning/train_in_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..0a69df837b16c7eeed33278d0d76570f47893706 --- /dev/null +++ b/ratio_proportion_change4/finetuning/train_in_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbbbcaa637f8db6a210b7c1083dbb91203d2ce0ab49bb3adcb93061b212f2662 +size 17226 diff --git a/ratio_proportion_change4/finetuning/train_info.txt b/ratio_proportion_change4/finetuning/train_info.txt new file mode 100644 index 0000000000000000000000000000000000000000..3808a5ba59b2274a06da8de37569e53a8045f16e --- /dev/null +++ b/ratio_proportion_change4/finetuning/train_info.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b29ca8484b089754d42e5c9266a3bfbeae61a0bedb2a7ccde8cae8e88288246 +size 1193160 diff --git a/ratio_proportion_change4/finetuning/train_label.txt b/ratio_proportion_change4/finetuning/train_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..eaa1c1a4c997496931c8329bd54c44c00c84aba7 --- /dev/null +++ b/ratio_proportion_change4/finetuning/train_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa7cf0340edb0a02431ff0b5771a800ceab6b0251903f8b5e6b47e2181ef43a8 +size 12498 diff --git a/ratio_proportion_change4/finetuning/trainr_in_label.txt b/ratio_proportion_change4/finetuning/trainr_in_label.txt new file mode 100644 index 
0000000000000000000000000000000000000000..f91b791664e6f914009b233d113f363c3f94de15 --- /dev/null +++ b/ratio_proportion_change4/finetuning/trainr_in_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76556ffe9188b04132a2540870003b5811688cd501e97a6c1102b824ebf54238 +size 101930 diff --git a/ratio_proportion_change4/finetuning/trainr_label.txt b/ratio_proportion_change4/finetuning/trainr_label.txt new file mode 100644 index 0000000000000000000000000000000000000000..0027b39e22bea45a01a708ac1fa61313ed433ca4 --- /dev/null +++ b/ratio_proportion_change4/finetuning/trainr_label.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e7c8cfbef6e774ce82290ab19eb39a02224b21cb40e7481579d613bd59171d +size 77666 diff --git a/ratio_proportion_change4/logs/masked/log_test_10per_finetuned.txt b/ratio_proportion_change4/logs/masked/log_test_10per_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..2ad0c7eb6ce4f0fd00b0a2c53f1e3da20166fdd4 --- /dev/null +++ b/ratio_proportion_change4/logs/masked/log_test_10per_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbec835815c00ad436513dc9dd74948a315d2837f9681f5288a1157465c6dd97 +size 960638 diff --git a/ratio_proportion_change4/logs/masked/log_test_FS_finetuned.txt b/ratio_proportion_change4/logs/masked/log_test_FS_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..ec5d85ad974b55011484704db0795874297d8ba8 --- /dev/null +++ b/ratio_proportion_change4/logs/masked/log_test_FS_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:761a9558d3313c8332fea8635063a96918f62d9cfb37e53d4d45bafe9ae0a578 +size 150366 diff --git a/ratio_proportion_change4/logs/masked/log_test_IS_finetuned.txt b/ratio_proportion_change4/logs/masked/log_test_IS_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..4a472f398fe68dd8a9f2c98046121264be3f457b --- 
/dev/null +++ b/ratio_proportion_change4/logs/masked/log_test_IS_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cc7c96a18f023086090a54b0df4c029fd6d63cc2e914cd376ee622a138078e2 +size 150783 diff --git a/ratio_proportion_change4/logs/masked/log_test_in_FS_finetuned.txt b/ratio_proportion_change4/logs/masked/log_test_in_FS_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..937b9b755957d4e33acf9f2eaf8c485d01ce01c9 --- /dev/null +++ b/ratio_proportion_change4/logs/masked/log_test_in_FS_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6066f8af118adeb8ec9a5f029a6c2f391624fd438cb6eb9ba60cc9e7b40d3ffc +size 169612 diff --git a/ratio_proportion_change4/logs/masked/log_test_in_IS_finetuned.txt b/ratio_proportion_change4/logs/masked/log_test_in_IS_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..a306860a28d514e154b40fc1365f32c18cd592e7 --- /dev/null +++ b/ratio_proportion_change4/logs/masked/log_test_in_IS_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e69fd061587d21e9945e2ccfa238c56de2d984836673bcf948ee666dc3b3a0fb +size 169400 diff --git a/ratio_proportion_change4/logs/masked/log_test_pretrained.txt b/ratio_proportion_change4/logs/masked/log_test_pretrained.txt new file mode 100644 index 0000000000000000000000000000000000000000..f6037673d3997ad6e4b0cb95d693901c0e3edefb --- /dev/null +++ b/ratio_proportion_change4/logs/masked/log_test_pretrained.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:377cf3d318896f7530c017abafc52d10439fa4c14e53d6af9d06e7ef0c82c40b +size 1495316 diff --git a/ratio_proportion_change4/logs/masked/log_test_reg_finetuned.txt b/ratio_proportion_change4/logs/masked/log_test_reg_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..ac25d75387d5e9035991c28cbc8b8f70618d20bb --- /dev/null +++ 
b/ratio_proportion_change4/logs/masked/log_test_reg_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ba74c992ba7c588523b811031b96054afb388b031ec5a7e2bd09341aebc28cd +size 148365 diff --git a/ratio_proportion_change4/logs/masked/log_train_10per_finetuned.txt b/ratio_proportion_change4/logs/masked/log_train_10per_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..d8d28b7ae33041c396df0c0ae3e8311be309f4ad --- /dev/null +++ b/ratio_proportion_change4/logs/masked/log_train_10per_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d72376b8865d2e7fdf6e158264d5abea885814e49836a5e2c6debfa8e0c99d74 +size 125561 diff --git a/ratio_proportion_change4/logs/masked/log_train_FS_finetuned.txt b/ratio_proportion_change4/logs/masked/log_train_FS_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..2edf25401948e4a87d973df1438881d65f70db65 --- /dev/null +++ b/ratio_proportion_change4/logs/masked/log_train_FS_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecb8b7cadb98b092e4f0f58b1e1a69b4e2dae3c384c057e4b4256c4e1d4a131d +size 167554 diff --git a/ratio_proportion_change4/logs/masked/log_train_IS_finetuned.txt b/ratio_proportion_change4/logs/masked/log_train_IS_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..ab1a9715517b7a882959b3ff6dd3e47fe8e24ed7 --- /dev/null +++ b/ratio_proportion_change4/logs/masked/log_train_IS_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ac493ed580541d018cd1f9ddfa3c07dce51e98f1d2cf3a0bb0454e872762b8f +size 167752 diff --git a/ratio_proportion_change4/logs/masked/log_train_in_FS_finetuned.txt b/ratio_proportion_change4/logs/masked/log_train_in_FS_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..2b3648058c763ceca821d260a000589e5542c310 --- /dev/null +++ 
b/ratio_proportion_change4/logs/masked/log_train_in_FS_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5478c9a1d23dc7ffc05205b31d2ab981c1e8b8d31edf5d4fbd524ecd66d4afd7 +size 170932 diff --git a/ratio_proportion_change4/logs/masked/log_train_in_IS_finetuned.txt b/ratio_proportion_change4/logs/masked/log_train_in_IS_finetuned.txt new file mode 100644 index 0000000000000000000000000000000000000000..6f68ae7c300e038c113b6cc313655dbe00ab1844 --- /dev/null +++ b/ratio_proportion_change4/logs/masked/log_train_in_IS_finetuned.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c542367862d632b160414d7508e144789c5c9522cd26a48a48dfab1223f3556f +size 170642 diff --git a/ratio_proportion_change4/output/bert_fine_tuned.FS.model.ep30 b/ratio_proportion_change4/output/bert_fine_tuned.FS.model.ep30 new file mode 100644 index 0000000000000000000000000000000000000000..9e2ff544695bb8d406476a9bc355ee1eb06fae37 Binary files /dev/null and b/ratio_proportion_change4/output/bert_fine_tuned.FS.model.ep30 differ diff --git a/ratio_proportion_change4/output/config.json b/ratio_proportion_change4/output/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cf63756104577689bc93b77470808199ebaa0aea --- /dev/null +++ b/ratio_proportion_change4/output/config.json @@ -0,0 +1,23 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "num_labels": 2, + "output_attentions": false, + "output_hidden_states": false, + "torch_dtype": "float32", + "transformers_version": "4.5.1", + "vocab_size": 30522 + } + \ No newline at end of file diff --git a/ratio_proportion_change4/output/vocab.txt 
b/ratio_proportion_change4/output/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..03d7ae12d61d90449c2ed602a58b00bd5df3a983 --- /dev/null +++ b/ratio_proportion_change4/output/vocab.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90f1b8392e4d1127a4012d3c3e52ccb7dc7ca87f02e7ca70f5629664e343cee +size 334 diff --git a/ratio_proportion_change4/pretraining/pretrain.txt b/ratio_proportion_change4/pretraining/pretrain.txt new file mode 100644 index 0000000000000000000000000000000000000000..e9c38720097ab73327293476bae1e669c0e1f84f --- /dev/null +++ b/ratio_proportion_change4/pretraining/pretrain.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21bcd93ff4e53ef73f20cccb68172d8c3648106f61a46825860b5699a32bfc28 +size 7841590 diff --git a/ratio_proportion_change4/pretraining/pretrain_info.txt b/ratio_proportion_change4/pretraining/pretrain_info.txt new file mode 100644 index 0000000000000000000000000000000000000000..7b0f08a7f546f9ae23776aecedebbd5fb5093c11 --- /dev/null +++ b/ratio_proportion_change4/pretraining/pretrain_info.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f659ad39fdae528418eb1cc19b0c2e29f5df1ffb73ef1fefaf3622f3468e2a5f +size 9686801 diff --git a/ratio_proportion_change4/pretraining/test.txt b/ratio_proportion_change4/pretraining/test.txt new file mode 100644 index 0000000000000000000000000000000000000000..5543df285a416444b580a033bfeb632321a51e2e --- /dev/null +++ b/ratio_proportion_change4/pretraining/test.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdade8317776eb385de2123e594dbf929c310eff4a94433f7f504f35865b587b +size 1964716 diff --git a/ratio_proportion_change4/pretraining/test_info.txt b/ratio_proportion_change4/pretraining/test_info.txt new file mode 100644 index 0000000000000000000000000000000000000000..5887a8c5cdbc32c40b58c9ed7c42a235948fc1fb --- /dev/null +++ 
import torch
from torch import nn, optim
from torch.nn import functional as F


class ModelWithTemperature(nn.Module):
    """
    A thin decorator, which wraps a model with temperature scaling
    model (nn.Module):
        A classification neural network
    NB: Output of the neural network should be the classification logits,
        NOT the softmax (or log softmax)!
    """

    def __init__(self, model, device="cpu"):
        super(ModelWithTemperature, self).__init__()
        self.model = model
        self.device = torch.device(device)
        # Single learnable scalar; 1.5 is a warm start (>1 softens the logits).
        self.temperature = nn.Parameter(torch.ones(1) * 1.5)

    def forward(self, input):
        # The wrapped model consumes a dict batch: token ids, segment labels, features.
        logits = self.model(input["input"], input["segment_label"], input["feat"])
        return self.temperature_scale(logits)

    def temperature_scale(self, logits):
        """
        Perform temperature scaling on logits (divide by the learned scalar).
        """
        # Expand temperature to match the size of logits
        temperature = self.temperature.unsqueeze(1).expand(logits.size(0), logits.size(1)).to(self.device)
        return logits / temperature

    def set_temperature(self, valid_loader):
        """
        Tune the temperature of the model (using the validation set).
        We're going to set it to optimize NLL.
        valid_loader (DataLoader): validation set loader yielding (batch_dict, label)
        Returns self so the call can be chained.
        """
        # Imported lazily: `metrics` is a project-local module only needed by
        # this method, so the class stays importable without it.
        import metrics

        nll_criterion = nn.CrossEntropyLoss()
        ece_criterion = metrics.ECELoss()

        # First: collect all the logits and labels for the validation set
        logits_list = []
        labels_list = []
        with torch.no_grad():
            for input, label in valid_loader:
                logits = self.model(input["input"].to(self.device),
                                    input["segment_label"].to(self.device),
                                    input["feat"].to(self.device))
                logits_list.append(logits)
                labels_list.append(label)
            logits = torch.cat(logits_list).to(self.device)
            labels = torch.cat(labels_list).to(self.device)

        # Calculate NLL and ECE before temperature scaling
        before_temperature_nll = nll_criterion(logits, labels).item()
        before_temperature_ece = ece_criterion.loss(logits.cpu().numpy(), labels.cpu().numpy(), 15)
        print('Before temperature - NLL: %.3f, ECE: %.3f' % (before_temperature_nll, before_temperature_ece))

        # Next: optimize the temperature w.r.t. NLL
        optimizer = optim.LBFGS([self.temperature], lr=0.005, max_iter=1000)

        def _closure():
            # BUG FIX: gradients must be reset on every LBFGS closure
            # evaluation, otherwise they accumulate across line-search steps
            # and the optimized temperature is wrong. (Also renamed from
            # `eval`, which shadowed the builtin.)
            optimizer.zero_grad()
            loss = nll_criterion(self.temperature_scale(logits.to(self.device)), labels.to(self.device))
            loss.backward()
            return loss

        optimizer.step(_closure)

        # Calculate NLL and ECE after temperature scaling
        after_temperature_nll = nll_criterion(self.temperature_scale(logits), labels).item()
        after_temperature_ece = ece_criterion.loss(self.temperature_scale(logits).detach().cpu().numpy(), labels.cpu().numpy(), 15)
        print('Optimal temperature: %.3f' % self.temperature.item())
        print('After temperature - NLL: %.3f, ECE: %.3f' % (after_temperature_nll, after_temperature_ece))

        return self
0000000000000000000000000000000000000000..02915dd326e4ca0a2c4afa2bca3c3db9de5130fb --- /dev/null +++ b/school_grduation_rate.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c0c99dd8fc601de1fc8f4af5880bf71b7198c09bf0d016a880b02043e0b3d03 +size 18356 diff --git a/selected_rows.txt b/selected_rows.txt new file mode 100644 index 0000000000000000000000000000000000000000..e7cdd85c55d0fc24b5eca1d7fc65e06436924a21 --- /dev/null +++ b/selected_rows.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f70f30063db1faf8376fb2f7371f935f508416688a4d58bea25bee7e2e32e3d5 +size 278475 diff --git a/src/README.md b/src/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6cfa8f25242e2d8c4e727197948542473881848a --- /dev/null +++ b/src/README.md @@ -0,0 +1,13 @@ +--- +title: Knn Test Gradio +emoji: 📊 +colorFrom: green +colorTo: blue +sdk: gradio +sdk_version: 4.31.4 +app_file: app.py +pinned: false +license: mit +--- + +Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference diff --git a/src/__pycache__/attention.cpython-312.pyc b/src/__pycache__/attention.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e4bcc5d6902277248108005f9874acdde43c56f2 Binary files /dev/null and b/src/__pycache__/attention.cpython-312.pyc differ diff --git a/src/__pycache__/bert.cpython-312.pyc b/src/__pycache__/bert.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a13a28d15ca2a7b7ce56ff99ffc7beba7b84a1c8 Binary files /dev/null and b/src/__pycache__/bert.cpython-312.pyc differ diff --git a/src/__pycache__/classifier_model.cpython-312.pyc b/src/__pycache__/classifier_model.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8cfc9bae247dbc92a256bcc2d0799354c8926f34 Binary files /dev/null and b/src/__pycache__/classifier_model.cpython-312.pyc differ diff --git 
a/src/__pycache__/dataset.cpython-312.pyc b/src/__pycache__/dataset.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eaee17018e607b6d34507dcfc8633028184aa8c2 Binary files /dev/null and b/src/__pycache__/dataset.cpython-312.pyc differ diff --git a/src/__pycache__/embedding.cpython-312.pyc b/src/__pycache__/embedding.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..638e8c98d76e7a5a6bca7d26f158bd60e9e17daf Binary files /dev/null and b/src/__pycache__/embedding.cpython-312.pyc differ diff --git a/src/__pycache__/metrics.cpython-312.pyc b/src/__pycache__/metrics.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..914fa199709edf0cce27743e29cfbf8e71b202ec Binary files /dev/null and b/src/__pycache__/metrics.cpython-312.pyc differ diff --git a/src/__pycache__/optim_schedule.cpython-312.pyc b/src/__pycache__/optim_schedule.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6dcc7a5973b04f288542cfea136d25c1f1353da3 Binary files /dev/null and b/src/__pycache__/optim_schedule.cpython-312.pyc differ diff --git a/src/__pycache__/pretrainer.cpython-312.pyc b/src/__pycache__/pretrainer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f3f579ab9f46314604517031cd3866fb085a588c Binary files /dev/null and b/src/__pycache__/pretrainer.cpython-312.pyc differ diff --git a/src/__pycache__/seq_model.cpython-312.pyc b/src/__pycache__/seq_model.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4bf4ea8b5dbdbcde4fddd00549abc6f982122135 Binary files /dev/null and b/src/__pycache__/seq_model.cpython-312.pyc differ diff --git a/src/__pycache__/transformer.cpython-312.pyc b/src/__pycache__/transformer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..96e376f548726a16d456f163aca2d2c1603a9af0 Binary files /dev/null and 
import torch.nn as nn
import torch.nn.functional as F
import torch

import math
import pickle


class Attention(nn.Module):
    """Scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V."""

    def __init__(self):
        super().__init__()

    def forward(self, query, key, value, mask=None, dropout=None):
        # Scale by sqrt of the key dimensionality to keep logits well-conditioned.
        scale = math.sqrt(query.size(-1))
        attn_logits = torch.matmul(query, key.transpose(-2, -1)) / scale

        if mask is not None:
            # Large negative bias drives masked positions to ~0 after softmax.
            attn_logits = attn_logits.masked_fill(mask == 0, -1e9)

        attn_weights = F.softmax(attn_logits, dim=-1)
        if dropout is not None:
            attn_weights = dropout(attn_weights)

        context = torch.matmul(attn_weights, value)
        return context, attn_weights
+ """ + + def __init__(self, h, d_model, dropout=0.1): + super().__init__() + assert d_model % h == 0 + + # We assume d_v always equals d_k + self.d_k = d_model // h + self.h = h + + self.linear_layers = nn.ModuleList([nn.Linear(d_model, d_model) for _ in range(3)]) + self.output_linear = nn.Linear(d_model, d_model) + self.attention = Attention() + self.dropout = nn.Dropout(p=dropout) + + def forward(self, query, key, value, mask=None): + # if mask is not None: + # # Same mask applied to all h heads. + # mask = mask.unsqueeze(1) + + nbatches = query.size(0) + + # 1) Do all the linear projections in batch from d_model => h x d_k + query, key, value = [l(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2) + for l, x in zip(self.linear_layers, (query, key, value))] + # 2) Apply attention on all the projected vectors in batch. + x, p_attn = self.attention(query, key, value, mask=mask, dropout=self.dropout) + + # 3) "Concat" using a view and apply a final linear. + x = x.transpose(1, 2).contiguous().view(nbatches, -1, self.h * self.d_k) + + return self.output_linear(x), p_attn diff --git a/src/bert.py b/src/bert.py new file mode 100644 index 0000000000000000000000000000000000000000..53ee9fd6c9ea29a05c2476c23fb867c6b310700b --- /dev/null +++ b/src/bert.py @@ -0,0 +1,62 @@ +import torch.nn as nn +import torch + +from .transformer import TransformerBlock +from .embedding import BERTEmbedding + +class BERT(nn.Module): + """ + BERT model : Bidirectional Encoder Representations from Transformers. 
+ """ + + def __init__(self, vocab_size, hidden=768, n_layers=12, attn_heads=12, dropout=0.1): + """ + :param vocab_size: vocab_size of total words + :param hidden: BERT model hidden size + :param n_layers: numbers of Transformer blocks(layers) + :param attn_heads: number of attention heads + :param dropout: dropout rate + """ + + super().__init__() + self.hidden = hidden + self.n_layers = n_layers + self.attn_heads = attn_heads + + # paper noted they used 4*hidden_size for ff_network_hidden_size + self.feed_forward_hidden = hidden * 4 + + # embedding for BERT, sum of positional, segment, token embeddings + self.embedding = BERTEmbedding(vocab_size=vocab_size, embed_size=hidden) + + # multi-layers transformer blocks, deep network + self.transformer_blocks = nn.ModuleList( + [TransformerBlock(hidden, attn_heads, hidden * 4, dropout) for _ in range(n_layers)]) + # self.attention_values = [] + + def forward(self, x, segment_info): + # attention masking for padded token + # torch.ByteTensor([batch_size, 1, seq_len, seq_len) + + device = x.device + + masked = (x > 0).unsqueeze(1).repeat(1, x.size(1), 1) + r,e,c = masked.shape + mask = torch.zeros((r, e, c), dtype=torch.bool).to(device=device) + + for i in range(r): + mask[i] = masked[i].T*masked[i] + mask = mask.unsqueeze(1) + # mask = (x > 0).unsqueeze(1).repeat(1, x.size(1), 1).unsqueeze(1) + + # print("bert mask: ", mask) + # embedding the indexed sequence to sequence of vectors + x = self.embedding(x, segment_info) + + # self.attention_values = [] + # running over multiple transformer blocks + for transformer in self.transformer_blocks: + x = transformer.forward(x, mask) + # self.attention_values.append(transformer.p_attn) + + return x diff --git a/src/bert_.py b/src/bert_.py new file mode 100644 index 0000000000000000000000000000000000000000..57e8d0e950f2526674a9faf7e7d692c1f0131dca --- /dev/null +++ b/src/bert_.py @@ -0,0 +1,355 @@ +import torch +from torch import nn +from torch.nn import functional as F +from 
class ECE(nn.Module):
    """Expected Calibration Error over equal-width confidence bins."""

    def __init__(self, n_bins=15):
        """
        n_bins (int): number of confidence interval bins
        """
        super(ECE, self).__init__()
        edges = torch.linspace(0, 1, n_bins + 1)
        self.bin_lowers = edges[:-1]
        self.bin_uppers = edges[1:]

    def forward(self, logits, labels):
        probs = F.softmax(logits, dim=1)
        confidences, predictions = torch.max(probs, 1)
        accuracies = predictions.eq(labels)

        ece = torch.zeros(1, device=logits.device)
        for lower, upper in zip(self.bin_lowers, self.bin_uppers):
            # |avg confidence - accuracy| inside the bin, weighted by bin mass.
            in_bin = confidences.gt(lower.item()) * confidences.le(upper.item())
            prop_in_bin = in_bin.float().mean()
            if prop_in_bin.item() > 0:
                accuracy_in_bin = accuracies[in_bin].float().mean()
                avg_confidence_in_bin = confidences[in_bin].mean()
                ece += torch.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin

        return ece


def accurate_nb(preds, labels):
    """Count how many argmax predictions match the labels (numpy in, int out)."""
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat)


def set_seed(args):
    """Seed the python, numpy and torch RNGs from args.seed for reproducibility."""
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
learning rate for Adam.") + parser.add_argument("--train_batch_size", default=32, type=int, help="Batch size for training.") + parser.add_argument("--eval_batch_size", default=128, type=int, help="Batch size for training.") + parser.add_argument("--epochs", default=10, type=int, help="Number of epochs for training.") + parser.add_argument("--seed", default=0, type=int, help="Number of epochs for training.") + parser.add_argument("--dataset", default='20news-15', type=str, help="dataset") + parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.") + parser.add_argument('--saved_dataset', type=str, default='n', help='whether save the preprocessed pt file of the dataset') + + args = parser.parse_args() + print(args) + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + args.device = device + set_seed(args) + + ece_criterion = ECE().to(args.device) + + # load dataset + if args.saved_dataset == 'n': + train_sentences, val_sentences, test_sentences, train_labels, val_labels, test_labels = load_dataset(args.dataset) + + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) + + train_input_ids = [] + val_input_ids = [] + test_input_ids = [] + + if args.dataset == '20news' or args.dataset == '20news-15': + MAX_LEN = 150 + else: + MAX_LEN = 256 + + for sent in train_sentences: + # `encode` will: + # (1) Tokenize the sentence. + # (2) Prepend the `[CLS]` token to the start. + # (3) Append the `[SEP]` token to the end. + # (4) Map tokens to their IDs. + encoded_sent = tokenizer.encode( + sent, # Sentence to encode. + add_special_tokens = True, # Add '[CLS]' and '[SEP]' + # This function also supports truncation and conversion + # to pytorch tensors, but we need to do padding, so we + # can't use these features :( . + max_length = MAX_LEN, # Truncate all sentences. + #return_tensors = 'pt', # Return pytorch tensors. + ) + # Add the encoded sentence to the list. 
+ train_input_ids.append(encoded_sent) + + + for sent in val_sentences: + encoded_sent = tokenizer.encode( + sent, + add_special_tokens = True, + max_length = MAX_LEN, + ) + val_input_ids.append(encoded_sent) + + for sent in test_sentences: + encoded_sent = tokenizer.encode( + sent, + add_special_tokens = True, + max_length = MAX_LEN, + ) + test_input_ids.append(encoded_sent) + + # Pad our input tokens + train_input_ids = pad_sequences(train_input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post") + val_input_ids = pad_sequences(val_input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post") + test_input_ids = pad_sequences(test_input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post") + # Create attention masks + train_attention_masks = [] + val_attention_masks = [] + test_attention_masks = [] + + # Create a mask of 1s for each token followed by 0s for padding + for seq in train_input_ids: + seq_mask = [float(i>0) for i in seq] + train_attention_masks.append(seq_mask) + for seq in val_input_ids: + seq_mask = [float(i>0) for i in seq] + val_attention_masks.append(seq_mask) + for seq in test_input_ids: + seq_mask = [float(i>0) for i in seq] + test_attention_masks.append(seq_mask) + + # Convert all of our data into torch tensors, the required datatype for our model + + train_inputs = torch.tensor(train_input_ids) + validation_inputs = torch.tensor(val_input_ids) + train_labels = torch.tensor(train_labels) + validation_labels = torch.tensor(val_labels) + train_masks = torch.tensor(train_attention_masks) + validation_masks = torch.tensor(val_attention_masks) + test_inputs = torch.tensor(test_input_ids) + test_labels = torch.tensor(test_labels) + test_masks = torch.tensor(test_attention_masks) + + # Create an iterator of our data with torch DataLoader. 
+ train_data = TensorDataset(train_inputs, train_masks, train_labels) + validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels) + prediction_data = TensorDataset(test_inputs, test_masks, test_labels) + + dataset_dir = 'dataset/{}'.format(args.dataset) + if not os.path.exists(dataset_dir): + os.makedirs(dataset_dir) + + torch.save(train_data, dataset_dir+'/train.pt') + torch.save(validation_data, dataset_dir+'/val.pt') + torch.save(prediction_data, dataset_dir+'/test.pt') + + else: + dataset_dir = 'dataset/{}'.format(args.dataset) + train_data = torch.load(dataset_dir+'/train.pt') + validation_data = torch.load(dataset_dir+'/val.pt') + prediction_data = torch.load(dataset_dir+'/test.pt') + + + train_sampler = RandomSampler(train_data) + train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size) + + validation_sampler = SequentialSampler(validation_data) + validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=args.eval_batch_size) + + prediction_sampler = SequentialSampler(prediction_data) + prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=args.eval_batch_size) + + + + if args.dataset == '20news': + num_labels = 20 + elif args.dataset == '20news-15': + num_labels = 15 + elif args.dataset == 'wos-100': + num_labels = 100 + elif args.dataset == 'wos': + num_labels = 134 + + print(num_labels) + + model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels= num_labels, output_hidden_states=True) + if torch.cuda.device_count() > 1: + print("Let's use", torch.cuda.device_count(), "GPUs!") + # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] 
on 3 GPUs + model = nn.DataParallel(model) + model.to(args.device) + +#######train model + + param_optimizer = list(model.named_parameters()) + no_decay = ['bias', 'gamma', 'beta'] + optimizer_grouped_parameters = [ + {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], + 'weight_decay_rate': args.weight_decay}, + {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], + 'weight_decay_rate': 0.0} + ] + + optimizer = torch.optim.Adam(optimizer_grouped_parameters, lr=args.lr, eps=1e-9) + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1) + t_total = len(train_dataloader) * args.epochs + # Store our loss and accuracy for plotting + + best_val = -np.inf + # trange is a tqdm wrapper around the normal python range + for epoch in trange(args.epochs, desc="Epoch"): + # Training + # Set our model to training mode (as opposed to evaluation mode) + # Tracking variables + tr_loss = 0 + nb_tr_examples, nb_tr_steps = 0, 0 + model.train() + + # Train the data for one epoch + for step, batch in enumerate(train_dataloader): + + # Add batch to GPU + batch = tuple(t.to(args.device) for t in batch) + # Unpack the inputs from our dataloader + b_input_ids, b_input_mask, b_labels = batch + + loss_ce = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)[0] + if torch.cuda.device_count() > 1: + loss_ce = loss_ce.mean() + loss_ce.backward() + + torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) + + # Update parameters and take a step using the computed gradient + optimizer.step() + + # Update tracking variables + tr_loss += loss_ce.item() + + nb_tr_examples += b_input_ids.size(0) + nb_tr_steps += 1 + + print("Train cross entropy loss: {}".format(tr_loss/nb_tr_steps)) + + + + # Validation + # Put model in evaluation mode to evaluate loss on the validation set + model.eval() + # Tracking variables + eval_accurate_nb = 0 + nb_eval_examples = 0 + logits_list 
= [] + labels_list = [] + + # Evaluate data for one epoch + for batch in validation_dataloader: + # Add batch to GPU + batch = tuple(t.to(args.device) for t in batch) + # Unpack the inputs from our dataloader + b_input_ids, b_input_mask, b_labels = batch + # Telling the model not to compute or store gradients, saving memory and speeding up validation + with torch.no_grad(): + # Forward pass, calculate logit predictions + logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0] + logits_list.append(logits) + labels_list.append(b_labels) + # Move logits and labels to CPU + logits = logits.detach().cpu().numpy() + label_ids = b_labels.to('cpu').numpy() + + tmp_eval_nb = accurate_nb(logits, label_ids) + + eval_accurate_nb += tmp_eval_nb + nb_eval_examples += label_ids.shape[0] + eval_accuracy = eval_accurate_nb/nb_eval_examples + print("Validation Accuracy: {}".format(eval_accuracy)) + scheduler.step(eval_accuracy) + + logits_ece = torch.cat(logits_list) + labels_ece = torch.cat(labels_list) + ece = ece_criterion(logits_ece, labels_ece).item() + print('ECE on val data: {}'.format(ece)) + + + if eval_accuracy > best_val: + dirname = '{}/BERT-base-{}'.format(args.dataset, args.seed) + + output_dir = './model_save/{}'.format(dirname) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + print("Saving model to %s" % output_dir) + model_to_save = model.module if hasattr(model, 'module') else model + model_to_save.save_pretrained(output_dir) + #tokenizer.save_pretrained(output_dir) + + best_val = eval_accuracy + +# ##### test model on test data + # Put model in evaluation mode + model.eval() + # Tracking variables + eval_accurate_nb = 0 + nb_test_examples = 0 + logits_list = [] + labels_list = [] + # Predict + for batch in prediction_dataloader: + # Add batch to GPU + batch = tuple(t.to(args.device) for t in batch) + # Unpack the inputs from our dataloader + b_input_ids, b_input_mask, b_labels = batch + # Telling the model not to compute or 
class BERTForClassification(nn.Module):
    """
    Fine-tuning classification head on top of a pre-trained BERT encoder.

    Projects the [CLS]-position hidden state to `n_labels` logits.
    """

    def __init__(self, bert: BERT, vocab_size, n_labels):
        """
        :param bert: pre-trained BERT encoder to fine-tune
        :param vocab_size: total vocab size (kept for interface compatibility; unused here)
        :param n_labels: number of output classes
        """
        super().__init__()
        self.bert = bert
        self.linear = nn.Linear(self.bert.hidden, n_labels)

    def forward(self, x, segment_label):
        # Encode the full sequence, then classify from the first ([CLS]) position.
        hidden_states = self.bert(x, segment_label)
        cls_state = hidden_states[:, 0]
        return self.linear(cls_state)


class BERTForClassificationWithFeats(nn.Module):
    """
    Fine-tuning classification head that concatenates the BERT [CLS]
    embedding with a vector of hand-engineered features before the
    final linear projection.
    """

    def __init__(self, bert: BERT, n_labels, feat_size=9):
        """
        :param bert: pre-trained BERT encoder to fine-tune
        :param n_labels: number of output classes
        :param feat_size: dimensionality of the extra feature vector
        """
        super().__init__()
        self.bert = bert
        self.linear = nn.Linear(self.bert.hidden + feat_size, n_labels)

    def forward(self, x, segment_label, feat):
        # [CLS] embedding concatenated with the external features on the last axis.
        hidden_states = self.bert(x, segment_label)
        combined = torch.cat((hidden_states[:, 0], feat), dim=-1)
        return self.linear(combined)
class DataPreprocessor:
    """
    Scans a (possibly huge) tab-separated MATHia interaction log in chunks
    and collects the sets of unique students, problems, workspaces
    (problem hierarchy), step names and knowledge components (KCs).

    Results are stored on the instance (`unique_students`, `unique_problems`,
    `unique_prob_hierarchy`, `unique_steps`, `unique_kcs`) after calling one
    of the `analyze_*` methods.
    """

    def __init__(self, input_file_path):
        # Path of the tab-separated interaction log to analyze.
        self.input_file_path = input_file_path
        # Populated by analyze_dataset / analyze_dataset_by_section.
        self.unique_students = None
        self.unique_problems = None
        self.unique_prob_hierarchy = None
        self.unique_steps = None
        self.unique_kcs = None

    def analyze_dataset(self):
        """Collect unique students / workspaces / problems / KCs over the whole file."""
        file_iterator = self.load_file_iterator()

        start_time = time.time()
        # Plain empty sets replace the original "seed with a sentinel then
        # remove it" pattern, which would silently drop a real value that
        # happened to equal the sentinel ("st", "pr", ...).
        self.unique_students = set()
        self.unique_problems = set()
        self.unique_prob_hierarchy = set()
        self.unique_kcs = set()
        for chunk_data in file_iterator:
            for student_id, std_groups in chunk_data.groupby('Anon Student Id'):
                self.unique_students.add(student_id)
                for hierarchy, hierarchy_groups in std_groups.groupby('Level (Workspace Id)'):
                    self.unique_prob_hierarchy.add(hierarchy)
                    for problem_name, prob_name_groups in hierarchy_groups.groupby('Problem Name'):
                        self.unique_problems.add(problem_name)
                        for a in prob_name_groups['KC Model(MATHia)']:
                            # A KC cell is either NaN or "kc1~~kc2~~..."
                            if str(a) != "nan":
                                self.unique_kcs.update(a.split("~~"))
        end_time = time.time()
        print("Time Taken to analyze dataset = ", end_time - start_time)
        print("Length of unique students->", len(self.unique_students))
        print("Length of unique problems->", len(self.unique_problems))
        print("Length of unique problem hierarchy->", len(self.unique_prob_hierarchy))
        print("Length of Unique Knowledge components ->", len(self.unique_kcs))

    def analyze_dataset_by_section(self, workspace_name):
        """Same as analyze_dataset, restricted to one workspace; also collects step names."""
        file_iterator = self.load_file_iterator()

        start_time = time.time()
        self.unique_students = set()
        self.unique_problems = set()
        self.unique_prob_hierarchy = set()
        self.unique_steps = set()
        self.unique_kcs = set()
        for chunk_data in file_iterator:
            for student_id, std_groups in chunk_data.groupby('Anon Student Id'):
                for hierarchy, hierarchy_groups in std_groups.groupby('Level (Workspace Id)'):
                    # Only accumulate rows belonging to the requested workspace.
                    if workspace_name == hierarchy:
                        self.unique_students.add(student_id)
                        self.unique_prob_hierarchy.add(hierarchy)
                        for problem_name, prob_name_groups in hierarchy_groups.groupby('Problem Name'):
                            self.unique_problems.add(problem_name)
                            for step in prob_name_groups['Step Name']:
                                if str(step) != "nan":
                                    self.unique_steps.add(step)
                            for a in prob_name_groups['KC Model(MATHia)']:
                                if str(a) != "nan":
                                    self.unique_kcs.update(a.split("~~"))
        end_time = time.time()
        print("Time Taken to analyze dataset = ", end_time - start_time)
        print("Workspace-> ", workspace_name)
        print("Length of unique students->", len(self.unique_students))
        print("Length of unique problems->", len(self.unique_problems))
        print("Length of unique problem hierarchy->", len(self.unique_prob_hierarchy))
        print("Length of unique step names ->", len(self.unique_steps))
        print("Length of unique knowledge components ->", len(self.unique_kcs))

    def load_file_iterator(self):
        """Return a chunked TSV iterator so very large logs never load at once."""
        return pd.read_csv(self.input_file_path, sep="\t", header=0,
                           iterator=True, chunksize=1000000)
class PretrainerDataset(Dataset):
    """
    Dataset for masked-token BERT pre-training.

    Each line of the input file is one tab-separated token sequence;
    blank lines separate "documents". `__getitem__` applies BERT-style
    random masking and returns padded tensors.
    """

    def __init__(self, dataset_path, vocab, seq_len=30, max_mask=0.15):
        """
        :param dataset_path: path to the pre-training corpus (one sequence per line)
        :param vocab: Vocab object exposing a `vocab` token->id dict
        :param seq_len: fixed output sequence length (incl. [CLS]/[SEP])
        :param max_mask: fraction of input tokens selected for masking
        """
        self.dataset_path = dataset_path
        self.vocab = vocab  # Vocab object

        self.lines = []            # one token list per non-blank input line
        self.index_documents = {}  # document id -> list of line indices

        seq_len_list = []
        with open(self.dataset_path, "r") as reader:
            i = 0
            index = 0
            self.index_documents[i] = []
            for line in tqdm.tqdm(reader.readlines()):
                if line:
                    line = line.strip()
                    if not line:
                        # Blank line: start a new document.
                        i += 1
                        self.index_documents[i] = []
                    else:
                        self.index_documents[i].append(index)
                        tokens = line.split("\t")
                        self.lines.append(tokens)
                        seq_len_list.append(len(tokens))
                        index += 1
        print("Sequence Stats: len: %s, min: %s, max: %s, average: %s" % (len(seq_len_list),
              min(seq_len_list), max(seq_len_list), sum(seq_len_list)/len(seq_len_list)))
        print("Unique Sequences: ", len({tuple(ll) for ll in self.lines}))
        # Drop empty documents (e.g. trailing blank lines).
        self.index_documents = {k: v for k, v in self.index_documents.items() if v}
        print(len(self.index_documents))
        self.seq_len = seq_len
        print("Sequence length set at: ", self.seq_len)
        self.max_mask = max_mask
        print("% of input tokens selected for masking : ", self.max_mask)

    def __len__(self):
        return len(self.lines)

    def __getitem__(self, item):
        # Reserve two positions for [CLS] and [SEP].
        token_a = self.lines[item][:self.seq_len - 2]
        sa_masked, sa_masked_label, sa_masked_pos = self.random_mask_seq(token_a)

        s1 = [self.vocab.vocab['[CLS]']] + sa_masked + [self.vocab.vocab['[SEP]']]
        # Labels for [CLS]/[SEP] are [PAD] (ignored by the loss).
        s1_label = [self.vocab.vocab['[PAD]']] + sa_masked_label + [self.vocab.vocab['[PAD]']]
        segment_label = [1 for _ in range(len(s1))]
        masked_pos = [0] + sa_masked_pos + [0]

        # Right-pad everything to the fixed sequence length.
        padding = [self.vocab.vocab['[PAD]'] for _ in range(self.seq_len - len(s1))]
        s1.extend(padding)
        s1_label.extend(padding)
        segment_label.extend(padding)
        masked_pos.extend(padding)

        output = {'bert_input': s1,
                  'bert_label': s1_label,
                  'segment_label': segment_label,
                  'masked_pos': masked_pos}
        return {key: torch.tensor(value) for key, value in output.items()}

    def random_mask_seq(self, tokens):
        """
        Apply BERT-style random masking to a token sequence.

        For each token selected (probability `max_mask`): 80% -> [MASK],
        10% -> a random token id, 10% -> left unchanged.

        :param tokens: original token string sequence
        :return: (masked token ids, label ids ([PAD] where unmasked), mask flags)
        """
        masked_pos = []
        output_labels = []
        output_tokens = copy.deepcopy(tokens)
        opt_step = False
        for i, token in enumerate(tokens):
            # Track whether we are inside an "optional task" region; it
            # restricts which ids the random-replacement branch may draw.
            if token in ['OptionalTask_1', 'EquationAnswer', 'NumeratorFactor',
                         'DenominatorFactor', 'OptionalTask_2', 'FirstRow1:1',
                         'FirstRow1:2', 'FirstRow2:1', 'FirstRow2:2',
                         'SecondRow', 'ThirdRow']:
                opt_step = True
            prob = random.random()
            if prob < self.max_mask:
                # Token selected for masking; re-draw to pick the variant.
                prob = random.random()
                if prob < 0.8:  # [MASK] token 80% of the time
                    output_tokens[i] = self.vocab.vocab['[MASK]']
                    masked_pos.append(1)
                elif prob < 0.9:  # a random token 10% of the time
                    if opt_step:
                        # NOTE(review): hard-coded id pool for optional-task
                        # tokens — presumably ids of valid replacement tokens;
                        # verify against the vocab file.
                        output_tokens[i] = random.choice(
                            [7, 8, 9, 11, 12, 13, 14, 15, 16, 22, 23, 24, 25, 26, 27, 30, 31, 32])
                        opt_step = False
                    else:
                        output_tokens[i] = random.randint(1, len(self.vocab.vocab) - 1)
                    masked_pos.append(1)
                else:  # the unchanged i-th token 10% of the time
                    output_tokens[i] = self.vocab.vocab.get(token, self.vocab.vocab['[UNK]'])
                    masked_pos.append(0)
                # True label: the original token's id.
                output_labels.append(self.vocab.vocab.get(token, self.vocab.vocab['[UNK]']))
            else:
                # Not selected: keep the original token, label is [PAD].
                output_tokens[i] = self.vocab.vocab.get(token, self.vocab.vocab['[UNK]'])
                output_labels.append(self.vocab.vocab['[PAD]'])
                masked_pos.append(0)
        return output_tokens, output_labels, masked_pos
class TokenizerDataset(Dataset):
    """
    Tokenizes one sequence per input line for fine-tuning, with optional
    integer labels and optional per-sequence feature vectors read from a
    parallel "info" file (label path with "label" replaced by "info").
    """

    def __init__(self, dataset_path, label_path, vocab, seq_len=30):
        """
        :param dataset_path: path to sequences (one tab-separated line each)
        :param label_path: path to integer labels, one per line (may be falsy)
        :param vocab: Vocab object exposing `vocab` dict and `to_seq`
        :param seq_len: fixed output sequence length
        """
        self.dataset_path = dataset_path
        self.label_path = label_path
        self.vocab = vocab  # Vocab object

        self.lines = []
        self.labels = []
        self.feats = []
        if self.label_path:
            # `with` guarantees the handles are closed (the original leaked
            # them on any exception between open() and close()).
            with open(self.label_path, "r") as label_file:
                for line in label_file:
                    line = line.strip()
                    if line:
                        self.labels.append(int(line))

            # Feature vectors are optional; missing/malformed info files are
            # tolerated (best-effort, matching the original behavior).
            try:
                j = 0
                info_path = self.label_path.replace("label", "info")
                with open(info_path, "r") as dataset_info_file:
                    for line in dataset_info_file:
                        line = line.strip()
                        if not line:
                            continue
                        # Prior-feature block plus skill-difficulty block
                        # (first entry of the second block dropped).
                        feat_vec = [float(i) for i in line.split(",")[-3].split("\t")]
                        feat2 = [float(i) for i in line.split(",")[-2].split("\t")]
                        feat_vec.extend(feat2[1:])
                        if j == 0:
                            # Log the feature dimensionality once.
                            print(len(feat_vec))
                            j += 1
                        self.feats.append(feat_vec)
            except Exception as e:
                print(e)

        with open(self.dataset_path, "r") as file:
            for line in file:
                line = line.strip()
                if line:
                    self.lines.append(line)

        self.len = len(self.lines)
        self.seq_len = seq_len
        print("Sequence length set at ", self.seq_len, len(self.lines),
              len(self.labels) if self.label_path else 0)

    def __len__(self):
        return self.len

    def __getitem__(self, item):
        org_line = self.lines[item].split("\t")
        dup_line = []
        opt = False
        for l in org_line:
            if l in ["OptionalTask_1", "EquationAnswer", "NumeratorFactor",
                     "DenominatorFactor", "OptionalTask_2", "FirstRow1:1",
                     "FirstRow1:2", "FirstRow2:1", "FirstRow2:2",
                     "SecondRow", "ThirdRow"]:
                opt = True
            if opt and 'FinalAnswer-' in l:
                # Hide the final-answer token once an optional task has begun.
                dup_line.append('[UNK]')
            else:
                dup_line.append(l)
        dup_line = "\t".join(dup_line)
        s1 = self.vocab.to_seq(dup_line, self.seq_len)  # adds [CLS] and [SEP]
        s1_label = self.labels[item] if self.label_path else 0
        segment_label = [1 for _ in range(len(s1))]
        s1_feat = self.feats[item] if len(self.feats) > 0 else 0
        padding = [self.vocab.vocab['[PAD]'] for _ in range(self.seq_len - len(s1))]
        s1.extend(padding)
        segment_label.extend(padding)

        output = {'input': s1,
                  'label': s1_label,
                  'feat': s1_feat,
                  'segment_label': segment_label}
        return {key: torch.tensor(value) for key, value in output.items()}
class TokenizerDatasetForCalibration(Dataset):
    """
    Same tokenization as TokenizerDataset, but `__getitem__` returns a
    `(tensor_dict, label)` tuple — the shape calibration code expects.
    """

    def __init__(self, dataset_path, label_path, vocab, seq_len=30):
        """
        :param dataset_path: path to sequences (one tab-separated line each)
        :param label_path: path to integer labels, one per line (may be falsy)
        :param vocab: Vocab object exposing `vocab` dict and `to_seq`
        :param seq_len: fixed output sequence length
        """
        self.dataset_path = dataset_path
        self.label_path = label_path
        self.vocab = vocab  # Vocab object

        self.lines = []
        self.labels = []
        self.feats = []
        if self.label_path:
            # `with` guarantees the handles are closed (the original leaked
            # them on any exception between open() and close()).
            with open(self.label_path, "r") as label_file:
                for line in label_file:
                    line = line.strip()
                    if line:
                        self.labels.append(int(line))

            # Feature vectors are optional; missing/malformed info files are
            # tolerated (best-effort, matching the original behavior).
            try:
                j = 0
                info_path = self.label_path.replace("label", "info")
                with open(info_path, "r") as dataset_info_file:
                    for line in dataset_info_file:
                        line = line.strip()
                        if not line:
                            continue
                        # Prior-feature block plus skill-difficulty block
                        # (first entry of the second block dropped).
                        feat_vec = [float(i) for i in line.split(",")[-3].split("\t")]
                        feat2 = [float(i) for i in line.split(",")[-2].split("\t")]
                        feat_vec.extend(feat2[1:])
                        if j == 0:
                            # Log the feature dimensionality once.
                            print(len(feat_vec))
                            j += 1
                        self.feats.append(feat_vec)
            except Exception as e:
                print(e)

        with open(self.dataset_path, "r") as file:
            for line in file:
                line = line.strip()
                if line:
                    self.lines.append(line)

        self.len = len(self.lines)
        self.seq_len = seq_len
        print("Sequence length set at ", self.seq_len, len(self.lines),
              len(self.labels) if self.label_path else 0)

    def __len__(self):
        return self.len

    def __getitem__(self, item):
        org_line = self.lines[item].split("\t")
        dup_line = []
        opt = False
        for l in org_line:
            if l in ["OptionalTask_1", "EquationAnswer", "NumeratorFactor",
                     "DenominatorFactor", "OptionalTask_2", "FirstRow1:1",
                     "FirstRow1:2", "FirstRow2:1", "FirstRow2:2",
                     "SecondRow", "ThirdRow"]:
                opt = True
            if opt and 'FinalAnswer-' in l:
                # Hide the final-answer token once an optional task has begun.
                dup_line.append('[UNK]')
            else:
                dup_line.append(l)
        dup_line = "\t".join(dup_line)
        s1 = self.vocab.to_seq(dup_line, self.seq_len)  # adds [CLS] and [SEP]
        s1_label = self.labels[item] if self.label_path else 0
        segment_label = [1 for _ in range(len(s1))]
        s1_feat = self.feats[item] if len(self.feats) > 0 else 0
        padding = [self.vocab.vocab['[PAD]'] for _ in range(self.seq_len - len(s1))]
        s1.extend(padding)
        segment_label.extend(padding)

        output = {'input': s1,
                  'label': s1_label,
                  'feat': s1_feat,
                  'segment_label': segment_label}
        # Calibration callers want (inputs, label) pairs.
        return ({key: torch.tensor(value) for key, value in output.items()}, s1_label)
class TokenEmbedding(nn.Embedding):
    """Token-id -> dense vector lookup; index 0 is the (zeroed) [PAD] embedding."""

    def __init__(self, vocab_size, embed_size=512):
        super().__init__(vocab_size, embed_size, padding_idx=0)  # look at vocab_file


class SegmentEmbedding(nn.Embedding):
    """Segment-id embedding: 0 = padding, 1 = sentence A, 2 = sentence B."""

    def __init__(self, embed_size=512):
        super().__init__(3, embed_size, padding_idx=0)


class PositionalEmbedding(nn.Module):
    """Fixed (non-learned) sinusoidal positional encodings."""

    def __init__(self, d_model, max_len=512):
        super().__init__()

        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model).float()
        # Fix: the original set a misspelled `require_grad` attribute, which
        # is a no-op; the real flag is `requires_grad`.
        pe.requires_grad = False

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        # Even indices get sin, odd indices get cos.
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        # (1, max_len, d_model) so the table broadcasts over the batch dim;
        # register_buffer keeps it on-device and out of the optimizer.
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        # Encodings for the first x.size(1) positions: shape (1, seq_len, d_model).
        return self.pe[:, :x.size(1)]


class BERTEmbedding(nn.Module):
    """
    BERT Embedding, the sum of three components:
    1. TokenEmbedding      : normal embedding matrix
    2. PositionalEmbedding : positional information using sin/cos
    3. SegmentEmbedding    : sentence segment info (sent_A:1, sent_B:2)

    Dropout is applied to the summed embedding.
    """

    def __init__(self, vocab_size, embed_size, dropout=0.1):
        """
        :param vocab_size: total vocab size
        :param embed_size: embedding size of token embedding
        :param dropout: dropout rate
        """
        super().__init__()
        self.token = TokenEmbedding(vocab_size=vocab_size, embed_size=embed_size)
        self.position = PositionalEmbedding(d_model=self.token.embedding_dim)
        self.segment = SegmentEmbedding(embed_size=self.token.embedding_dim)
        self.dropout = nn.Dropout(p=dropout)
        self.embed_size = embed_size

    def forward(self, sequence, segment_label):
        x = self.token(sequence) + self.position(sequence) + self.segment(segment_label)
        return self.dropout(x)
tf_gpu_env +channels: + - pytorch + - pyg + - ankurankan + - gurobi + - dglteam + - nvidia/label/cuda-11.3.1 + - anaconda + - conda-forge + - defaults +dependencies: + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=2_kmp_llvm + - abseil-cpp=20220623.0=h8cdb687_6 + - aiohttp=3.8.5=py38h01eb140_0 + - aiosignal=1.3.1=pyhd8ed1ab_0 + - alsa-lib=1.2.8=h166bdaf_0 + - anyio=3.5.0=py38h06a4308_0 + - appdirs=1.4.4=pyhd3eb1b0_0 + - argon2-cffi=21.3.0=pyhd3eb1b0_0 + - argon2-cffi-bindings=21.2.0=py38h7f8727e_0 + - arrow-cpp=11.0.0=ha770c72_5_cpu + - asgiref=3.5.2=py38h06a4308_0 + - asttokens=2.0.5=pyhd3eb1b0_0 + - async-timeout=4.0.3=pyhd8ed1ab_0 + - attr=2.5.1=h166bdaf_1 + - attrs=22.1.0=py38h06a4308_0 + - aws-c-auth=0.6.24=h84a1944_5 + - aws-c-cal=0.5.20=hc60faf5_6 + - aws-c-common=0.8.11=h0b41bf4_0 + - aws-c-compression=0.2.16=h034cb4b_3 + - aws-c-event-stream=0.2.18=h75388cd_6 + - aws-c-http=0.7.4=hf084cc8_2 + - aws-c-io=0.13.17=h10df833_2 + - aws-c-mqtt=0.8.6=hc41645a_6 + - aws-c-s3=0.2.4=h1b8f470_3 + - aws-c-sdkutils=0.1.7=h034cb4b_3 + - aws-checksums=0.1.14=h034cb4b_3 + - aws-crt-cpp=0.19.7=h0073717_7 + - aws-sdk-cpp=1.10.57=h4707e7a_4 + - backcall=0.2.0=pyhd3eb1b0_0 + - beautifulsoup4=4.12.2=py38h06a4308_0 + - blas=1.0=mkl + - bleach=4.1.0=pyhd3eb1b0_0 + - boltons=23.0.0=pyhd8ed1ab_0 + - boost-cpp=1.81.0=he95ae9e_0 + - bottleneck=1.3.5=py38h7deecbd_0 + - brotli=1.0.9=h5eee18b_7 + - brotli-bin=1.0.9=h5eee18b_7 + - brotlipy=0.7.0=py38h27cfd23_1003 + - bzip2=1.0.8=h7b6447c_0 + - c-ares=1.19.1=hd590300_0 + - ca-certificates=2023.11.17=hbcca054_0 + - cached-property=1.5.2=hd8ed1ab_1 + - cached_property=1.5.2=pyha770c72_1 + - certifi=2023.11.17=pyhd8ed1ab_0 + - cffi=1.15.1=py38h74dc2b5_0 + - charset-normalizer=2.0.4=pyhd3eb1b0_0 + - click=8.1.7=py38h06a4308_0 + - comm=0.1.2=py38h06a4308_0 + - conda=23.7.4=py38h578d9bd_0 + - conda-package-handling=2.2.0=pyh38be061_0 + - conda-package-streaming=0.9.0=pyhd8ed1ab_0 + - contourpy=1.0.5=py38hdb19cb5_0 + - 
cryptography=39.0.1=py38h9ce1e76_0 + - cuda-command-line-tools=11.3.1=h712c49d_0 + - cuda-compiler=11.3.1=h712c49d_0 + - cuda-cudart=11.3.109=hfb95d0c_0 + - cuda-cuobjdump=11.3.122=hbf6ec6b_0 + - cuda-cupti=11.3.111=h12ad217_0 + - cuda-cuxxfilt=11.3.122=h4dc11a3_0 + - cuda-gdb=11.3.109=h33b7820_0 + - cuda-libraries=11.3.1=h712c49d_0 + - cuda-libraries-dev=11.3.1=h712c49d_0 + - cuda-memcheck=11.3.109=hf5cb439_0 + - cuda-nvcc=11.3.122=h4814707_0 + - cuda-nvdisasm=11.3.122=ha26faa6_0 + - cuda-nvml-dev=11.3.58=hc25e488_0 + - cuda-nvprof=11.3.111=h95a27d4_0 + - cuda-nvprune=11.3.122=hb3346b8_0 + - cuda-nvrtc=11.3.122=h1aa17d8_0 + - cuda-nvtx=11.3.109=h4ec7630_0 + - cuda-nvvp=11.3.111=h4c4416a_0 + - cuda-samples=11.3.58=h6d5b628_0 + - cuda-sanitizer-api=11.3.111=h2446cfc_0 + - cuda-thrust=11.3.109=he8b717c_0 + - cuda-toolkit=11.3.1=h712c49d_0 + - cuda-tools=11.3.1=h712c49d_0 + - cuda-version=11.3=hbc958af_2 + - cuda-visual-tools=11.3.1=h712c49d_0 + - cudatoolkit=11.3.1=h2bc3f7f_2 + - cudnn=8.8.0.121=h838ba91_4 + - cycler=0.11.0=pyhd3eb1b0_0 + - cyrus-sasl=2.1.27=h9033bb2_6 + - cython=0.29.33=py38h6a678d5_0 + - dataclasses=0.8=pyhc8e2a94_3 + - datasets=2.14.5=pyhd8ed1ab_0 + - dbus=1.13.18=hb2f20db_0 + - debugpy=1.5.1=py38h295c915_0 + - decorator=5.1.1=pyhd3eb1b0_0 + - defusedxml=0.7.1=pyhd3eb1b0_0 + - dgl-cuda11.3=0.9.1=py38_0 + - dill=0.3.7=py38h06a4308_0 + - django=3.1.2=py_0 + - entrypoints=0.4=py38h06a4308_0 + - executing=0.8.3=pyhd3eb1b0_0 + - expat=2.5.0=hcb278e6_1 + - ffmpeg=4.2.2=h20bf706_0 + - fftw=3.3.10=nompi_hf0379b8_106 + - filelock=3.9.0=py38h06a4308_0 + - flatbuffers=22.12.06=hcb278e6_2 + - fmt=10.1.1=h00ab1b0_0 + - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 + - font-ttf-inconsolata=3.000=h77eed37_0 + - font-ttf-source-code-pro=2.038=h77eed37_0 + - font-ttf-ubuntu=0.83=h77eed37_1 + - fontconfig=2.14.1=h4c34cd2_2 + - fonts-conda-ecosystem=1=0 + - fonts-conda-forge=1=0 + - fonttools=4.25.0=pyhd3eb1b0_0 + - freetype=2.12.1=h4a9f257_0 + - 
frozenlist=1.4.0=py38h01eb140_0 + - fsspec=2023.6.0=pyh1a96a4e_0 + - future=0.18.3=pyhd8ed1ab_0 + - gensim=3.8.3=py38h2531618_2 + - gettext=0.21.1=h27087fc_0 + - gflags=2.2.2=he1b5a44_1004 + - giflib=5.2.1=h5eee18b_3 + - glib=2.78.1=hfc55251_1 + - glib-tools=2.78.1=hfc55251_1 + - glog=0.6.0=h6f12383_0 + - gmp=6.2.1=h295c915_3 + - gmpy2=2.1.2=py38heeb90bb_0 + - gnutls=3.6.15=he1e5248_0 + - grakel=0.1.8=py38h2b96118_3 + - grpc-cpp=1.51.1=h27aab58_1 + - gst-plugins-base=1.21.3=h4243ec0_1 + - gstreamer=1.21.3=h25f0c4b_1 + - gstreamer-orc=0.4.34=hd590300_0 + - gurobi=10.0.0=py38_0 + - huggingface_hub=0.17.0=pyhd8ed1ab_0 + - icu=70.1=h27087fc_0 + - idna=3.4=py38h06a4308_0 + - importlib-metadata=6.0.0=py38h06a4308_0 + - importlib_metadata=6.0.0=hd3eb1b0_0 + - importlib_resources=5.2.0=pyhd3eb1b0_1 + - intel-openmp=2021.4.0=h06a4308_3561 + - ipykernel=6.19.2=py38hb070fc8_0 + - ipython=8.12.0=py38h06a4308_0 + - ipython_genutils=0.2.0=pyhd3eb1b0_1 + - ipywidgets=8.0.4=py38h06a4308_0 + - jack=1.9.22=h11f4161_0 + - jedi=0.18.1=py38h06a4308_1 + - jinja2=3.1.2=py38h06a4308_0 + - joblib=1.1.1=py38h06a4308_0 + - jpeg=9e=h5eee18b_1 + - jsonpatch=1.33=pyhd8ed1ab_0 + - jsonpointer=2.4=py38h578d9bd_3 + - jsonschema=4.17.3=py38h06a4308_0 + - jupyter=1.0.0=py38_7 + - jupyter_client=7.4.9=py38h06a4308_0 + - jupyter_console=6.6.3=py38h06a4308_0 + - jupyter_core=5.3.0=py38h06a4308_0 + - jupyter_server=1.23.4=py38h06a4308_0 + - jupyterlab_pygments=0.1.2=py_0 + - jupyterlab_widgets=3.0.5=py38h06a4308_0 + - keyutils=1.6.1=h166bdaf_0 + - kiwisolver=1.4.4=py38h6a678d5_0 + - krb5=1.20.1=h81ceb04_0 + - lame=3.100=h7b6447c_0 + - lcms2=2.12=h3be6417_0 + - ld_impl_linux-64=2.38=h1181459_1 + - lerc=3.0=h295c915_0 + - libabseil=20220623.0=cxx17_h05df665_6 + - libaec=1.1.2=h59595ed_1 + - libarchive=3.6.2=h3d51595_0 + - libarrow=11.0.0=h2ebd325_5_cpu + - libblas=3.9.0=12_linux64_mkl + - libbrotlicommon=1.0.9=h5eee18b_7 + - libbrotlidec=1.0.9=h5eee18b_7 + - libbrotlienc=1.0.9=h5eee18b_7 + - 
libcap=2.67=he9d0100_0 + - libcblas=3.9.0=12_linux64_mkl + - libclang=15.0.7=default_h7634d5b_3 + - libclang13=15.0.7=default_h9986a30_3 + - libcrc32c=1.1.2=h9c3ff4c_0 + - libcublas=11.5.1.109=h0fd73e7_0 + - libcufft=10.4.2.109=h2344711_0 + - libcups=2.3.3=h36d4200_3 + - libcurand=10.2.4.109=h0189693_0 + - libcurl=8.2.1=h251f7ec_0 + - libcusolver=11.1.2.109=h1e009e5_0 + - libcusparse=11.6.0.109=hf5bfba9_0 + - libdb=6.2.32=h9c3ff4c_0 + - libdeflate=1.17=h5eee18b_0 + - libedit=3.1.20221030=h5eee18b_0 + - libev=4.33=h516909a_1 + - libevent=2.1.10=h28343ad_4 + - libexpat=2.5.0=hcb278e6_1 + - libffi=3.4.2=h7f98852_5 + - libflac=1.4.3=h59595ed_0 + - libgcc-ng=12.2.0=h65d4601_19 + - libgcrypt=1.10.2=hd590300_0 + - libgfortran-ng=11.2.0=h00389a5_1 + - libgfortran5=11.2.0=h1234567_1 + - libglib=2.78.1=h783c2da_1 + - libgoogle-cloud=2.7.0=h21dfe5b_1 + - libgpg-error=1.47=h71f35ed_0 + - libgrpc=1.51.1=h4fad500_1 + - libiconv=1.17=h166bdaf_0 + - libidn2=2.3.2=h7f8727e_0 + - liblapack=3.9.0=12_linux64_mkl + - libllvm14=14.0.6=hcd5def8_4 + - libllvm15=15.0.7=hadd5161_1 + - libmamba=1.5.1=h744094f_0 + - libmambapy=1.5.1=py38h5cd715c_0 + - libnghttp2=1.52.0=h61bc06f_0 + - libnpp=11.3.3.95=h122bb27_0 + - libnsl=2.0.1=hd590300_0 + - libntlm=1.4=h7f98852_1002 + - libnvjpeg=11.5.0.109=h159916b_0 + - libogg=1.3.4=h7f98852_1 + - libopus=1.3.1=h7b6447c_0 + - libpng=1.6.39=h5eee18b_0 + - libpq=15.3=hbcd7760_1 + - libprotobuf=3.21.12=hfc55251_2 + - libsndfile=1.2.2=hc60ed4a_1 + - libsodium=1.0.18=h7b6447c_0 + - libsolv=0.7.27=hfc55251_0 + - libsqlite=3.44.2=h2797004_0 + - libssh2=1.11.0=h0841786_0 + - libstdcxx-ng=13.2.0=h7e041cc_3 + - libsystemd0=253=h8c4010b_1 + - libtasn1=4.19.0=h5eee18b_0 + - libthrift=0.18.0=h5e4af38_0 + - libtiff=4.5.0=h6a678d5_2 + - libtool=2.4.7=h27087fc_0 + - libudev1=253=h0b41bf4_1 + - libunistring=0.9.10=h27cfd23_0 + - libutf8proc=2.8.0=h166bdaf_0 + - libuuid=1.41.5=h5eee18b_0 + - libvorbis=1.3.7=h9c3ff4c_0 + - libvpx=1.7.0=h439df22_0 + - 
libwebp=1.2.4=h11a3e52_1 + - libwebp-base=1.2.4=h5eee18b_1 + - libxcb=1.13=h7f98852_1004 + - libxkbcommon=1.5.0=h79f4944_1 + - libxml2=2.10.3=hca2bb57_4 + - libxslt=1.1.37=h873f0b0_0 + - libzlib=1.2.13=hd590300_5 + - llvm-openmp=14.0.6=h9e868ea_0 + - lxml=4.9.2=py38h5eee18b_0 + - lz4-c=1.9.4=h6a678d5_0 + - lzo=2.10=h516909a_1000 + - mamba=1.5.1=py38haad2881_0 + - markupsafe=2.1.1=py38h7f8727e_0 + - matplotlib-base=3.7.1=py38h417a72b_1 + - matplotlib-inline=0.1.6=py38h06a4308_0 + - mistune=0.8.4=py38h7b6447c_1000 + - mkl=2021.4.0=h06a4308_640 + - mkl-service=2.4.0=py38h7f8727e_0 + - mkl_fft=1.3.1=py38hd3c417c_0 + - mkl_random=1.2.2=py38h51133e4_0 + - ml-insights=1.0.2=pyha21a80b_0 + - mpc=1.1.0=h10f8cd9_1 + - mpfr=4.0.2=hb69a4c5_1 + - mpg123=1.32.3=h59595ed_0 + - mpmath=1.2.1=py38h06a4308_0 + - multidict=6.0.4=py38h1de0b5d_0 + - multiprocess=0.70.15=py38h06a4308_0 + - munkres=1.1.4=py_0 + - mysql=5.7.20=hf484d3e_1001 + - mysql-common=8.0.33=hf1915f5_6 + - mysql-libs=8.0.33=hca2cd23_6 + - nauty=2.7.1=h7f98852_2 + - nbclassic=0.5.5=py38h06a4308_0 + - nbclient=0.5.13=py38h06a4308_0 + - nbconvert=6.5.4=py38h06a4308_0 + - nbformat=5.7.0=py38h06a4308_0 + - nccl=2.19.4.1=h0800d71_0 + - ncurses=6.4=h6a678d5_0 + - nest-asyncio=1.5.6=py38h06a4308_0 + - nettle=3.7.3=hbbd107a_1 + - networkx=3.1=py38h06a4308_0 + - nose=1.3.7=py_1006 + - notebook=6.5.4=py38h06a4308_0 + - notebook-shim=0.2.2=py38h06a4308_0 + - nspr=4.35=h27087fc_0 + - nss=3.94=h1d7d5a4_0 + - numexpr=2.8.4=py38he184ba9_0 + - openh264=2.1.1=h4ff587b_0 + - openssl=3.2.0=hd590300_0 + - opt_einsum=3.3.0=pyhd8ed1ab_1 + - orc=1.8.2=hfdbbad2_2 + - packaging=23.0=py38h06a4308_0 + - pandas=1.3.4=py38h8c16a72_0 + - pandocfilters=1.5.0=pyhd3eb1b0_0 + - parso=0.8.3=pyhd3eb1b0_0 + - patsy=0.5.3=py38h06a4308_0 + - pcre=8.45=h295c915_0 + - pcre2=10.42=hcad00b1_0 + - pexpect=4.8.0=pyhd3eb1b0_3 + - pgmpy=0.1.23=py38_0 + - pickleshare=0.7.5=pyhd3eb1b0_1003 + - pillow=9.4.0=py38h6a678d5_0 + - pip=22.3.1=py38h06a4308_0 + - 
pkgutil-resolve-name=1.3.10=py38h06a4308_0 + - platformdirs=2.5.2=py38h06a4308_0 + - pluggy=1.3.0=pyhd8ed1ab_0 + - ply=3.11=py38_0 + - pooch=1.4.0=pyhd3eb1b0_0 + - prometheus_client=0.14.1=py38h06a4308_0 + - prompt-toolkit=3.0.36=py38h06a4308_0 + - prompt_toolkit=3.0.36=hd3eb1b0_0 + - psutil=5.9.0=py38h5eee18b_0 + - psycopg2=2.9.6=py38ha5fcc81_0 + - pthread-stubs=0.4=h36c2ea0_1001 + - ptyprocess=0.7.0=pyhd3eb1b0_2 + - pulseaudio=16.1=hcb278e6_3 + - pulseaudio-client=16.1=h5195f5e_3 + - pulseaudio-daemon=16.1=ha8d29e2_3 + - pure_eval=0.2.2=pyhd3eb1b0_0 + - pyarrow=11.0.0=py38h468efa6_1 + - pybind11-abi=4=hd8ed1ab_3 + - pycosat=0.6.6=py38h01eb140_0 + - pycparser=2.21=pyhd3eb1b0_0 + - pyg=2.3.0=py38_torch_1.12.0_cu113 + - pygments=2.15.1=py38h06a4308_1 + - pynauty=2.8.6=py38h1de0b5d_0 + - pyopenssl=23.0.0=py38h06a4308_0 + - pyparsing=3.0.9=py38h06a4308_0 + - pyqt=5.15.7=py38ha0d8c90_3 + - pyqt5-sip=12.11.0=py38h8dc9893_3 + - pyro-api=0.1.2=pyhd8ed1ab_0 + - pyro-ppl=1.8.4=pyhd8ed1ab_0 + - pyrsistent=0.18.0=py38heee7806_0 + - pysocks=1.7.1=py38h06a4308_0 + - python=3.8.12=h0744224_3_cpython + - python-dateutil=2.8.2=pyhd3eb1b0_0 + - python-fastjsonschema=2.16.2=py38h06a4308_0 + - python-xxhash=3.3.0=py38h01eb140_0 + - python_abi=3.8=2_cp38 + - pytorch=1.12.1=py3.8_cuda11.3_cudnn8.3.2_0 + - pytorch-mutex=1.0=cuda + - pytz=2022.7=py38h06a4308_0 + - pyyaml=6.0.1=py38h5eee18b_0 + - pyzmq=23.2.0=py38h6a678d5_0 + - qt-main=5.15.6=hf6cd601_5 + - qt-webengine=5.15.4=h325cec9_3 + - qtconsole=5.4.2=py38h06a4308_0 + - qtpy=2.2.0=py38h06a4308_0 + - qtwebkit=5.212=h3e5094c_7 + - re2=2023.02.01=hcb278e6_0 + - readline=8.2=h5eee18b_0 + - regex=2023.10.3=py38h5eee18b_0 + - reproc=14.2.4.post0=hd590300_1 + - reproc-cpp=14.2.4.post0=h59595ed_1 + - requests=2.28.1=py38h06a4308_1 + - ruamel.yaml=0.17.40=py38h01eb140_0 + - ruamel.yaml.clib=0.2.7=py38h01eb140_2 + - s2n=1.3.37=h3358134_0 + - sacremoses=0.0.53=pyhd8ed1ab_0 + - safetensors=0.3.3=py38h0cc4f7c_0 + - 
scikit-learn=1.3.2=py38ha25d942_1 + - send2trash=1.8.0=pyhd3eb1b0_1 + - setuptools=68.0.0=py38h06a4308_0 + - sip=6.7.12=py38h17151c0_0 + - six=1.16.0=pyhd3eb1b0_1 + - smart_open=5.2.1=py38h06a4308_0 + - snappy=1.1.9=h295c915_0 + - sniffio=1.2.0=py38h06a4308_1 + - soupsieve=2.4=py38h06a4308_0 + - splinecalib=0.0.4=py38h26c90d9_0 + - sqlite=3.41.2=h5eee18b_0 + - sqlparse=0.4.3=py38h06a4308_0 + - stack_data=0.2.0=pyhd3eb1b0_0 + - statsmodels=0.14.0=py38ha9d4c09_0 + - sympy=1.10.1=py38h06a4308_0 + - terminado=0.17.1=py38h06a4308_0 + - threadpoolctl=2.2.0=pyh0d69192_0 + - tinycss2=1.2.1=py38h06a4308_0 + - tk=8.6.12=h1ccaba5_0 + - tokenizers=0.15.0=py38hf65db12_0 + - toml=0.10.2=pyhd3eb1b0_0 + - tomli=2.0.1=pyhd8ed1ab_0 + - toolz=0.12.0=pyhd8ed1ab_0 + - torchaudio=0.12.1=py38_cu113 + - torchvision=0.13.1=py38_cu113 + - tornado=6.2=py38h5eee18b_0 + - tqdm=4.65.0=py38hb070fc8_0 + - traitlets=5.7.1=py38h06a4308_0 + - transformers=4.17.0=pyhd8ed1ab_0 + - typing-extensions=4.5.0=py38h06a4308_0 + - typing_extensions=4.5.0=py38h06a4308_0 + - urllib3=1.26.15=py38h06a4308_0 + - utf8proc=2.6.1=h27cfd23_0 + - wcwidth=0.2.5=pyhd3eb1b0_0 + - webencodings=0.5.1=py38_1 + - websocket-client=0.58.0=py38h06a4308_4 + - wheel=0.38.4=py38h06a4308_0 + - widgetsnbextension=4.0.5=py38h06a4308_0 + - x264=1!157.20191217=h7b6447c_0 + - xcb-util=0.4.0=h516909a_0 + - xcb-util-image=0.4.0=h166bdaf_0 + - xcb-util-keysyms=0.4.0=h516909a_0 + - xcb-util-renderutil=0.3.9=h166bdaf_0 + - xcb-util-wm=0.4.1=h516909a_0 + - xkeyboard-config=2.38=h0b41bf4_0 + - xorg-libxau=1.0.11=hd590300_0 + - xorg-libxdmcp=1.1.3=h7f98852_0 + - xxhash=0.8.2=hd590300_0 + - xz=5.2.10=h5eee18b_1 + - yaml=0.2.5=h7f98852_2 + - yaml-cpp=0.7.0=h59595ed_3 + - yarl=1.9.2=py38h01eb140_0 + - zeromq=4.3.4=h2531618_0 + - zipp=3.11.0=py38h06a4308_0 + - zlib=1.2.13=hd590300_5 + - zstandard=0.22.0=py38ha98ab4e_0 + - zstd=1.5.5=hc292b87_0 + - pip: + - google-auth-oauthlib==1.0.0 + - grpcio==1.59.3 + - keras==2.15.0 + - numpy==1.23.5 + - 
protobuf==4.25.1 + - pyasn1==0.5.1 + - scipy==1.4.1 + - tensorboard==2.14.0 + - tensorboard-data-server==0.7.2 +prefix: /opt/conda/envs/tf_gpu_env diff --git a/src/evaluate_embeddings.py b/src/evaluate_embeddings.py new file mode 100644 index 0000000000000000000000000000000000000000..67c2798de38fa7ace9ac04c06c4aaa3e038f19f4 --- /dev/null +++ b/src/evaluate_embeddings.py @@ -0,0 +1,136 @@ +from torch.utils.data import DataLoader +import torch.nn as nn +import torch +import numpy + +import pickle +import tqdm + +from bert import BERT +from vocab import Vocab +from dataset import TokenizerDataset +import argparse +from itertools import combinations + +def generate_subset(s): + subsets = [] + for r in range(len(s) + 1): + combinations_result = combinations(s, r) + if r==1: + subsets.extend(([item] for sublist in combinations_result for item in sublist)) + else: + subsets.extend((list(sublist) for sublist in combinations_result)) + subsets_dict = {i:s for i, s in enumerate(subsets)} + return subsets_dict + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument('-workspace_name', type=str, default=None) + parser.add_argument("-seq_len", type=int, default=100, help="maximum sequence length") + parser.add_argument('-pretrain', type=bool, default=False) + parser.add_argument('-masked_pred', type=bool, default=False) + parser.add_argument('-epoch', type=str, default=None) + # parser.add_argument('-set_label', type=bool, default=False) + # parser.add_argument('--label_standard', nargs='+', type=str, help='List of optional tasks') + + options = parser.parse_args() + + folder_path = options.workspace_name+"/" if options.workspace_name else "" + + # if options.set_label: + # label_standard = generate_subset({'optional-tasks-1', 'optional-tasks-2'}) + # pickle.dump(label_standard, open(f"{folder_path}pretraining/pretrain_opt_label.pkl", "wb")) + # else: + # label_standard = pickle.load(open(f"{folder_path}pretraining/pretrain_opt_label.pkl", 
"rb")) + # print(f"options.label _standard: {options.label_standard}") + vocab_path = f"{folder_path}check/pretraining/vocab.txt" + # vocab_path = f"{folder_path}pretraining/vocab.txt" + + + print("Loading Vocab", vocab_path) + vocab_obj = Vocab(vocab_path) + vocab_obj.load_vocab() + print("Vocab Size: ", len(vocab_obj.vocab)) + + # label_standard = list(pickle.load(open(f"dataset/CL4999_1920/{options.workspace_name}/unique_problems_list.pkl", "rb"))) + # label_standard = generate_subset({'optional-tasks-1', 'optional-tasks-2', 'OptionalTask_1', 'OptionalTask_2'}) + # pickle.dump(label_standard, open(f"{folder_path}pretraining/pretrain_opt_label.pkl", "wb")) + + if options.masked_pred: + str_code = "masked_prediction" + output_name = f"{folder_path}output/bert_trained.seq_model.ep{options.epoch}" + else: + str_code = "masked" + output_name = f"{folder_path}output/bert_trained.seq_encoder.model.ep{options.epoch}" + + folder_path = folder_path+"check/" + # folder_path = folder_path + if options.pretrain: + pretrain_file = f"{folder_path}pretraining/pretrain.txt" + pretrain_label = f"{folder_path}pretraining/pretrain_opt.pkl" + + # pretrain_file = f"{folder_path}finetuning/train.txt" + # pretrain_label = f"{folder_path}finetuning/train_label.txt" + + embedding_file_path = f"{folder_path}embeddings/pretrain_embeddings_{str_code}_{options.epoch}.pkl" + print("Loading Pretrain Dataset ", pretrain_file) + pretrain_dataset = TokenizerDataset(pretrain_file, pretrain_label, vocab_obj, seq_len=options.seq_len) + + print("Creating Dataloader") + pretrain_data_loader = DataLoader(pretrain_dataset, batch_size=32, num_workers=4) + else: + val_file = f"{folder_path}pretraining/test.txt" + val_label = f"{folder_path}pretraining/test_opt.txt" + +# val_file = f"{folder_path}finetuning/test.txt" +# val_label = f"{folder_path}finetuning/test_label.txt" + embedding_file_path = f"{folder_path}embeddings/test_embeddings_{str_code}_{options.epoch}.pkl" + + print("Loading Validation Dataset 
", val_file) + val_dataset = TokenizerDataset(val_file, val_label, vocab_obj, seq_len=options.seq_len) + + print("Creating Dataloader") + val_data_loader = DataLoader(val_dataset, batch_size=32, num_workers=4) + + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + print(device) + print("Load Pre-trained BERT model...") + print(output_name) + bert = torch.load(output_name, map_location=device) +# learned_parameters = model_ep0.state_dict() + for param in bert.parameters(): + param.requires_grad = False + + if options.pretrain: + print("Pretrain-embeddings....") + data_iter = tqdm.tqdm(enumerate(pretrain_data_loader), + desc="pre-train", + total=len(pretrain_data_loader), + bar_format="{l_bar}{r_bar}") + pretrain_embeddings = [] + for i, data in data_iter: + data = {key: value.to(device) for key, value in data.items()} + hrep = bert(data["bert_input"], data["segment_label"]) + # print(hrep[:,0].cpu().detach().numpy()) + embeddings = [h for h in hrep[:,0].cpu().detach().numpy()] + pretrain_embeddings.extend(embeddings) + pickle.dump(pretrain_embeddings, open(embedding_file_path,"wb")) + # pickle.dump(pretrain_embeddings, open("embeddings/finetune_cfa_train_embeddings.pkl","wb")) + + else: + print("Validation-embeddings....") + data_iter = tqdm.tqdm(enumerate(val_data_loader), + desc="validation", + total=len(val_data_loader), + bar_format="{l_bar}{r_bar}") + val_embeddings = [] + for i, data in data_iter: + data = {key: value.to(device) for key, value in data.items()} + hrep = bert(data["bert_input"], data["segment_label"]) + # print(,hrep[:,0].shape) + embeddings = [h for h in hrep[:,0].cpu().detach().numpy()] + val_embeddings.extend(embeddings) + pickle.dump(val_embeddings, open(embedding_file_path,"wb")) + # pickle.dump(val_embeddings, open("embeddings/finetune_cfa_test_embeddings.pkl","wb")) + diff --git a/src/gradio_test.py b/src/gradio_test.py new file mode 100644 index 
0000000000000000000000000000000000000000..c5944cf036b3acd257ec04be26922ddbbcc914fd --- /dev/null +++ b/src/gradio_test.py @@ -0,0 +1,39 @@ +import gradio as gr +from huggingface_hub import hf_hub_download +import pickle +import gradio as gr +import numpy as np +import subprocess +import shutil +# Define the function to process the input file and model selection +def process_file(file, model_name): + with open(file.name, 'r') as f: + content = f.read() + saved_test_dataset = "test.txt" + saved_test_label = "saved_test_label.txt" + + # Save the uploaded file content to a specified location + shutil.copyfile(file.name, saved_test_dataset) + # For demonstration purposes, we'll just return the content with the selected model name + subprocess.run(["python", "src/test_saved_model.py"]) + return f"Model: {model_name}\nContent:\n{content}" + +# List of models for the dropdown menu +models = ["Model A", "Model B", "Model C"] + +# Create the Gradio interface +with gr.Blocks() as demo: + gr.Markdown("# File Processor with Model Selection") + gr.Markdown("Upload a .txt file and select a model from the dropdown menu.") + + with gr.Row(): + file_input = gr.File(label="Upload a .txt file", file_types=['.txt']) + model_dropdown = gr.Dropdown(choices=models, label="Select a model") + + output_text = gr.Textbox(label="Output") + + btn = gr.Button("Submit") + btn.click(fn=process_file, inputs=[file_input, model_dropdown], outputs=output_text) + +# Launch the app +demo.launch() diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000000000000000000000000000000000000..36dcaa7d429e945ec055803b1d2283d7c09c2ab5 --- /dev/null +++ b/src/main.py @@ -0,0 +1,162 @@ +import argparse + +from torch.utils.data import DataLoader +import torch + +from bert import BERT +from pretrainer import BERTTrainer, BERTFineTuneTrainer +from dataset import PretrainerDataset, TokenizerDataset +from vocab import Vocab + +import time + + +def train(): + parser = argparse.ArgumentParser() + + 
parser.add_argument('-workspace_name', type=str, default=None) + parser.add_argument("-p", "--pretrain_dataset", type=str, default="pretraining/pretrain.txt", help="pretraining dataset for bert") + parser.add_argument("-pv", "--pretrain_val_dataset", type=str, default="pretraining/test.txt", help="pretraining validation dataset for bert") +# default="finetuning/test.txt", + parser.add_argument("-f", "--train_dataset", type=str, default="finetuning/test_in.txt", help="fine tune train dataset for progress classifier") + parser.add_argument("-t", "--test_dataset", type=str, default="finetuning/train_in.txt", help="test set for evaluate fine tune train set") + parser.add_argument("-flabel", "--train_label", type=str, default="finetuning/test_in_label.txt", help="fine tune train dataset for progress classifier") + parser.add_argument("-tlabel", "--test_label", type=str, default="finetuning/train_in_label.txt", help="test set for evaluate fine tune train set") + ##### change Checkpoint + parser.add_argument("-c", "--pretrained_bert_checkpoint", type=str, default="output_feb09/bert_trained.model.ep40", help="checkpoint of saved pretrained bert model") # output_1: output_1/bert_trained.model.ep3 + parser.add_argument("-v", "--vocab_path", type=str, default="pretraining/vocab.txt", help="built vocab model path with bert-vocab") + + parser.add_argument("-hs", "--hidden", type=int, default=64, help="hidden size of transformer model") + parser.add_argument("-l", "--layers", type=int, default=4, help="number of layers") + parser.add_argument("-a", "--attn_heads", type=int, default=8, help="number of attention heads") + parser.add_argument("-s", "--seq_len", type=int, default=100, help="maximum sequence length") + + parser.add_argument("-b", "--batch_size", type=int, default=32, help="number of batch_size") + parser.add_argument("-e", "--epochs", type=int, default=301, help="number of epochs") + # Use 50 for pretrain, and 10 for fine tune + parser.add_argument("-w", 
"--num_workers", type=int, default=4, help="dataloader worker size") + + # Later run with cuda + parser.add_argument("--with_cuda", type=bool, default=True, help="training with CUDA: true, or false") + parser.add_argument("--log_freq", type=int, default=10, help="printing loss every n iter: setting n") + parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus") + parser.add_argument("--cuda_devices", type=int, nargs='+', default=None, help="CUDA device ids") + parser.add_argument("--on_memory", type=bool, default=True, help="Loading on memory: true or false") + + parser.add_argument("--dropout", type=float, default=0.1, help="dropout of network") + parser.add_argument("--lr", type=float, default=1e-3, help="learning rate of adam") + parser.add_argument("--adam_weight_decay", type=float, default=0.01, help="weight_decay of adam") + parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value") + parser.add_argument("--adam_beta2", type=float, default=0.999, help="adam first beta value") + + # These two need to be changed for fine tuning + # parser.add_argument("--pretrain", type=bool, default=True, help="pretraining: true, or false") + # parser.add_argument("-o", "--output_path", type=str, default="output/bert_trained.seq_encoder.model", help="ex)output/bert.model") + # parser.add_argument("--same_student_prediction", type=bool, default=False, help="predict sequences by same student: true, or false") + + #clear;python3 src/main.py --output_path output/masked/bert_trained.model + #clear;python3 src/main.py --output_path output/masked_prediction/bert_trained.model --same_student_prediction True + + parser.add_argument("--pretrain", type=bool, default=False, help="pretraining: true, or false") + parser.add_argument("-o", "--output_path", type=str, default="output/bert_fine_tuned.FS.model", help="ex)output/bert.model") + # python3 src/main.py + + args = parser.parse_args() + for k,v in 
vars(args).items(): + if ('dataset' in k) or ('path' in k) or ('label' in k): + if v: + setattr(args, f"{k}", args.workspace_name+"/"+v) + print(f"args.{k} : {getattr(args, f'{k}')}") + + print("Loading Vocab", args.vocab_path) + vocab_obj = Vocab(args.vocab_path) + vocab_obj.load_vocab() + print("Vocab Size: ", len(vocab_obj.vocab)) + + if args.pretrain: + + print("Pre-training......") + print("Loading Pretraining Dataset", args.pretrain_dataset) + print(f"Workspace: {args.workspace_name}") + pretrain_dataset = PretrainerDataset(args.pretrain_dataset, vocab_obj, seq_len=args.seq_len, select_next_seq=args.same_student_prediction) + + print("Loading Pretraining validation Dataset", args.pretrain_val_dataset) + pretrain_valid_dataset = PretrainerDataset(args.pretrain_val_dataset, vocab_obj, seq_len=args.seq_len, select_next_seq=args.same_student_prediction) \ + if args.pretrain_val_dataset is not None else None + + print("Creating Dataloader") + pretrain_data_loader = DataLoader(pretrain_dataset, batch_size=args.batch_size, num_workers=args.num_workers) + pretrain_val_data_loader = DataLoader(pretrain_valid_dataset, batch_size=args.batch_size, num_workers=args.num_workers)\ + if pretrain_valid_dataset is not None else None + + print("Building BERT model") + # a = 5/0 + # hidden = pretrain_dataset.seq_len if pretrain_dataset.seq_len > args.hidden else args.hidden + # print("hidden: ", hidden) + bert = BERT(len(vocab_obj.vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads, dropout=args.dropout) + + print(f"Creating BERT Trainer .... 
masking: True, prediction: {args.same_student_prediction}") + trainer = BERTTrainer(bert, len(vocab_obj.vocab), train_dataloader=pretrain_data_loader, test_dataloader=pretrain_val_data_loader, lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq, same_student_prediction = args.same_student_prediction, workspace_name = args.workspace_name) + + print("Training Start") + start_time = time.time() + for epoch in range(args.epochs): + trainer.train(epoch) + + if pretrain_val_data_loader is not None: + trainer.test(epoch) + + if epoch > 19 and trainer.save_model: # or epoch%10 == 0 + trainer.save(epoch, args.output_path) + end_time = time.time() + print("Time Taken to pretrain dataset = ", end_time - start_time) + else: + print("Fine Tuning......") + print("Loading Train Dataset", args.train_dataset) + train_dataset = TokenizerDataset(args.train_dataset, args.train_label, vocab_obj, seq_len=args.seq_len, train=True) + + print("Loading Test Dataset", args.test_dataset) + test_dataset = TokenizerDataset(args.test_dataset, args.test_label, vocab_obj, seq_len=args.seq_len, train=False) \ + if args.test_dataset is not None else None + + print("Creating Dataloader") + train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers) + test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \ + if test_dataset is not None else None + + print("Load Pre-trained BERT model") + # bert = BERT(len(vocab_obj.vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads) + cuda_condition = torch.cuda.is_available() and args.with_cuda + device = torch.device("cuda:0" if cuda_condition else "cpu") + bert = torch.load(args.pretrained_bert_checkpoint, map_location=device) + + if args.workspace_name == "ratio_proportion_change4": + num_labels = 7 + elif args.workspace_name == 
"ratio_proportion_change3": + num_labels = 7 + elif args.workspace_name == "scale_drawings_3": + num_labels = 7 + elif args.workspace_name == "sales_tax_discounts_two_rates": + num_labels = 3 + # num_labels = 1 + print(f"Number of Labels : {num_labels}") + print("Creating BERT Fine Tune Trainer") + trainer = BERTFineTuneTrainer(bert, len(vocab_obj.vocab), train_dataloader=train_data_loader, test_dataloader=test_data_loader, lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq, workspace_name = args.workspace_name, num_labels=num_labels) + + print("Training Start....") + start_time = time.time() + for epoch in range(args.epochs): + trainer.train(epoch) + + if epoch > 4 and trainer.save_model: + trainer.save(epoch, args.output_path) + + if test_data_loader is not None: + trainer.test(epoch) + + end_time = time.time() + print("Time Taken to fine tune dataset = ", end_time - start_time) + + +if __name__ == "__main__": + train() \ No newline at end of file diff --git a/src/manifold-smoothing.py b/src/manifold-smoothing.py new file mode 100644 index 0000000000000000000000000000000000000000..e491dd2258a4cf1133374dc5ac2c2b408c17399c --- /dev/null +++ b/src/manifold-smoothing.py @@ -0,0 +1,502 @@ +import torch +from torch import nn +from torch.nn import functional as F +from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler +from keras.preprocessing.sequence import pad_sequences +from sklearn.model_selection import train_test_split +from transformers import BertTokenizer, BertConfig +from transformers import AdamW, BertForSequenceClassification, get_linear_schedule_with_warmup +from tqdm import tqdm, trange +import pandas as pd +import io +import numpy as np +import matplotlib.pyplot as plt +from torch.autograd.gradcheck import zero_gradients +import argparse +import random +from utils import * +import os + + +class 
softCrossEntropy(nn.Module): + def __init__(self, reduce=True): + super(softCrossEntropy, self).__init__() + self.reduce = reduce + return + + def forward(self, inputs, target): + """ + :param inputs: predictions + :param target: target labels in vector form + :return: loss + """ + log_likelihood = -F.log_softmax(inputs, dim=1) + sample_num, class_num = target.shape + if self.reduce: + loss = torch.sum(torch.mul(log_likelihood, target)) / sample_num + else: + loss = torch.sum(torch.mul(log_likelihood, target), 1) + + return loss + + +def one_hot_tensor(y_batch_tensor, num_classes, device): + y_tensor = torch.FloatTensor(y_batch_tensor.size(0), num_classes).fill_(0).to(device) + y_tensor[np.arange(len(y_batch_tensor)), y_batch_tensor] = 1.0 + return y_tensor + +class on_manifold_samples(object): + def __init__(self, epsilon_x=1e-4, epsilon_y=0.1): + super(on_manifold_samples, self).__init__() + self.epsilon_x = epsilon_x + self.epsilon_y = epsilon_y + + + def generate(self, input_ids, input_mask, y, model): + model.eval() + with torch.no_grad(): + if torch.cuda.device_count() > 1: + embedding = model.module.get_input_embeddings()(input_ids) + else: + embedding = model.get_input_embeddings()(input_ids) + + x = embedding.detach() + + inv_index = torch.arange(x.size(0) - 1, -1, -1).long() + x_tilde = x[inv_index, :].detach() + y_tilde = y[inv_index, :] + x_init = x.detach() + torch.zeros_like(x).uniform_(-self.epsilon_x, self.epsilon_x) + + x_init.requires_grad_() + zero_gradients(x_init) + if x_init.grad is not None: + x_init.grad.data.fill_(0) + + fea_b = model(inputs_embeds=x_init, token_type_ids=None, attention_mask=input_mask)[1][-1] + fea_b = torch.mean(fea_b, 1) + with torch.no_grad(): + fea_t = model(inputs_embeds=x_tilde, token_type_ids=None, attention_mask=input_mask)[1][-1] + fea_t = torch.mean(fea_t, 1) + + Dx = cos_dist(fea_b, fea_t) + model.zero_grad() + if torch.cuda.device_count() > 1: + Dx = Dx.mean() + Dx.backward() + + x_prime = x_init.data - 
self.epsilon_x * torch.sign(x_init.grad.data) + x_prime = torch.min(torch.max(x_prime, embedding - self.epsilon_x), embedding + self.epsilon_x) + + y_prime = (1 - self.epsilon_y) * y + self.epsilon_y * y_tilde + model.train() + return x_prime.detach(), y_prime.detach() + +class off_manifold_samples(object): + def __init__(self, eps=0.001, rand_init='n'): + super(off_manifold_samples, self).__init__() + self.eps = eps + self.rand_init = rand_init + + + def generate(self, model, input_ids, input_mask, labels): + model.eval() + ny = labels + with torch.no_grad(): + if torch.cuda.device_count() > 1: + embedding = model.module.get_input_embeddings()(input_ids) + else: + embedding = model.get_input_embeddings()(input_ids) + + input_embedding = embedding.detach() + #random init the adv samples + if self.rand_init == 'y': + input_embedding = input_embedding + torch.zeros_like(input_embedding).uniform_(-self.eps, self.eps) + input_embedding.requires_grad = True + + zero_gradients(input_embedding) + if input_embedding.grad is not None: + input_embedding.grad.data.fill_(0) + + cost = model(inputs_embeds=input_embedding, token_type_ids=None, attention_mask=input_mask, labels=ny)[0] + if torch.cuda.device_count() > 1: + cost = cost.mean() + model.zero_grad() + cost.backward() + off_samples = input_embedding + self.eps*torch.sign(input_embedding.grad.data) + off_samples = torch.min(torch.max(off_samples, embedding - self.eps), embedding + self.eps) + + model.train() + return off_samples.detach() + + + +class ECE(nn.Module): + + def __init__(self, n_bins=15): + """ + n_bins (int): number of confidence interval bins + """ + super(ECE, self).__init__() + bin_boundaries = torch.linspace(0, 1, n_bins + 1) + self.bin_lowers = bin_boundaries[:-1] + self.bin_uppers = bin_boundaries[1:] + + def forward(self, logits, labels): + softmaxes = F.softmax(logits, dim=1) + confidences, predictions = torch.max(softmaxes, 1) + accuracies = predictions.eq(labels) + + ece = torch.zeros(1, 
device=logits.device) + for bin_lower, bin_upper in zip(self.bin_lowers, self.bin_uppers): + # Calculated |confidence - accuracy| in each bin + in_bin = confidences.gt(bin_lower.item()) * confidences.le(bin_upper.item()) + prop_in_bin = in_bin.float().mean() + if prop_in_bin.item() > 0: + accuracy_in_bin = accuracies[in_bin].float().mean() + avg_confidence_in_bin = confidences[in_bin].mean() + ece += torch.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin + + return ece + +# Function to calculate the accuracy of our predictions vs labels +def accurate_nb(preds, labels): + pred_flat = np.argmax(preds, axis=1).flatten() + labels_flat = labels.flatten() + return np.sum(pred_flat == labels_flat) + + +def set_seed(args): + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--lr", default=5e-5, type=float, help="The initial learning rate for Adam.") + parser.add_argument("--train_batch_size", default=32, type=int, help="Batch size for training.") + parser.add_argument("--eval_batch_size", default=128, type=int, help="Batch size for training.") + parser.add_argument("--epochs", default=10, type=int, help="Number of epochs for training.") + parser.add_argument("--seed", default=0, type=int, help="Number of epochs for training.") + parser.add_argument("--dataset", default='20news-15', type=str, help="dataset") + parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.") + parser.add_argument("--beta_on", default=1., type=float, help="Weight of on manifold reg") + parser.add_argument("--beta_off", default=1., type=float, help="Weight of off manifold reg") + parser.add_argument("--eps_in", default=1e-4, type=float, help="Perturbation size of on-manifold regularizer") + parser.add_argument("--eps_y", default=0.1, type=float, help="Perturbation size of label") + parser.add_argument('--eps_out', default=0.001, type=float, 
help="Perturbation size of out-of-domain adversarial training") + parser.add_argument('--saved_dataset', type=str, default='n', help='whether save the preprocessed pt file of the dataset') + + args = parser.parse_args() + print(args) + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + args.device = device + set_seed(args) + + ece_criterion = ECE().to(args.device) + soft_ce = softCrossEntropy() + + on_manifold = on_manifold_samples(epsilon_x=args.eps_in, epsilon_y=args.eps_y) + off_manifold = off_manifold_samples(eps=args.eps_out) + + + # load dataset + if args.saved_dataset == 'n': + train_sentences, val_sentences, test_sentences, train_labels, val_labels, test_labels = load_dataset(args.dataset) + + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) + + train_input_ids = [] + val_input_ids = [] + test_input_ids = [] + + if args.dataset == '20news' or args.dataset == '20news-15': + MAX_LEN = 150 + else: + MAX_LEN = 256 + + for sent in train_sentences: + # `encode` will: + # (1) Tokenize the sentence. + # (2) Prepend the `[CLS]` token to the start. + # (3) Append the `[SEP]` token to the end. + # (4) Map tokens to their IDs. + encoded_sent = tokenizer.encode( + sent, # Sentence to encode. + add_special_tokens = True, # Add '[CLS]' and '[SEP]' + # This function also supports truncation and conversion + # to pytorch tensors, but we need to do padding, so we + # can't use these features :( . + max_length = MAX_LEN, # Truncate all sentences. + #return_tensors = 'pt', # Return pytorch tensors. + ) + # Add the encoded sentence to the list. 
+ train_input_ids.append(encoded_sent) + + + for sent in val_sentences: + encoded_sent = tokenizer.encode( + sent, + add_special_tokens = True, + max_length = MAX_LEN, + ) + val_input_ids.append(encoded_sent) + + for sent in test_sentences: + encoded_sent = tokenizer.encode( + sent, + add_special_tokens = True, + max_length = MAX_LEN, + ) + test_input_ids.append(encoded_sent) + + # Pad our input tokens + train_input_ids = pad_sequences(train_input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post") + val_input_ids = pad_sequences(val_input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post") + test_input_ids = pad_sequences(test_input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post") + # Create attention masks + train_attention_masks = [] + val_attention_masks = [] + test_attention_masks = [] + + # Create a mask of 1s for each token followed by 0s for padding + for seq in train_input_ids: + seq_mask = [float(i>0) for i in seq] + train_attention_masks.append(seq_mask) + for seq in val_input_ids: + seq_mask = [float(i>0) for i in seq] + val_attention_masks.append(seq_mask) + for seq in test_input_ids: + seq_mask = [float(i>0) for i in seq] + test_attention_masks.append(seq_mask) + + # Convert all of our data into torch tensors, the required datatype for our model + + train_inputs = torch.tensor(train_input_ids) + validation_inputs = torch.tensor(val_input_ids) + train_labels = torch.tensor(train_labels) + validation_labels = torch.tensor(val_labels) + train_masks = torch.tensor(train_attention_masks) + validation_masks = torch.tensor(val_attention_masks) + test_inputs = torch.tensor(test_input_ids) + test_labels = torch.tensor(test_labels) + test_masks = torch.tensor(test_attention_masks) + + # Create an iterator of our data with torch DataLoader. 
+ + train_data = TensorDataset(train_inputs, train_masks, train_labels) + validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels) + prediction_data = TensorDataset(test_inputs, test_masks, test_labels) + + dataset_dir = 'dataset/{}'.format(args.dataset) + if not os.path.exists(dataset_dir): + os.makedirs(dataset_dir) + + torch.save(train_data, dataset_dir+'/train.pt') + torch.save(validation_data, dataset_dir+'/val.pt') + torch.save(prediction_data, dataset_dir+'/test.pt') + + else: + dataset_dir = 'dataset/{}'.format(args.dataset) + train_data = torch.load(dataset_dir+'/train.pt') + validation_data = torch.load(dataset_dir+'/val.pt') + prediction_data = torch.load(dataset_dir+'/test.pt') + + + train_sampler = RandomSampler(train_data) + train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size) + + validation_sampler = SequentialSampler(validation_data) + validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=args.eval_batch_size) + + prediction_sampler = SequentialSampler(prediction_data) + prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=args.eval_batch_size) + + + + if args.dataset == '20news': + num_labels = 20 + elif args.dataset == '20news-15': + num_labels = 15 + elif args.dataset == 'wos-in': + num_labels = 100 + elif args.dataset == 'wos': + num_labels = 134 + + print(num_labels) + + model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels= num_labels, output_hidden_states=True) + if torch.cuda.device_count() > 1: + print("Let's use", torch.cuda.device_count(), "GPUs!") + # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] 
on 3 GPUs + model = nn.DataParallel(model) + model.to(args.device) + +#######train model + + param_optimizer = list(model.named_parameters()) + no_decay = ['bias', 'gamma', 'beta'] + optimizer_grouped_parameters = [ + {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], + 'weight_decay_rate': args.weight_decay}, + {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], + 'weight_decay_rate': 0.0} + ] + + optimizer = torch.optim.Adam(optimizer_grouped_parameters, lr=args.lr, eps=1e-9) + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1) + t_total = len(train_dataloader) * args.epochs + # Store our loss and accuracy for plotting + + best_val = -np.inf + # trange is a tqdm wrapper around the normal python range + for epoch in trange(args.epochs, desc="Epoch"): + # Training + # Set our model to training mode (as opposed to evaluation mode) + # Tracking variables + tr_loss1, tr_loss2 = 0, 0 + nb_tr_examples, nb_tr_steps = 0, 0 + model.train() + + # Train the data for one epoch + for step, batch in enumerate(train_dataloader): + + # Add batch to GPU + batch = tuple(t.to(args.device) for t in batch) + # Unpack the inputs from our dataloader + b_input_ids, b_input_mask, b_labels = batch + + # generate on manifold samples + targets_onehot = one_hot_tensor(b_labels, num_labels, args.device) + on_manifold_x, on_manifold_y = on_manifold.generate(b_input_ids, b_input_mask, targets_onehot, model) + model.train() + # train with on manifold samples + on_manifold_logits = model(token_type_ids=None, attention_mask=b_input_mask, inputs_embeds=on_manifold_x)[0] + loss_on = soft_ce(on_manifold_logits, on_manifold_y) + + #generate off manifold samples + off_manifold_x = off_manifold.generate(model, b_input_ids, b_input_mask, b_labels) + + model.train() + # train with off manifold samples + off_manifold_logits = model(token_type_ids=None, attention_mask=b_input_mask, 
inputs_embeds=off_manifold_x)[0] + off_manifold_prob = F.softmax(off_manifold_logits, dim=1) + loss_off = -torch.mean(-torch.sum(off_manifold_prob*torch.log(off_manifold_prob), dim=1)) + loss_reg = args.beta_on*loss_on + args.beta_off*loss_off + + if torch.cuda.device_count() > 1: + loss_reg = loss_reg.mean() + + # Clear out the gradients (by default they accumulate) + optimizer.zero_grad() + loss_reg.backward() + + + loss_ce = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)[0] + if torch.cuda.device_count() > 1: + loss_ce = loss_ce.mean() + loss_ce.backward() + + torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) + + # Update parameters and take a step using the computed gradient + optimizer.step() + + # Update tracking variables + tr_loss1 += loss_ce.item() + tr_loss2 += loss_reg.item() + + nb_tr_examples += b_input_ids.size(0) + nb_tr_steps += 1 + + print("Train cross entropy loss: {} | reg loss: {}".format(tr_loss1/nb_tr_steps, tr_loss2/nb_tr_steps)) + + + + # Validation + # Put model in evaluation mode to evaluate loss on the validation set + model.eval() + # Tracking variables + eval_accurate_nb = 0 + nb_eval_examples = 0 + + # Evaluate data for one epoch + for batch in validation_dataloader: + # Add batch to GPU + batch = tuple(t.to(args.device) for t in batch) + # Unpack the inputs from our dataloader + b_input_ids, b_input_mask, b_labels = batch + # Telling the model not to compute or store gradients, saving memory and speeding up validation + with torch.no_grad(): + # Forward pass, calculate logit predictions + logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0] + # Move logits and labels to CPU + logits = logits.detach().cpu().numpy() + label_ids = b_labels.to('cpu').numpy() + + tmp_eval_nb = accurate_nb(logits, label_ids) + + eval_accurate_nb += tmp_eval_nb + nb_eval_examples += label_ids.shape[0] + eval_accuracy = eval_accurate_nb/nb_eval_examples + print("Validation Accuracy: 
{}".format(eval_accuracy)) + + scheduler.step(eval_accuracy) + + + if eval_accuracy > best_val: + dirname = '{}/BERT-mf-{}-{}-{}-{}'.format(args.dataset, args.seed, args.eps_in, args.eps_y, args.eps_out) + + output_dir = './model_save/{}'.format(dirname) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + print("Saving model to %s" % output_dir) + model_to_save = model.module if hasattr(model, 'module') else model + model_to_save.save_pretrained(output_dir) + #tokenizer.save_pretrained(output_dir) + + best_val = eval_accuracy + +# ##### test model on test data + # Put model in evaluation mode + model.eval() + # Tracking variables + predictions , true_labels = [], [] + eval_accurate_nb = 0 + nb_test_examples = 0 + logits_list = [] + labels_list = [] + # Predict + for batch in prediction_dataloader: + # Add batch to GPU + batch = tuple(t.to(args.device) for t in batch) + # Unpack the inputs from our dataloader + b_input_ids, b_input_mask, b_labels = batch + # Telling the model not to compute or store gradients, saving memory and speeding up prediction + with torch.no_grad(): + # Forward pass, calculate logit predictions + logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0] + logits_list.append(logits) + labels_list.append(b_labels) + # Move logits and labels to CPU + logits = logits.detach().cpu().numpy() + label_ids = b_labels.to('cpu').numpy() + + tmp_eval_nb = accurate_nb(logits, label_ids) + eval_accurate_nb += tmp_eval_nb + nb_test_examples += label_ids.shape[0] + + # Store predictions and true labels + predictions.append(logits) + true_labels.append(label_ids) + + print("Test Accuracy: {}".format(eval_accurate_nb/nb_test_examples)) + + logits_ece = torch.cat(logits_list) + labels_ece = torch.cat(labels_list) + ece = ece_criterion(logits_ece, labels_ece).item() + + print('ECE on test data: {}'.format(ece)) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/metrics.py b/src/metrics.py 
new file mode 100644 index 0000000000000000000000000000000000000000..96642d1026a4869375b585c24b56e34690b5f7b4 --- /dev/null +++ b/src/metrics.py @@ -0,0 +1,149 @@ +import numpy as np +from scipy.special import softmax + + +class CELoss(object): + + def compute_bin_boundaries(self, probabilities = np.array([])): + + #uniform bin spacing + if probabilities.size == 0: + bin_boundaries = np.linspace(0, 1, self.n_bins + 1) + self.bin_lowers = bin_boundaries[:-1] + self.bin_uppers = bin_boundaries[1:] + else: + #size of bins + bin_n = int(self.n_data/self.n_bins) + + bin_boundaries = np.array([]) + + probabilities_sort = np.sort(probabilities) + + for i in range(0,self.n_bins): + bin_boundaries = np.append(bin_boundaries,probabilities_sort[i*bin_n]) + bin_boundaries = np.append(bin_boundaries,1.0) + + self.bin_lowers = bin_boundaries[:-1] + self.bin_uppers = bin_boundaries[1:] + + + def get_probabilities(self, output, labels, logits): + #If not probabilities apply softmax! + if logits: + self.probabilities = softmax(output, axis=1) + else: + self.probabilities = output + + self.labels = np.argmax(labels, axis=1) + self.confidences = np.max(self.probabilities, axis=1) + self.predictions = np.argmax(self.probabilities, axis=1) + self.accuracies = np.equal(self.predictions, self.labels) + + def binary_matrices(self): + idx = np.arange(self.n_data) + #make matrices of zeros + pred_matrix = np.zeros([self.n_data,self.n_class]) + label_matrix = np.zeros([self.n_data,self.n_class]) + #self.acc_matrix = np.zeros([self.n_data,self.n_class]) + pred_matrix[idx,self.predictions] = 1 + label_matrix[idx,self.labels] = 1 + + self.acc_matrix = np.equal(pred_matrix, label_matrix) + + + def compute_bins(self, index = None): + self.bin_prop = np.zeros(self.n_bins) + self.bin_acc = np.zeros(self.n_bins) + self.bin_conf = np.zeros(self.n_bins) + self.bin_score = np.zeros(self.n_bins) + + if index == None: + confidences = self.confidences + accuracies = self.accuracies + else: + confidences = 
self.probabilities[:,index] + accuracies = self.acc_matrix[:,index] + + + for i, (bin_lower, bin_upper) in enumerate(zip(self.bin_lowers, self.bin_uppers)): + # Calculated |confidence - accuracy| in each bin + in_bin = np.greater(confidences,bin_lower.item()) * np.less_equal(confidences,bin_upper.item()) + self.bin_prop[i] = np.mean(in_bin) + + if self.bin_prop[i].item() > 0: + self.bin_acc[i] = np.mean(accuracies[in_bin]) + self.bin_conf[i] = np.mean(confidences[in_bin]) + self.bin_score[i] = np.abs(self.bin_conf[i] - self.bin_acc[i]) + +class MaxProbCELoss(CELoss): + def loss(self, output, labels, n_bins = 15, logits = True): + self.n_bins = n_bins + super().compute_bin_boundaries() + super().get_probabilities(output, labels, logits) + super().compute_bins() + +#http://people.cs.pitt.edu/~milos/research/AAAI_Calibration.pdf +class ECELoss(MaxProbCELoss): + + def loss(self, output, labels, n_bins = 15, logits = True): + super().loss(output, labels, n_bins, logits) + return np.dot(self.bin_prop,self.bin_score) + +class MCELoss(MaxProbCELoss): + + def loss(self, output, labels, n_bins = 15, logits = True): + super().loss(output, labels, n_bins, logits) + return np.max(self.bin_score) + +#https://arxiv.org/abs/1905.11001 +#Overconfidence Loss (Good in high risk applications where confident but wrong predictions can be especially harmful) +class OELoss(MaxProbCELoss): + + def loss(self, output, labels, n_bins = 15, logits = True): + super().loss(output, labels, n_bins, logits) + return np.dot(self.bin_prop,self.bin_conf * np.maximum(self.bin_conf-self.bin_acc,np.zeros(self.n_bins))) + + +#https://arxiv.org/abs/1904.01685 +class SCELoss(CELoss): + + def loss(self, output, labels, n_bins = 15, logits = True): + sce = 0.0 + self.n_bins = n_bins + self.n_data = len(output) + self.n_class = len(output[0]) + + super().compute_bin_boundaries() + super().get_probabilities(output, labels, logits) + super().binary_matrices() + + for i in range(self.n_class): + 
super().compute_bins(i) + sce += np.dot(self.bin_prop,self.bin_score) + + return sce/self.n_class + +class TACELoss(CELoss): + + def loss(self, output, labels, threshold = 0.01, n_bins = 15, logits = True): + tace = 0.0 + self.n_bins = n_bins + self.n_data = len(output) + self.n_class = len(output[0]) + + super().get_probabilities(output, labels, logits) + self.probabilities[self.probabilities < threshold] = 0 + super().binary_matrices() + + for i in range(self.n_class): + super().compute_bin_boundaries(self.probabilities[:,i]) + super().compute_bins(i) + tace += np.dot(self.bin_prop,self.bin_score) + + return tace/self.n_class + +#create TACELoss with threshold fixed at 0 +class ACELoss(TACELoss): + + def loss(self, output, labels, n_bins = 15, logits = True): + return super().loss(output, labels, 0.0 , n_bins, logits) diff --git a/src/optim_schedule.py b/src/optim_schedule.py new file mode 100644 index 0000000000000000000000000000000000000000..5ccd222588c75e04002e9bf0d565e97313a9ae9e --- /dev/null +++ b/src/optim_schedule.py @@ -0,0 +1,35 @@ +'''A wrapper class for optimizer ''' +import numpy as np + + +class ScheduledOptim(): + '''A simple wrapper class for learning rate scheduling''' + + def __init__(self, optimizer, d_model, n_warmup_steps): + self._optimizer = optimizer + self.n_warmup_steps = n_warmup_steps + self.n_current_steps = 0 + self.init_lr = np.power(d_model, -0.5) + + def step_and_update_lr(self): + "Step with the inner optimizer" + self._update_learning_rate() + self._optimizer.step() + + def zero_grad(self): + "Zero out the gradients by the inner optimizer" + self._optimizer.zero_grad() + + def _get_lr_scale(self): + return np.min([ + np.power(self.n_current_steps, -0.5), + np.power(self.n_warmup_steps, -1.5) * self.n_current_steps]) + + def _update_learning_rate(self): + ''' Learning rate scheduling per step ''' + + self.n_current_steps += 1 + lr = self.init_lr * self._get_lr_scale() + + for param_group in self._optimizer.param_groups: + 
param_group['lr'] = lr diff --git a/src/pretrainer.py b/src/pretrainer.py new file mode 100644 index 0000000000000000000000000000000000000000..a4288fda4e8182d198619dbcdb05c2013c1a118b --- /dev/null +++ b/src/pretrainer.py @@ -0,0 +1,808 @@ +import torch +import torch.nn as nn +# from torch.nn import functional as F +from torch.optim import Adam +from torch.utils.data import DataLoader +# import pickle + +from .bert import BERT +from .seq_model import BERTSM +from .classifier_model import BERTForClassification, BERTForClassificationWithFeats +from .optim_schedule import ScheduledOptim + +import tqdm +import sys +import time + +import numpy as np + +from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix + +import matplotlib.pyplot as plt +import seaborn as sns +import pandas as pd +from collections import defaultdict +import os + +class BERTTrainer: + """ + BERTTrainer pretrains BERT model on input sequence of strategies. + BERTTrainer make the pretrained BERT model with one training method objective. + 1. 
Masked Strategy Modeling :Masked SM + """ + + def __init__(self, bert: BERT, vocab_size: int, + train_dataloader: DataLoader, val_dataloader: DataLoader = None, test_dataloader: DataLoader = None, + lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=5000, + with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, log_folder_path: str = None): + """ + :param bert: BERT model which you want to train + :param vocab_size: total word vocab size + :param train_dataloader: train dataset data loader + :param test_dataloader: test dataset data loader [can be None] + :param lr: learning rate of optimizer + :param betas: Adam optimizer betas + :param weight_decay: Adam optimizer weight decay param + :param with_cuda: traning with cuda + :param log_freq: logging frequency of the batch iteration + """ + + cuda_condition = torch.cuda.is_available() and with_cuda + self.device = torch.device("cuda:0" if cuda_condition else "cpu") + print(cuda_condition, " Device used = ", self.device) + + available_gpus = list(range(torch.cuda.device_count())) + + # This BERT model will be saved + self.bert = bert.to(self.device) + # Initialize the BERT Sequence Model, with BERT model + self.model = BERTSM(bert, vocab_size).to(self.device) + + # Distributed GPU training if CUDA can detect more than 1 GPU + if with_cuda and torch.cuda.device_count() > 1: + print("Using %d GPUS for BERT" % torch.cuda.device_count()) + self.model = nn.DataParallel(self.model, device_ids=available_gpus) + + # Setting the train, validation and test data loader + self.train_data = train_dataloader + self.val_data = val_dataloader + self.test_data = test_dataloader + + # Setting the Adam optimizer with hyper-param + self.optim = Adam(self.model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay) + self.optim_schedule = ScheduledOptim(self.optim, self.bert.hidden, n_warmup_steps=warmup_steps) + + # Using Negative Log Likelihood Loss function for predicting the masked_token + 
self.criterion = nn.NLLLoss(ignore_index=0) + + self.log_freq = log_freq + self.log_folder_path = log_folder_path + # self.workspace_name = workspace_name + self.save_model = False + # self.code = code + self.avg_loss = 10000 + for fi in ['train', 'val', 'test']: + f = open(self.log_folder_path+f"/log_{fi}_pretrained.txt", 'w') + f.close() + self.start_time = time.time() + + print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()])) + + def train(self, epoch): + self.iteration(epoch, self.train_data) + + def val(self, epoch): + if epoch == 0: + self.avg_loss = 10000 + self.iteration(epoch, self.val_data, phase="val") + + def test(self, epoch): + self.iteration(epoch, self.test_data, phase="test") + + def iteration(self, epoch, data_loader, phase="train"): + """ + loop over the data_loader for training or testing + if on train status, backward operation is activated + and also auto save the model every peoch + + :param epoch: current epoch index + :param data_loader: torch.utils.data.DataLoader for iteration + :param train: boolean value of is train or test + :return: None + """ + + # self.log_file = f"{self.workspace_name}/logs/{self.code}/log_{phase}_pretrained.txt" + # bert_hidden_representations = [] can be used + # if epoch == 0: + # f = open(self.log_file, 'w') + # f.close() + + # Progress bar + data_iter = tqdm.tqdm(enumerate(data_loader), + desc="EP_%s:%d" % (phase, epoch), + total=len(data_loader), + bar_format="{l_bar}{r_bar}") + + total_correct = 0 + total_element = 0 + avg_loss = 0.0 + + if phase == "train": + self.model.train() + else: + self.model.eval() + with open(self.log_folder_path+f"/log_{phase}_pretrained.txt", 'a') as f: + sys.stdout = f + for i, data in data_iter: + # 0. batch_data will be sent into the device(GPU or cpu) + data = {key: value.to(self.device) for key, value in data.items()} + + # 1. 
forward masked_sm model + # mask_sm_output is log-probabilities output + mask_sm_output, bert_hidden_rep = self.model.forward(data["bert_input"], data["segment_label"]) + + # 2. NLLLoss of predicting masked token word + loss = self.criterion(mask_sm_output.transpose(1, 2), data["bert_label"]) + if torch.cuda.device_count() > 1: + loss = loss.mean() + + # 3. backward and optimization only in train + if phase == "train": + self.optim_schedule.zero_grad() + loss.backward() + self.optim_schedule.step_and_update_lr() + + # tokens with highest log-probabilities creates a predicted sequence + pred_tokens = torch.argmax(mask_sm_output, dim=-1) + mask_correct = (data["bert_label"] == pred_tokens) & data["masked_pos"] + + total_correct += mask_correct.sum().item() + total_element += data["masked_pos"].sum().item() + avg_loss +=loss.item() + + torch.cuda.empty_cache() + + post_fix = { + "epoch": epoch, + "iter": i, + "avg_loss": avg_loss / (i + 1), + "avg_acc_mask": (total_correct / total_element * 100) if total_element != 0 else 0, + "loss": loss.item() + } + if i % self.log_freq == 0: + data_iter.write(str(post_fix)) + + end_time = time.time() + final_msg = { + "epoch": f"EP{epoch}_{phase}", + "avg_loss": avg_loss / len(data_iter), + "total_masked_acc": (total_correct / total_element * 100) if total_element != 0 else 0, + "time_taken_from_start": end_time - self.start_time + } + print(final_msg) + f.close() + sys.stdout = sys.__stdout__ + + if phase == "val": + self.save_model = False + if self.avg_loss > (avg_loss / len(data_iter)): + self.save_model = True + self.avg_loss = (avg_loss / len(data_iter)) + + def save(self, epoch, file_path="output/bert_trained.model"): + """ + Saving the current BERT model on file_path + + :param epoch: current epoch number + :param file_path: model output path which gonna be file_path+"ep%d" % epoch + :return: final_output_path + """ + output_path = file_path + ".ep%d" % epoch + torch.save(self.bert.cpu(), output_path) + 
self.bert.to(self.device) + print("EP:%d Model Saved on:" % epoch, output_path) + return output_path + + +class BERTFineTuneTrainer: + + def __init__(self, bert: BERT, vocab_size: int, + train_dataloader: DataLoader, test_dataloader: DataLoader = None, + lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=10000, + with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, workspace_name=None, + num_labels=2, log_folder_path: str = None): + """ + :param bert: BERT model which you want to train + :param vocab_size: total word vocab size + :param train_dataloader: train dataset data loader + :param test_dataloader: test dataset data loader [can be None] + :param lr: learning rate of optimizer + :param betas: Adam optimizer betas + :param weight_decay: Adam optimizer weight decay param + :param with_cuda: traning with cuda + :param log_freq: logging frequency of the batch iteration + """ + + # Setup cuda device for BERT training, argument -c, --cuda should be true + cuda_condition = torch.cuda.is_available() and with_cuda + self.device = torch.device("cuda:0" if cuda_condition else "cpu") + print(cuda_condition, " Device used = ", self.device) + + available_gpus = list(range(torch.cuda.device_count())) + + # This BERT model will be saved every epoch + self.bert = bert + for param in self.bert.parameters(): + param.requires_grad = False + + # for name, param in self.bert.named_parameters(): + # if '.attention.linear_layers.0' in name or \ + # '.attention.linear_layers.1' in name or \ + # '.attention.linear_layers.2' in name: + # # if 'transformer_blocks.' in name:# or \ + # # 'transformer_blocks.3.' in name: + # # if '2.attention.linear_layers.' in name or \ + # # '3.attention.linear_layers.' 
in name: + # param.requires_grad = True + # Initialize the BERT Language Model, with BERT model + # self.model = BERTForClassification(self.bert, vocab_size, num_labels).to(self.device) + # self.model = BERTForClassificationWithFeats(self.bert, num_labels, 8).to(self.device) + self.model = BERTForClassificationWithFeats(self.bert, num_labels, 17).to(self.device) + + # self.model = BERTForClassificationWithFeats(self.bert, num_labels, 1).to(self.device) + # Distributed GPU training if CUDA can detect more than 1 GPU + if with_cuda and torch.cuda.device_count() > 1: + print("Using %d GPUS for BERT" % torch.cuda.device_count()) + self.model = nn.DataParallel(self.model, device_ids=available_gpus) + + # Setting the train, validation and test data loader + self.train_data = train_dataloader + # self.val_data = val_dataloader + self.test_data = test_dataloader + + # self.optim = Adam(self.model.parameters(), lr=lr, weight_decay=weight_decay) #, eps=1e-9 + self.optim = Adam(self.model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay) + self.optim_schedule = ScheduledOptim(self.optim, self.bert.hidden, n_warmup_steps=warmup_steps) + # self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1) + self.criterion = nn.CrossEntropyLoss() + + # if num_labels == 1: + # self.criterion = nn.MSELoss() + # elif num_labels == 2: + # self.criterion = nn.BCEWithLogitsLoss() + # # self.criterion = nn.CrossEntropyLoss() + # elif num_labels > 2: + # self.criterion = nn.CrossEntropyLoss() + # self.criterion = nn.BCEWithLogitsLoss() + + + self.log_freq = log_freq + self.log_folder_path = log_folder_path + # self.workspace_name = workspace_name + # self.finetune_task = finetune_task + self.save_model = False + self.avg_loss = 10000 + self.start_time = time.time() + # self.probability_list = [] + for fi in ['train', 'test']: #'val', + f = open(self.log_folder_path+f"/log_{fi}_finetuned.txt", 'w') + f.close() + print("Total Parameters:", 
sum([p.nelement() for p in self.model.parameters()])) + + def train(self, epoch): + self.iteration(epoch, self.train_data) + + # def val(self, epoch): + # self.iteration(epoch, self.val_data, phase="val") + + def test(self, epoch): + if epoch == 0: + self.avg_loss = 10000 + self.iteration(epoch, self.test_data, phase="test") + + def iteration(self, epoch, data_loader, phase="train"): + """ + loop over the data_loader for training or testing + if on train status, backward operation is activated + and also auto save the model every peoch + + :param epoch: current epoch index + :param data_loader: torch.utils.data.DataLoader for iteration + :param train: boolean value of is train or test + :return: None + """ + + # Setting the tqdm progress bar + data_iter = tqdm.tqdm(enumerate(data_loader), + desc="EP_%s:%d" % (phase, epoch), + total=len(data_loader), + bar_format="{l_bar}{r_bar}") + + avg_loss = 0.0 + total_correct = 0 + total_element = 0 + plabels = [] + tlabels = [] + probabs = [] + + if phase == "train": + self.model.train() + else: + self.model.eval() + # self.probability_list = [] + + with open(self.log_folder_path+f"/log_{phase}_finetuned.txt", 'a') as f: + sys.stdout = f + for i, data in data_iter: + # 0. batch_data will be sent into the device(GPU or cpu) + data = {key: value.to(self.device) for key, value in data.items()} + if phase == "train": + logits = self.model.forward(data["input"], data["segment_label"], data["feat"]) + else: + with torch.no_grad(): + logits = self.model.forward(data["input"], data["segment_label"], data["feat"]) + + loss = self.criterion(logits, data["label"]) + if torch.cuda.device_count() > 1: + loss = loss.mean() + + # 3. 
backward and optimization only in train + if phase == "train": + self.optim_schedule.zero_grad() + loss.backward() + self.optim_schedule.step_and_update_lr() + + # prediction accuracy + probs = nn.Softmax(dim=-1)(logits) # Probabilities + probabs.extend(probs.detach().cpu().numpy().tolist()) + predicted_labels = torch.argmax(probs, dim=-1) #correct + # self.probability_list.append(probs) + # true_labels = torch.argmax(data["label"], dim=-1) + plabels.extend(predicted_labels.cpu().numpy()) + tlabels.extend(data['label'].cpu().numpy()) + + # Compare predicted labels to true labels and calculate accuracy + correct = (data['label'] == predicted_labels).sum().item() + + avg_loss += loss.item() + total_correct += correct + # total_element += true_labels.nelement() + total_element += data["label"].nelement() + # print(">>>>>>>>>>>>>>", predicted_labels, true_labels, correct, total_correct, total_element) + + post_fix = { + "epoch": epoch, + "iter": i, + "avg_loss": avg_loss / (i + 1), + "avg_acc": total_correct / total_element * 100 if total_element != 0 else 0, + "loss": loss.item() + } + if i % self.log_freq == 0: + data_iter.write(str(post_fix)) + + precisions = precision_score(tlabels, plabels, average="weighted", zero_division=0) + recalls = recall_score(tlabels, plabels, average="weighted") + f1_scores = f1_score(tlabels, plabels, average="weighted") + cmatrix = confusion_matrix(tlabels, plabels) + end_time = time.time() + final_msg = { + "epoch": f"EP{epoch}_{phase}", + "avg_loss": avg_loss / len(data_iter), + "total_acc": total_correct * 100.0 / total_element, + "precisions": precisions, + "recalls": recalls, + "f1_scores": f1_scores, + # "confusion_matrix": f"{cmatrix}", + # "true_labels": f"{tlabels}", + # "predicted_labels": f"{plabels}", + "time_taken_from_start": end_time - self.start_time + } + print(final_msg) + f.close() + with open(self.log_folder_path+f"/log_{phase}_finetuned_info.txt", 'a') as f1: + sys.stdout = f1 + final_msg = { + "epoch": 
f"EP{epoch}_{phase}", + "confusion_matrix": f"{cmatrix}", + "true_labels": f"{tlabels if epoch == 0 else ''}", + "predicted_labels": f"{plabels}", + "probabilities": f"{probabs}", + "time_taken_from_start": end_time - self.start_time + } + print(final_msg) + f1.close() + sys.stdout = sys.__stdout__ + sys.stdout = sys.__stdout__ + + if phase == "test": + self.save_model = False + if self.avg_loss > (avg_loss / len(data_iter)): + self.save_model = True + self.avg_loss = (avg_loss / len(data_iter)) + + def iteration_1(self, epoch_idx, data): + try: + data = {key: value.to(self.device) for key, value in data.items()} + logits = self.model(data['input_ids'], data['segment_label']) + # Ensure logits is a tensor, not a tuple + loss_fct = nn.CrossEntropyLoss() + loss = loss_fct(logits, data['labels']) + + # Backpropagation and optimization + self.optim.zero_grad() + loss.backward() + self.optim.step() + + if self.log_freq > 0 and epoch_idx % self.log_freq == 0: + print(f"Epoch {epoch_idx}: Loss = {loss.item()}") + + return loss + + except Exception as e: + print(f"Error during iteration: {e}") + raise + + + def save(self, epoch, file_path="output/bert_fine_tuned_trained.model"): + """ + Saving the current BERT model on file_path + + :param epoch: current epoch number + :param file_path: model output path which gonna be file_path+"ep%d" % epoch + :return: final_output_path + """ + output_path = file_path + ".ep%d" % epoch + torch.save(self.model.cpu(), output_path) + self.model.to(self.device) + print("EP:%d Model Saved on:" % epoch, output_path) + return output_path + +class BERTFineTuneTrainer1: + + def __init__(self, bert: BERT, vocab_size: int, + train_dataloader: DataLoader, test_dataloader: DataLoader = None, + lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=10000, + with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, workspace_name=None, + num_labels=2, log_folder_path: str = None): + """ + :param bert: BERT model which you 
want to train + :param vocab_size: total word vocab size + :param train_dataloader: train dataset data loader + :param test_dataloader: test dataset data loader [can be None] + :param lr: learning rate of optimizer + :param betas: Adam optimizer betas + :param weight_decay: Adam optimizer weight decay param + :param with_cuda: traning with cuda + :param log_freq: logging frequency of the batch iteration + """ + + # Setup cuda device for BERT training, argument -c, --cuda should be true + cuda_condition = torch.cuda.is_available() and with_cuda + self.device = torch.device("cuda:0" if cuda_condition else "cpu") + print(cuda_condition, " Device used = ", self.device) + + available_gpus = list(range(torch.cuda.device_count())) + + # This BERT model will be saved every epoch + self.bert = bert + for param in self.bert.parameters(): + param.requires_grad = False + # Initialize the BERT Language Model, with BERT model + self.model = BERTForClassification(self.bert, vocab_size, num_labels).to(self.device) + # self.model = BERTForClassificationWithFeats(self.bert, num_labels, 8).to(self.device) + # self.model = BERTForClassificationWithFeats(self.bert, num_labels, 8*2).to(self.device) + + # self.model = BERTForClassificationWithFeats(self.bert, num_labels, 1).to(self.device) + # Distributed GPU training if CUDA can detect more than 1 GPU + if with_cuda and torch.cuda.device_count() > 1: + print("Using %d GPUS for BERT" % torch.cuda.device_count()) + self.model = nn.DataParallel(self.model, device_ids=available_gpus) + + # Setting the train, validation and test data loader + self.train_data = train_dataloader + # self.val_data = val_dataloader + self.test_data = test_dataloader + + # self.optim = Adam(self.model.parameters(), lr=lr, weight_decay=weight_decay) #, eps=1e-9 + self.optim = Adam(self.model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay) + self.optim_schedule = ScheduledOptim(self.optim, self.bert.hidden, n_warmup_steps=warmup_steps) + # 
self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1) + self.criterion = nn.CrossEntropyLoss() + + # if num_labels == 1: + # self.criterion = nn.MSELoss() + # elif num_labels == 2: + # self.criterion = nn.BCEWithLogitsLoss() + # # self.criterion = nn.CrossEntropyLoss() + # elif num_labels > 2: + # self.criterion = nn.CrossEntropyLoss() + # self.criterion = nn.BCEWithLogitsLoss() + + + self.log_freq = log_freq + self.log_folder_path = log_folder_path + # self.workspace_name = workspace_name + # self.finetune_task = finetune_task + self.save_model = False + self.avg_loss = 10000 + self.start_time = time.time() + # self.probability_list = [] + for fi in ['train', 'test']: #'val', + f = open(self.log_folder_path+f"/log_{fi}_finetuned.txt", 'w') + f.close() + print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()])) + + def train(self, epoch): + self.iteration(epoch, self.train_data) + + # def val(self, epoch): + # self.iteration(epoch, self.val_data, phase="val") + + def test(self, epoch): + if epoch == 0: + self.avg_loss = 10000 + self.iteration(epoch, self.test_data, phase="test") + + def iteration(self, epoch, data_loader, phase="train"): + """ + loop over the data_loader for training or testing + if on train status, backward operation is activated + and also auto save the model every peoch + + :param epoch: current epoch index + :param data_loader: torch.utils.data.DataLoader for iteration + :param train: boolean value of is train or test + :return: None + """ + + # Setting the tqdm progress bar + data_iter = tqdm.tqdm(enumerate(data_loader), + desc="EP_%s:%d" % (phase, epoch), + total=len(data_loader), + bar_format="{l_bar}{r_bar}") + + avg_loss = 0.0 + total_correct = 0 + total_element = 0 + plabels = [] + tlabels = [] + probabs = [] + + if phase == "train": + self.model.train() + else: + self.model.eval() + # self.probability_list = [] + + with 
open(self.log_folder_path+f"/log_{phase}_finetuned.txt", 'a') as f: + sys.stdout = f + for i, data in data_iter: + # 0. batch_data will be sent into the device(GPU or cpu) + data = {key: value.to(self.device) for key, value in data.items()} + if phase == "train": + logits = self.model.forward(data["input"], data["segment_label"])#, data["feat"]) + else: + with torch.no_grad(): + logits = self.model.forward(data["input"], data["segment_label"])#, data["feat"]) + + loss = self.criterion(logits, data["label"]) + if torch.cuda.device_count() > 1: + loss = loss.mean() + + # 3. backward and optimization only in train + if phase == "train": + self.optim_schedule.zero_grad() + loss.backward() + self.optim_schedule.step_and_update_lr() + + # prediction accuracy + probs = nn.Softmax(dim=-1)(logits) # Probabilities + probabs.extend(probs.detach().cpu().numpy().tolist()) + predicted_labels = torch.argmax(probs, dim=-1) #correct + # self.probability_list.append(probs) + # true_labels = torch.argmax(data["label"], dim=-1) + plabels.extend(predicted_labels.cpu().numpy()) + tlabels.extend(data['label'].cpu().numpy()) + + # Compare predicted labels to true labels and calculate accuracy + correct = (data['label'] == predicted_labels).sum().item() + + avg_loss += loss.item() + total_correct += correct + # total_element += true_labels.nelement() + total_element += data["label"].nelement() + # print(">>>>>>>>>>>>>>", predicted_labels, true_labels, correct, total_correct, total_element) + + post_fix = { + "epoch": epoch, + "iter": i, + "avg_loss": avg_loss / (i + 1), + "avg_acc": total_correct / total_element * 100 if total_element != 0 else 0, + "loss": loss.item() + } + if i % self.log_freq == 0: + data_iter.write(str(post_fix)) + + precisions = precision_score(tlabels, plabels, average="weighted", zero_division=0) + recalls = recall_score(tlabels, plabels, average="weighted") + f1_scores = f1_score(tlabels, plabels, average="weighted") + cmatrix = confusion_matrix(tlabels, plabels) + 
end_time = time.time() + final_msg = { + "epoch": f"EP{epoch}_{phase}", + "avg_loss": avg_loss / len(data_iter), + "total_acc": total_correct * 100.0 / total_element, + "precisions": precisions, + "recalls": recalls, + "f1_scores": f1_scores, + # "confusion_matrix": f"{cmatrix}", + # "true_labels": f"{tlabels}", + # "predicted_labels": f"{plabels}", + "time_taken_from_start": end_time - self.start_time + } + print(final_msg) + f.close() + with open(self.log_folder_path+f"/log_{phase}_finetuned_info.txt", 'a') as f1: + sys.stdout = f1 + final_msg = { + "epoch": f"EP{epoch}_{phase}", + "confusion_matrix": f"{cmatrix}", + "true_labels": f"{tlabels if epoch == 0 else ''}", + "predicted_labels": f"{plabels}", + "probabilities": f"{probabs}", + "time_taken_from_start": end_time - self.start_time + } + print(final_msg) + f1.close() + sys.stdout = sys.__stdout__ + sys.stdout = sys.__stdout__ + + if phase == "test": + self.save_model = False + if self.avg_loss > (avg_loss / len(data_iter)): + self.save_model = True + self.avg_loss = (avg_loss / len(data_iter)) + + def iteration_1(self, epoch_idx, data): + try: + data = {key: value.to(self.device) for key, value in data.items()} + logits = self.model(data['input_ids'], data['segment_label']) + # Ensure logits is a tensor, not a tuple + loss_fct = nn.CrossEntropyLoss() + loss = loss_fct(logits, data['labels']) + + # Backpropagation and optimization + self.optim.zero_grad() + loss.backward() + self.optim.step() + + if self.log_freq > 0 and epoch_idx % self.log_freq == 0: + print(f"Epoch {epoch_idx}: Loss = {loss.item()}") + + return loss + + except Exception as e: + print(f"Error during iteration: {e}") + raise + + + def save(self, epoch, file_path="output/bert_fine_tuned_trained.model"): + """ + Saving the current BERT model on file_path + + :param epoch: current epoch number + :param file_path: model output path which gonna be file_path+"ep%d" % epoch + :return: final_output_path + """ + output_path = file_path + ".ep%d" % 
class BERTAttention:
    """Aggregate last-layer attention maps of a trained BERT over a dataset
    and save them as a step-by-step heatmap.

    Fixes vs. previous revision: the tuple unpack
    ``head, d_model, s, s = last_layer.shape`` bound ``s`` twice and was then
    shadowed again by the token loop variable ``s``; the unused result of
    ``self.bert.forward`` is no longer kept; the final division guards
    against ``count == 0``.
    """

    def __init__(self, bert: BERT, vocab_obj, train_dataloader: DataLoader, workspace_name=None, code=None, finetune_task=None, with_cuda=True):
        cuda_condition = torch.cuda.is_available() and with_cuda
        self.device = torch.device("cuda:0" if cuda_condition else "cpu")
        print(with_cuda, cuda_condition, " Device used = ", self.device)
        self.bert = bert.to(self.device)

        self.train_dataloader = train_dataloader
        self.workspace_name = workspace_name
        self.code = code
        self.finetune_task = finetune_task
        self.vocab_obj = vocab_obj

    def getAttention(self):
        """Run the encoder over the dataloader, average attention between the
        known step labels across heads and samples, and save the heatmap."""
        labels = ['PercentChange', 'NumeratorQuantity2', 'NumeratorQuantity1', 'DenominatorQuantity1',
                  'OptionalTask_1', 'EquationAnswer', 'NumeratorFactor', 'DenominatorFactor',
                  'OptionalTask_2', 'FirstRow1:1', 'FirstRow1:2', 'FirstRow2:1', 'FirstRow2:2', 'SecondRow',
                  'ThirdRow', 'FinalAnswer', 'FinalAnswerDirection']
        df_all = pd.DataFrame(0.0, index=labels, columns=labels)

        data_iter = tqdm.tqdm(enumerate(self.train_dataloader),
                              desc="attention",
                              total=len(self.train_dataloader),
                              bar_format="{l_bar}{r_bar}")
        count = 0
        head = 0  # stays 0 if the dataloader is empty
        for i, data in data_iter:
            data = {key: value.to(self.device) for key, value in data.items()}
            # Forward pass populates self.bert.attention_values as a side effect.
            self.bert.forward(data["bert_input"], data["segment_label"])
            non_zero = np.sum(data["segment_label"].cpu().detach().numpy())

            # Last transformer layer: (head, batch, seq, seq)
            last_layer = self.bert.attention_values[-1].transpose(1, 0, 2, 3)
            head, d_model, _, _ = last_layer.shape

            for d in range(d_model):
                seq_labels = self.vocab_obj.to_sentence(data["bert_input"].cpu().detach().numpy().tolist()[d])[1:non_zero - 1]
                # Keep the last position of each known step label.
                indices_to_choose = defaultdict(int)
                for k, tok in enumerate(seq_labels):
                    if tok in labels:
                        indices_to_choose[tok] = k
                indices_chosen = list(indices_to_choose.values())
                selected_seq_labels = [tok for pos, tok in enumerate(seq_labels) if pos in indices_chosen]

                for h in range(head):
                    df_cm = pd.DataFrame(last_layer[h][d][indices_chosen, :][:, indices_chosen],
                                         index=selected_seq_labels, columns=selected_seq_labels)
                    df_all = df_all.add(df_cm, fill_value=0)
                    count += 1

        print(f"Count of total : {count, head * self.train_dataloader.dataset.len}")
        df_all = df_all.div(max(count, 1))  # guard: empty dataloader
        df_all = df_all.reindex(index=labels, columns=labels)
        sns.heatmap(df_all, annot=False)
        plt.title("Attentions")
        plt.xlabel("Steps")
        plt.ylabel("Steps")
        plt.grid(True)
        plt.tick_params(axis='x', bottom=False, top=True, labelbottom=False, labeltop=True, labelrotation=90)
        plt.savefig(f"{self.workspace_name}/plots/{self.code}/{self.finetune_task}_attention_scores.png", bbox_inches='tight')
        plt.show()
        plt.close()
""" + + +import logging +import math +import os +import warnings + +import torch +import torch.utils.checkpoint +from torch import nn +from torch.nn import CrossEntropyLoss, MSELoss + +from .activations import gelu, gelu_new, swish +from .configuration_bert import BertConfig +from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable +from .modeling_utils import PreTrainedModel, find_pruneable_heads_and_indices, prune_linear_layer + + +logger = logging.getLogger(__name__) + +_TOKENIZER_FOR_DOC = "BertTokenizer" + +BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "bert-base-uncased", + "bert-large-uncased", + "bert-base-cased", + "bert-large-cased", + "bert-base-multilingual-uncased", + "bert-base-multilingual-cased", + "bert-base-chinese", + "bert-base-german-cased", + "bert-large-uncased-whole-word-masking", + "bert-large-cased-whole-word-masking", + "bert-large-uncased-whole-word-masking-finetuned-squad", + "bert-large-cased-whole-word-masking-finetuned-squad", + "bert-base-cased-finetuned-mrpc", + "bert-base-german-dbmdz-cased", + "bert-base-german-dbmdz-uncased", + "cl-tohoku/bert-base-japanese", + "cl-tohoku/bert-base-japanese-whole-word-masking", + "cl-tohoku/bert-base-japanese-char", + "cl-tohoku/bert-base-japanese-char-whole-word-masking", + "TurkuNLP/bert-base-finnish-cased-v1", + "TurkuNLP/bert-base-finnish-uncased-v1", + "wietsedv/bert-base-dutch-cased", + # See all BERT models at https://huggingface.co/models?filter=bert +] + + +def load_tf_weights_in_bert(model, config, tf_checkpoint_path): + """ Load tf checkpoints in a pytorch model. + """ + try: + import re + import numpy as np + import tensorflow as tf + except ImportError: + logger.error( + "Loading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see " + "https://www.tensorflow.org/install/ for installation instructions." 
+ ) + raise + tf_path = os.path.abspath(tf_checkpoint_path) + logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + # Load weights from TF model + init_vars = tf.train.list_variables(tf_path) + names = [] + arrays = [] + for name, shape in init_vars: + logger.info("Loading TF weight {} with shape {}".format(name, shape)) + array = tf.train.load_variable(tf_path, name) + names.append(name) + arrays.append(array) + + for name, array in zip(names, arrays): + name = name.split("/") + # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v + # which are not required for using pretrained model + if any( + n in ["adam_v", "adam_m", "AdamWeightDecayOptimizer", "AdamWeightDecayOptimizer_1", "global_step"] + for n in name + ): + logger.info("Skipping {}".format("/".join(name))) + continue + pointer = model + for m_name in name: + if re.fullmatch(r"[A-Za-z]+_\d+", m_name): + scope_names = re.split(r"_(\d+)", m_name) + else: + scope_names = [m_name] + if scope_names[0] == "kernel" or scope_names[0] == "gamma": + pointer = getattr(pointer, "weight") + elif scope_names[0] == "output_bias" or scope_names[0] == "beta": + pointer = getattr(pointer, "bias") + elif scope_names[0] == "output_weights": + pointer = getattr(pointer, "weight") + elif scope_names[0] == "squad": + pointer = getattr(pointer, "classifier") + else: + try: + pointer = getattr(pointer, scope_names[0]) + except AttributeError: + logger.info("Skipping {}".format("/".join(name))) + continue + if len(scope_names) >= 2: + num = int(scope_names[1]) + pointer = pointer[num] + if m_name[-11:] == "_embeddings": + pointer = getattr(pointer, "weight") + elif m_name == "kernel": + array = np.transpose(array) + try: + assert pointer.shape == array.shape + except AssertionError as e: + e.args += (pointer.shape, array.shape) + raise + logger.info("Initialize PyTorch weight {}".format(name)) + pointer.data = torch.from_numpy(array) + return model + + +def mish(x): + return x 
def mish(x):
    # Mish activation: x * tanh(softplus(x)).
    return x * torch.tanh(nn.functional.softplus(x))


ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu, "swish": swish, "gelu_new": gelu_new, "mish": mish}


BertLayerNorm = torch.nn.LayerNorm


class BertEmbeddings(nn.Module):
    """Sum word, position and token-type embeddings, then LayerNorm + dropout."""

    def __init__(self, config):
        super().__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        # Attribute kept as `LayerNorm` (not snake_case) so TensorFlow
        # checkpoint variable names still map onto it when loading.
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, input_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None):
        # Either token ids or pre-computed embeddings must be supplied.
        if input_ids is not None:
            shape = input_ids.size()
            device = input_ids.device
        else:
            shape = inputs_embeds.size()[:-1]
            device = inputs_embeds.device

        if position_ids is None:
            position_ids = torch.arange(shape[1], dtype=torch.long, device=device)
            position_ids = position_ids.unsqueeze(0).expand(shape)
        if token_type_ids is None:
            token_type_ids = torch.zeros(shape, dtype=torch.long, device=device)
        if inputs_embeds is None:
            inputs_embeds = self.word_embeddings(input_ids)

        summed = inputs_embeds + self.position_embeddings(position_ids) + self.token_type_embeddings(token_type_ids)
        return self.dropout(self.LayerNorm(summed))
class BertSelfAttention(nn.Module):
    """Multi-head scaled dot-product attention (self- or cross-attention).

    Returns ``(context,)`` or ``(context, attention_probs)`` when
    ``output_attentions`` is set.
    """

    def __init__(self, config):
        super().__init__()
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
            raise ValueError(
                "The hidden size (%d) is not a multiple of the number of attention "
                "heads (%d)" % (config.hidden_size, config.num_attention_heads)
            )

        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = nn.Linear(config.hidden_size, self.all_head_size)
        self.key = nn.Linear(config.hidden_size, self.all_head_size)
        self.value = nn.Linear(config.hidden_size, self.all_head_size)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)

    def transpose_for_scores(self, x):
        # (batch, seq, all_head) -> (batch, heads, seq, head_size)
        split_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        return x.view(*split_shape).permute(0, 2, 1, 3)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        output_attentions=False,
    ):
        q = self.transpose_for_scores(self.query(hidden_states))

        if encoder_hidden_states is not None:
            # Cross-attention: keys/values come from the encoder, and so does
            # the padding mask.
            k = self.transpose_for_scores(self.key(encoder_hidden_states))
            v = self.transpose_for_scores(self.value(encoder_hidden_states))
            attention_mask = encoder_attention_mask
        else:
            k = self.transpose_for_scores(self.key(hidden_states))
            v = self.transpose_for_scores(self.value(hidden_states))

        # Raw attention scores, scaled by sqrt(head size).
        scores = torch.matmul(q, k.transpose(-1, -2)) / math.sqrt(self.attention_head_size)
        if attention_mask is not None:
            # Additive mask precomputed in BertModel.forward.
            scores = scores + attention_mask

        probs = nn.Softmax(dim=-1)(scores)
        # Dropping whole tokens to attend to — unusual, but taken from the
        # original Transformer paper.
        probs = self.dropout(probs)
        if head_mask is not None:
            probs = probs * head_mask

        context = torch.matmul(probs, v).permute(0, 2, 1, 3).contiguous()
        context = context.view(*(context.size()[:-2] + (self.all_head_size,)))

        return (context, probs) if output_attentions else (context,)


class BertSelfOutput(nn.Module):
    """Project the attention context and apply residual add + LayerNorm."""

    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor):
        projected = self.dropout(self.dense(hidden_states))
        return self.LayerNorm(projected + input_tensor)
class BertAttention(nn.Module):
    """Attention block: BertSelfAttention followed by BertSelfOutput."""

    def __init__(self, config):
        super().__init__()
        self.self = BertSelfAttention(config)
        self.output = BertSelfOutput(config)
        self.pruned_heads = set()

    def prune_heads(self, heads):
        """Remove the given heads and shrink the q/k/v/output projections."""
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.self.num_attention_heads, self.self.attention_head_size, self.pruned_heads
        )

        # Prune linear layers
        self.self.query = prune_linear_layer(self.self.query, index)
        self.self.key = prune_linear_layer(self.self.key, index)
        self.self.value = prune_linear_layer(self.self.value, index)
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # Update hyper params and store pruned heads
        self.self.num_attention_heads = self.self.num_attention_heads - len(heads)
        self.self.all_head_size = self.self.attention_head_size * self.self.num_attention_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        output_attentions=False,
    ):
        self_outputs = self.self(
            hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, output_attentions,
        )
        attention_output = self.output(self_outputs[0], hidden_states)
        # Append attention probs if they were requested.
        return (attention_output,) + self_outputs[1:]


class BertIntermediate(nn.Module):
    """Feed-forward expansion with the configured activation."""

    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
        # A string selects from ACT2FN; a callable is used directly.
        self.intermediate_act_fn = ACT2FN[config.hidden_act] if isinstance(config.hidden_act, str) else config.hidden_act

    def forward(self, hidden_states):
        return self.intermediate_act_fn(self.dense(hidden_states))


class BertOutput(nn.Module):
    """Feed-forward contraction with residual add + LayerNorm."""

    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor):
        contracted = self.dropout(self.dense(hidden_states))
        return self.LayerNorm(contracted + input_tensor)


class BertLayer(nn.Module):
    """One transformer block: (self-)attention, optional cross-attention,
    then the feed-forward sub-layer."""

    def __init__(self, config):
        super().__init__()
        self.attention = BertAttention(config)
        self.is_decoder = config.is_decoder
        if self.is_decoder:
            self.crossattention = BertAttention(config)
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        output_attentions=False,
    ):
        self_outputs = self.attention(
            hidden_states, attention_mask, head_mask, output_attentions=output_attentions,
        )
        attention_output = self_outputs[0]
        extras = self_outputs[1:]  # self-attention probs, if requested

        if self.is_decoder and encoder_hidden_states is not None:
            cross_outputs = self.crossattention(
                attention_output,
                attention_mask,
                head_mask,
                encoder_hidden_states,
                encoder_attention_mask,
                output_attentions,
            )
            attention_output = cross_outputs[0]
            extras = extras + cross_outputs[1:]  # cross-attention probs

        layer_output = self.output(self.intermediate(attention_output), attention_output)
        return (layer_output,) + extras


class BertEncoder(nn.Module):
    """Stack of BertLayer blocks with optional gradient checkpointing."""

    def __init__(self, config):
        super().__init__()
        self.config = config
        self.layer = nn.ModuleList([BertLayer(config) for _ in range(config.num_hidden_layers)])

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        output_attentions=False,
        output_hidden_states=False,
    ):
        all_hidden_states = ()
        all_attentions = ()
        for i, block in enumerate(self.layer):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            if getattr(self.config, "gradient_checkpointing", False):
                # checkpoint() only forwards tensor args, so close over the
                # output_attentions flag.
                def make_forward(module):
                    def run(*inputs):
                        return module(*inputs, output_attentions)
                    return run

                layer_outputs = torch.utils.checkpoint.checkpoint(
                    make_forward(block),
                    hidden_states,
                    attention_mask,
                    head_mask[i],
                    encoder_hidden_states,
                    encoder_attention_mask,
                )
            else:
                layer_outputs = block(
                    hidden_states,
                    attention_mask,
                    head_mask[i],
                    encoder_hidden_states,
                    encoder_attention_mask,
                    output_attentions,
                )
            hidden_states = layer_outputs[0]

            if output_attentions:
                all_attentions = all_attentions + (layer_outputs[1],)

        # Add last layer
        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        outputs = (hidden_states,)
        if output_hidden_states:
            outputs = outputs + (all_hidden_states,)
        if output_attentions:
            outputs = outputs + (all_attentions,)
        return outputs  # last-layer hidden state, (all hidden states), (all attentions)
class BertPooler(nn.Module):
    """Pool the sequence by transforming the first ([CLS]) token's hidden
    state through a dense layer + tanh."""

    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()

    def forward(self, hidden_states):
        # "Pooling" here is simply taking the first token's hidden state.
        cls_state = hidden_states[:, 0]
        return self.activation(self.dense(cls_state))


class BertPredictionHeadTransform(nn.Module):
    """Dense + activation + LayerNorm applied before the MLM decoder."""

    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        # A string selects from ACT2FN; a callable is used directly.
        self.transform_act_fn = ACT2FN[config.hidden_act] if isinstance(config.hidden_act, str) else config.hidden_act
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)

    def forward(self, hidden_states):
        return self.LayerNorm(self.transform_act_fn(self.dense(hidden_states)))


class BertLMPredictionHead(nn.Module):
    """MLM head: transform then decode to vocabulary logits.

    The decoder weight is tied to the input embeddings elsewhere; only an
    output-only bias is owned here.
    """

    def __init__(self, config):
        super().__init__()
        self.transform = BertPredictionHeadTransform(config)

        self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
        self.bias = nn.Parameter(torch.zeros(config.vocab_size))
        # Link the two so the bias is correctly resized with
        # `resize_token_embeddings`.
        self.decoder.bias = self.bias

    def forward(self, hidden_states):
        return self.decoder(self.transform(hidden_states))
class BertOnlyMLMHead(nn.Module):
    """Wrapper exposing just the masked-LM prediction head."""

    def __init__(self, config):
        super().__init__()
        self.predictions = BertLMPredictionHead(config)

    def forward(self, sequence_output):
        return self.predictions(sequence_output)


class BertOnlyNSPHead(nn.Module):
    """Wrapper exposing just the next-sentence-prediction head."""

    def __init__(self, config):
        super().__init__()
        self.seq_relationship = nn.Linear(config.hidden_size, 2)

    def forward(self, pooled_output):
        return self.seq_relationship(pooled_output)


class BertPreTrainingHeads(nn.Module):
    """Joint pre-training heads: MLM scores plus NSP scores."""

    def __init__(self, config):
        super().__init__()
        self.predictions = BertLMPredictionHead(config)
        self.seq_relationship = nn.Linear(config.hidden_size, 2)

    def forward(self, sequence_output, pooled_output):
        return self.predictions(sequence_output), self.seq_relationship(pooled_output)


class BertPreTrainedModel(PreTrainedModel):
    """ An abstract class to handle weights initialization and
    a simple interface for downloading and loading pretrained models.
    """

    config_class = BertConfig
    load_tf_weights = load_tf_weights_in_bert
    base_model_prefix = "bert"

    def _init_weights(self, module):
        """ Initialize the weights """
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal
            # for initialization; cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
        elif isinstance(module, BertLayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        if isinstance(module, nn.Linear) and module.bias is not None:
            module.bias.data.zero_()
+ """ + + config_class = BertConfig + load_tf_weights = load_tf_weights_in_bert + base_model_prefix = "bert" + + def _init_weights(self, module): + """ Initialize the weights """ + if isinstance(module, (nn.Linear, nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, BertLayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + +BERT_START_DOCSTRING = r""" + This model is a PyTorch `torch.nn.Module `_ sub-class. + Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general + usage and behavior. + + Parameters: + config (:class:`~transformers.BertConfig`): Model configuration class with all the parameters of the model. + Initializing with a config file does not load the weights associated with the model, only the configuration. + Check out the :meth:`~transformers.PreTrainedModel.from_pretrained` method to load the model weights. +""" + +BERT_INPUTS_DOCSTRING = r""" + Args: + input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`): + Indices of input sequence tokens in the vocabulary. + + Indices can be obtained using :class:`transformers.BertTokenizer`. + See :func:`transformers.PreTrainedTokenizer.encode` and + :func:`transformers.PreTrainedTokenizer.__call__` for details. + + `What are input IDs? <../glossary.html#input-ids>`__ + attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`, defaults to :obj:`None`): + Mask to avoid performing attention on padding token indices. + Mask values selected in ``[0, 1]``: + ``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens. + + `What are attention masks? 
<../glossary.html#attention-mask>`__ + token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`, defaults to :obj:`None`): + Segment token indices to indicate first and second portions of the inputs. + Indices are selected in ``[0, 1]``: ``0`` corresponds to a `sentence A` token, ``1`` + corresponds to a `sentence B` token + + `What are token type IDs? <../glossary.html#token-type-ids>`_ + position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`, defaults to :obj:`None`): + Indices of positions of each input sequence tokens in the position embeddings. + Selected in the range ``[0, config.max_position_embeddings - 1]``. + + `What are position IDs? <../glossary.html#position-ids>`_ + head_mask (:obj:`torch.FloatTensor` of shape :obj:`(num_heads,)` or :obj:`(num_layers, num_heads)`, `optional`, defaults to :obj:`None`): + Mask to nullify selected heads of the self-attention modules. + Mask values selected in ``[0, 1]``: + :obj:`1` indicates the head is **not masked**, :obj:`0` indicates the head is **masked**. + inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`, defaults to :obj:`None`): + Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. + This is useful if you want more control over how to convert `input_ids` indices into associated vectors + than the model's internal embedding lookup matrix. + encoder_hidden_states (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`, defaults to :obj:`None`): + Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention + if the model is configured as a decoder. + encoder_attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`, defaults to :obj:`None`): + Mask to avoid performing attention on the padding token indices of the encoder input. 
This mask + is used in the cross-attention if the model is configured as a decoder. + Mask values selected in ``[0, 1]``: + ``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens. + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): + If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. +""" + + + +[DOCS] +@add_start_docstrings( + "The bare Bert Model transformer outputting raw hidden-states without any specific head on top.", + BERT_START_DOCSTRING, +) +class BertModel(BertPreTrainedModel): + """ + + The model can behave as an encoder (with only self-attention) as well + as a decoder, in which case a layer of cross-attention is added between + the self-attention layers, following the architecture described in `Attention is all you need`_ by Ashish Vaswani, + Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin. + + To behave as an decoder the model needs to be initialized with the + :obj:`is_decoder` argument of the configuration set to :obj:`True`; an + :obj:`encoder_hidden_states` is expected as an input to the forward pass. + + .. _`Attention is all you need`: + https://arxiv.org/abs/1706.03762 + + """ + + def __init__(self, config): + super().__init__(config) + self.config = config + + self.embeddings = BertEmbeddings(config) + self.encoder = BertEncoder(config) + self.pooler = BertPooler(config) + + self.init_weights() + + +[DOCS] + def get_input_embeddings(self): + return self.embeddings.word_embeddings + + + +[DOCS] + def set_input_embeddings(self, value): + self.embeddings.word_embeddings = value + + + def _prune_heads(self, heads_to_prune): + """ Prunes heads of the model. 
+ heads_to_prune: dict of {layer_num: list of heads to prune in this layer} + See base class PreTrainedModel + """ + for layer, heads in heads_to_prune.items(): + self.encoder.layer[layer].attention.prune_heads(heads) + + +[DOCS] + @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased") + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + output_attentions=None, + output_hidden_states=None, + ): + r""" + Return: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): + Sequence of hidden-states at the output of the last layer of the model. + pooler_output (:obj:`torch.FloatTensor`: of shape :obj:`(batch_size, hidden_size)`): + Last layer hidden-state of the first token of the sequence (classification token) + further processed by a Linear layer and a Tanh activation function. The Linear + layer weights are trained from the next sentence prediction (classification) + objective during pre-training. + + This output is usually *not* a good summary + of the semantic content of the input, you're often better with averaging or pooling + the sequence of hidden-states for the whole input sequence. + hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): + Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) + of shape :obj:`(batch_size, sequence_length, hidden_size)`. 
+ + Hidden-states of the model at the output of each layer plus the initial embedding outputs. + attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): + Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape + :obj:`(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. + """ + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + + if input_ids is not None and inputs_embeds is not None: + raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") + elif input_ids is not None: + input_shape = input_ids.size() + elif inputs_embeds is not None: + input_shape = inputs_embeds.size()[:-1] + else: + raise ValueError("You have to specify either input_ids or inputs_embeds") + + device = input_ids.device if input_ids is not None else inputs_embeds.device + + if attention_mask is None: + attention_mask = torch.ones(input_shape, device=device) + if token_type_ids is None: + token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device) + + # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length] + # ourselves in which case we just need to make it broadcastable to all heads. 
+ extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(attention_mask, input_shape, device) + + # If a 2D ou 3D attention mask is provided for the cross-attention + # we need to make broadcastabe to [batch_size, num_heads, seq_length, seq_length] + if self.config.is_decoder and encoder_hidden_states is not None: + encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.size() + encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length) + if encoder_attention_mask is None: + encoder_attention_mask = torch.ones(encoder_hidden_shape, device=device) + encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask) + else: + encoder_extended_attention_mask = None + + # Prepare head mask if needed + # 1.0 in head_mask indicate we keep the head + # attention_probs has shape bsz x n_heads x N x N + # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads] + # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length] + head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers) + + embedding_output = self.embeddings( + input_ids=input_ids, position_ids=position_ids, token_type_ids=token_type_ids, inputs_embeds=inputs_embeds + ) + encoder_outputs = self.encoder( + embedding_output, + attention_mask=extended_attention_mask, + head_mask=head_mask, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_extended_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + ) + sequence_output = encoder_outputs[0] + pooled_output = self.pooler(sequence_output) + + outputs = (sequence_output, pooled_output,) + encoder_outputs[ + 1: + ] # add hidden_states and attentions if they are here + return outputs # sequence_output, pooled_output, (hidden_states), (attentions) + + + + +[DOCS] +@add_start_docstrings( + """Bert Model with two heads on top as done during the pre-training: a 
`masked language modeling` head and + a `next sentence prediction (classification)` head. """, + BERT_START_DOCSTRING, +) +class BertForPreTraining(BertPreTrainedModel): + def __init__(self, config): + super().__init__(config) + + self.bert = BertModel(config) + self.cls = BertPreTrainingHeads(config) + + self.init_weights() + + +[DOCS] + def get_output_embeddings(self): + return self.cls.predictions.decoder + + + +[DOCS] + @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + labels=None, + next_sentence_label=None, + output_attentions=None, + output_hidden_states=None, + **kwargs + ): + r""" + labels (``torch.LongTensor`` of shape ``(batch_size, sequence_length)``, `optional`, defaults to :obj:`None`): + Labels for computing the masked language modeling loss. + Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) + Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels + in ``[0, ..., config.vocab_size]`` + next_sentence_label (``torch.LongTensor`` of shape ``(batch_size,)``, `optional`, defaults to :obj:`None`): + Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair (see :obj:`input_ids` docstring) + Indices should be in ``[0, 1]``. + ``0`` indicates sequence B is a continuation of sequence A, + ``1`` indicates sequence B is a random sequence. + kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): + Used to hide legacy arguments that have been deprecated. 
+ + Returns: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + loss (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``: + Total loss as the sum of the masked language modeling loss and the next sequence prediction (classification) loss. + prediction_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`) + Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). + seq_relationship_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, 2)`): + Prediction scores of the next sequence prediction (classification) head (scores of True/False + continuation before SoftMax). + hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): + Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) + of shape :obj:`(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the initial embedding outputs. + attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): + Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape + :obj:`(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. 
+ + + Examples:: + + >>> from transformers import BertTokenizer, BertForPreTraining + >>> import torch + + >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + >>> model = BertForPreTraining.from_pretrained('bert-base-uncased') + + >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") + >>> outputs = model(**inputs) + + >>> prediction_scores, seq_relationship_scores = outputs[:2] + + """ + if "masked_lm_labels" in kwargs: + warnings.warn( + "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", + DeprecationWarning, + ) + labels = kwargs.pop("masked_lm_labels") + assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." + + outputs = self.bert( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + ) + + sequence_output, pooled_output = outputs[:2] + prediction_scores, seq_relationship_score = self.cls(sequence_output, pooled_output) + + outputs = (prediction_scores, seq_relationship_score,) + outputs[ + 2: + ] # add hidden states and attention if they are here + + if labels is not None and next_sentence_label is not None: + loss_fct = CrossEntropyLoss() + masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1)) + next_sentence_loss = loss_fct(seq_relationship_score.view(-1, 2), next_sentence_label.view(-1)) + total_loss = masked_lm_loss + next_sentence_loss + outputs = (total_loss,) + outputs + + return outputs # (loss), prediction_scores, seq_relationship_score, (hidden_states), (attentions) + + + +@add_start_docstrings( + """Bert Model with a `language modeling` head on top for CLM fine-tuning. 
""", BERT_START_DOCSTRING +) +class BertLMHeadModel(BertPreTrainedModel): + def __init__(self, config): + super().__init__(config) + assert config.is_decoder, "If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True`." + + self.bert = BertModel(config) + self.cls = BertOnlyMLMHead(config) + + self.init_weights() + + def get_output_embeddings(self): + return self.cls.predictions.decoder + + @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + labels=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + output_attentions=None, + output_hidden_states=None, + **kwargs + ): + r""" + labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`, defaults to :obj:`None`): + Labels for computing the left-to-right language modeling loss (next word prediction). + Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) + Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels + in ``[0, ..., config.vocab_size]`` + kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): + Used to hide legacy arguments that have been deprecated. + + Returns: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + ltr_lm_loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + Next token prediction loss. + prediction_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`) + Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). 
+ hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): + Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) + of shape :obj:`(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the initial embedding outputs. + attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): + Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape + :obj:`(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. + + Example:: + + >>> from transformers import BertTokenizer, BertLMHeadModel, BertConfig + >>> import torch + + >>> tokenizer = BertTokenizer.from_pretrained('bert-base-cased') + >>> config = BertConfig.from_pretrained("bert-base-cased") + >>> config.is_decoder = True + >>> model = BertLMHeadModel.from_pretrained('bert-base-cased', config=config) + + >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") + >>> outputs = model(**inputs) + + >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple + """ + + outputs = self.bert( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + ) + + sequence_output = outputs[0] + prediction_scores = self.cls(sequence_output) + + outputs = (prediction_scores,) + outputs[2:] # Add hidden states and attention if they are here + + if labels is not None: + # we are doing 
next-token prediction; shift prediction scores and input ids by one + prediction_scores = prediction_scores[:, :-1, :].contiguous() + labels = labels[:, 1:].contiguous() + loss_fct = CrossEntropyLoss() + ltr_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1)) + outputs = (ltr_lm_loss,) + outputs + + return outputs # (ltr_lm_loss), prediction_scores, (hidden_states), (attentions) + + def prepare_inputs_for_generation(self, input_ids, attention_mask=None, **model_kwargs): + input_shape = input_ids.shape + + # if model is used as a decoder in encoder-decoder model, the decoder attention mask is created on the fly + if attention_mask is None: + attention_mask = input_ids.new_ones(input_shape) + + return {"input_ids": input_ids, "attention_mask": attention_mask} + + + +[DOCS] +@add_start_docstrings("""Bert Model with a `language modeling` head on top. """, BERT_START_DOCSTRING) +class BertForMaskedLM(BertPreTrainedModel): + def __init__(self, config): + super().__init__(config) + assert ( + not config.is_decoder + ), "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention." 
+ + self.bert = BertModel(config) + self.cls = BertOnlyMLMHead(config) + + self.init_weights() + + +[DOCS] + def get_output_embeddings(self): + return self.cls.predictions.decoder + + + +[DOCS] + @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased") + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + labels=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + output_attentions=None, + output_hidden_states=None, + **kwargs + ): + r""" + labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`, defaults to :obj:`None`): + Labels for computing the masked language modeling loss. + Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) + Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels + in ``[0, ..., config.vocab_size]`` + kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): + Used to hide legacy arguments that have been deprecated. + + Returns: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + masked_lm_loss (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``: + Masked language modeling loss. + prediction_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`) + Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). 
+ hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): + Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) + of shape :obj:`(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the initial embedding outputs. + attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): + Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape + :obj:`(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. + """ + if "masked_lm_labels" in kwargs: + warnings.warn( + "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", + DeprecationWarning, + ) + labels = kwargs.pop("masked_lm_labels") + assert "lm_labels" not in kwargs, "Use `BertWithLMHead` for autoregressive language modeling task." + assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." 
+ + outputs = self.bert( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + ) + + sequence_output = outputs[0] + prediction_scores = self.cls(sequence_output) + + outputs = (prediction_scores,) + outputs[2:] # Add hidden states and attention if they are here + + if labels is not None: + loss_fct = CrossEntropyLoss() # -100 index = padding token + masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1)) + outputs = (masked_lm_loss,) + outputs + + return outputs # (masked_lm_loss), prediction_scores, (hidden_states), (attentions) + + + def prepare_inputs_for_generation(self, input_ids, attention_mask=None, **model_kwargs): + input_shape = input_ids.shape + effective_batch_size = input_shape[0] + + # add a dummy token + assert self.config.pad_token_id is not None, "The PAD token should be defined for generation" + attention_mask = torch.cat([attention_mask, attention_mask.new_zeros((attention_mask.shape[0], 1))], dim=-1) + dummy_token = torch.full( + (effective_batch_size, 1), self.config.pad_token_id, dtype=torch.long, device=input_ids.device + ) + input_ids = torch.cat([input_ids, dummy_token], dim=1) + + return {"input_ids": input_ids, "attention_mask": attention_mask} + + + + +[DOCS] +@add_start_docstrings( + """Bert Model with a `next sentence prediction (classification)` head on top. 
""", BERT_START_DOCSTRING, +) +class BertForNextSentencePrediction(BertPreTrainedModel): + def __init__(self, config): + super().__init__(config) + + self.bert = BertModel(config) + self.cls = BertOnlyNSPHead(config) + + self.init_weights() + + +[DOCS] + @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + next_sentence_label=None, + output_attentions=None, + output_hidden_states=None, + ): + r""" + next_sentence_label (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`, defaults to :obj:`None`): + Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair (see ``input_ids`` docstring) + Indices should be in ``[0, 1]``. + ``0`` indicates sequence B is a continuation of sequence A, + ``1`` indicates sequence B is a random sequence. + + Returns: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`next_sentence_label` is provided): + Next sequence prediction (classification) loss. + seq_relationship_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, 2)`): + Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation before SoftMax). + hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): + Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) + of shape :obj:`(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the initial embedding outputs. 
+ attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): + Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape + :obj:`(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. + + Examples:: + + >>> from transformers import BertTokenizer, BertForNextSentencePrediction + >>> import torch + + >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + >>> model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased') + + >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced." + >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light." + >>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt') + + >>> loss, logits = model(**encoding, next_sentence_label=torch.LongTensor([1])) + >>> assert logits[0, 0] < logits[0, 1] # next sentence was random + """ + + outputs = self.bert( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + ) + + pooled_output = outputs[1] + + seq_relationship_score = self.cls(pooled_output) + + outputs = (seq_relationship_score,) + outputs[2:] # add hidden states and attention if they are here + if next_sentence_label is not None: + loss_fct = CrossEntropyLoss() + next_sentence_loss = loss_fct(seq_relationship_score.view(-1, 2), next_sentence_label.view(-1)) + outputs = (next_sentence_loss,) + outputs + + return outputs # (next_sentence_loss), seq_relationship_score, (hidden_states), (attentions) + + + + +[DOCS] +@add_start_docstrings( + """Bert Model transformer with a sequence classification/regression head on top (a 
linear layer on top of + the pooled output) e.g. for GLUE tasks. """, + BERT_START_DOCSTRING, +) +class BertForSequenceClassification(BertPreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + + self.bert = BertModel(config) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.classifier = nn.Linear(config.hidden_size, config.num_labels) + + self.init_weights() + + +[DOCS] + @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased") + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + labels=None, + output_attentions=None, + output_hidden_states=None, + ): + r""" + labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`, defaults to :obj:`None`): + Labels for computing the sequence classification/regression loss. + Indices should be in :obj:`[0, ..., config.num_labels - 1]`. + If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss), + If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy). + + Returns: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`label` is provided): + Classification (or regression if config.num_labels==1) loss. + logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`): + Classification (or regression if config.num_labels==1) scores (before SoftMax). 
+ hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): + Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) + of shape :obj:`(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the initial embedding outputs. + attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): + Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape + :obj:`(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. + """ + + outputs = self.bert( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + ) + + pooled_output = outputs[1] + + pooled_output = self.dropout(pooled_output) + logits = self.classifier(pooled_output) + + outputs = (logits,) + outputs[2:] # add hidden states and attention if they are here + + if labels is not None: + if self.num_labels == 1: + # We are doing regression + loss_fct = MSELoss() + loss = loss_fct(logits.view(-1), labels.view(-1)) + else: + loss_fct = CrossEntropyLoss() + loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) + outputs = (loss,) + outputs + + return outputs # (loss), logits, (hidden_states), (attentions) + + + + +[DOCS] +@add_start_docstrings( + """Bert Model with a multiple choice classification head on top (a linear layer on top of + the pooled output and a softmax) e.g. for RocStories/SWAG tasks. 
""", + BERT_START_DOCSTRING, +) +class BertForMultipleChoice(BertPreTrainedModel): + def __init__(self, config): + super().__init__(config) + + self.bert = BertModel(config) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.classifier = nn.Linear(config.hidden_size, 1) + + self.init_weights() + + +[DOCS] + @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)")) + @add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased") + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + labels=None, + output_attentions=None, + output_hidden_states=None, + ): + r""" + labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`, defaults to :obj:`None`): + Labels for computing the multiple choice classification loss. + Indices should be in ``[0, ..., num_choices-1]`` where `num_choices` is the size of the second dimension + of the input tensors. (see `input_ids` above) + + Returns: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + loss (:obj:`torch.FloatTensor` of shape `(1,)`, `optional`, returned when :obj:`labels` is provided): + Classification loss. + classification_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, num_choices)`): + `num_choices` is the second dimension of the input tensors. (see `input_ids` above). + + Classification scores (before SoftMax). + hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): + Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) + of shape :obj:`(batch_size, sequence_length, hidden_size)`. 
+ + Hidden-states of the model at the output of each layer plus the initial embedding outputs. + attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): + Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape + :obj:`(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. + """ + num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1] + + input_ids = input_ids.view(-1, input_ids.size(-1)) if input_ids is not None else None + attention_mask = attention_mask.view(-1, attention_mask.size(-1)) if attention_mask is not None else None + token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1)) if token_type_ids is not None else None + position_ids = position_ids.view(-1, position_ids.size(-1)) if position_ids is not None else None + inputs_embeds = ( + inputs_embeds.view(-1, inputs_embeds.size(-2), inputs_embeds.size(-1)) + if inputs_embeds is not None + else None + ) + + outputs = self.bert( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + ) + + pooled_output = outputs[1] + + pooled_output = self.dropout(pooled_output) + logits = self.classifier(pooled_output) + reshaped_logits = logits.view(-1, num_choices) + + outputs = (reshaped_logits,) + outputs[2:] # add hidden states and attention if they are here + + if labels is not None: + loss_fct = CrossEntropyLoss() + loss = loss_fct(reshaped_logits, labels) + outputs = (loss,) + outputs + + return outputs # (loss), reshaped_logits, (hidden_states), (attentions) + + + + +[DOCS] +@add_start_docstrings( + """Bert Model with a token classification head on top (a linear 
class BertForTokenClassification(BertPreTrainedModel):
    """BERT with a token-level classification head: a linear layer applied to
    every hidden state, e.g. for Named-Entity-Recognition (NER) tasks.
    """

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # Per-token classifier over the full sequence output.
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        self.init_weights()

    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
    @add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased")
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`, defaults to :obj:`None`):
            Per-token labels in ``[0, ..., config.num_labels - 1]``.

        Returns:
            Tuple ``((loss,)? scores, (hidden_states,)? (attentions,)?)`` with
            ``scores`` of shape :obj:`(batch_size, sequence_length, config.num_labels)`
            (classification scores before SoftMax).
        """

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )

        # outputs[0]: last-layer hidden states for every token.
        sequence_output = outputs[0]

        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        outputs = (logits,) + outputs[2:]  # add hidden states and attention if they are here
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            # Only keep active parts of the loss: labels at padding positions
            # (attention_mask == 0) are remapped to the loss's ignore_index so
            # they contribute nothing to the gradient.
            if attention_mask is not None:
                active_loss = attention_mask.view(-1) == 1
                active_logits = logits.view(-1, self.num_labels)
                active_labels = torch.where(
                    active_loss, labels.view(-1), torch.tensor(loss_fct.ignore_index).type_as(labels)
                )
                loss = loss_fct(active_logits, active_labels)
            else:
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            outputs = (loss,) + outputs

        return outputs  # (loss), scores, (hidden_states), (attentions)
""", + BERT_START_DOCSTRING, +) +class BertForQuestionAnswering(BertPreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + + self.bert = BertModel(config) + self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels) + + self.init_weights() + + +[DOCS] + @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased") + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + start_positions=None, + end_positions=None, + output_attentions=None, + output_hidden_states=None, + ): + r""" + start_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`, defaults to :obj:`None`): + Labels for position (index) of the start of the labelled span for computing the token classification loss. + Positions are clamped to the length of the sequence (`sequence_length`). + Position outside of the sequence are not taken into account for computing the loss. + end_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`, defaults to :obj:`None`): + Labels for position (index) of the end of the labelled span for computing the token classification loss. + Positions are clamped to the length of the sequence (`sequence_length`). + Position outside of the sequence are not taken into account for computing the loss. + + Returns: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + Total span extraction loss is the sum of a Cross-Entropy for the start and end positions. 
+ start_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length,)`): + Span-start scores (before SoftMax). + end_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length,)`): + Span-end scores (before SoftMax). + hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): + Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) + of shape :obj:`(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the initial embedding outputs. + attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): + Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape + :obj:`(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. 
+ """ + + outputs = self.bert( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + ) + + sequence_output = outputs[0] + + logits = self.qa_outputs(sequence_output) + start_logits, end_logits = logits.split(1, dim=-1) + start_logits = start_logits.squeeze(-1) + end_logits = end_logits.squeeze(-1) + + outputs = (start_logits, end_logits,) + outputs[2:] + if start_positions is not None and end_positions is not None: + # If we are on multi-GPU, split add a dimension + if len(start_positions.size()) > 1: + start_positions = start_positions.squeeze(-1) + if len(end_positions.size()) > 1: + end_positions = end_positions.squeeze(-1) + # sometimes the start/end positions are outside our model inputs, we ignore these terms + ignored_index = start_logits.size(1) + start_positions.clamp_(0, ignored_index) + end_positions.clamp_(0, ignored_index) + + loss_fct = CrossEntropyLoss(ignore_index=ignored_index) + start_loss = loss_fct(start_logits, start_positions) + end_loss = loss_fct(end_logits, end_positions) + total_loss = (start_loss + end_loss) / 2 + outputs = (total_loss,) + outputs + + return outputs # (loss), start_logits, end_logits, (hidden_states), (attentions) diff --git a/src/reference_code/evaluate_embeddings.py b/src/reference_code/evaluate_embeddings.py new file mode 100644 index 0000000000000000000000000000000000000000..2b5a97e7b004ca0f3bcdefd71efd5838d9d7821a --- /dev/null +++ b/src/reference_code/evaluate_embeddings.py @@ -0,0 +1,136 @@ +from torch.utils.data import DataLoader +import torch.nn as nn +import torch +import numpy + +import pickle +import tqdm + +from ..bert import BERT +from ..vocab import Vocab +from ..dataset import TokenizerDataset +import argparse +from itertools import combinations + +def generate_subset(s): + subsets = [] + for r in range(len(s) + 1): 
def generate_subset(s):
    """Return ``{index: subset_as_list}`` for every subset of ``s``.

    Subsets are enumerated by increasing size (empty subset first), in the
    order produced by :func:`itertools.combinations`.
    """
    # FIX: the original special-cased r == 1, but combinations(s, 1) already
    # yields 1-tuples, and list(("a",)) == ["a"] — the branch was redundant.
    # It also shadowed the parameter `s` in its dict comprehension.
    subsets = [list(combo) for r in range(len(s) + 1) for combo in combinations(s, r)]
    return dict(enumerate(subsets))

if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument('-workspace_name', type=str, default=None)
    parser.add_argument("-seq_len", type=int, default=100, help="maximum sequence length")
    # NOTE(review): argparse `type=bool` is a known trap — any non-empty
    # string (including "False") parses as True. Kept for CLI backward
    # compatibility; consider action='store_true' in a follow-up.
    parser.add_argument('-pretrain', type=bool, default=False)
    parser.add_argument('-masked_pred', type=bool, default=False)
    parser.add_argument('-epoch', type=str, default=None)

    options = parser.parse_args()

    # Workspace prefix for all input/output paths ("" when not set).
    folder_path = options.workspace_name+"/" if options.workspace_name else ""

    vocab_path = f"{folder_path}check/pretraining/vocab.txt"

    print("Loading Vocab", vocab_path)
    vocab_obj = Vocab(vocab_path)
    vocab_obj.load_vocab()
    print("Vocab Size: ", len(vocab_obj.vocab))

    # Which pre-trained checkpoint to embed with.
    if options.masked_pred:
        str_code = "masked_prediction"
        output_name = f"{folder_path}output/bert_trained.seq_model.ep{options.epoch}"
    else:
        str_code = "masked"
        output_name = f"{folder_path}output/bert_trained.seq_encoder.model.ep{options.epoch}"

    folder_path = folder_path+"check/"
    if options.pretrain:
        pretrain_file = f"{folder_path}pretraining/pretrain.txt"
        pretrain_label = f"{folder_path}pretraining/pretrain_opt.pkl"

        embedding_file_path = f"{folder_path}embeddings/pretrain_embeddings_{str_code}_{options.epoch}.pkl"
        print("Loading Pretrain Dataset ", pretrain_file)
        pretrain_dataset = TokenizerDataset(pretrain_file, pretrain_label, vocab_obj, seq_len=options.seq_len)

        print("Creating Dataloader")
        pretrain_data_loader = DataLoader(pretrain_dataset, batch_size=32, num_workers=4)
    else:
        val_file = f"{folder_path}pretraining/test.txt"
        val_label = f"{folder_path}pretraining/test_opt.txt"

        embedding_file_path = f"{folder_path}embeddings/test_embeddings_{str_code}_{options.epoch}.pkl"

        print("Loading Validation Dataset ", val_file)
        val_dataset = TokenizerDataset(val_file, val_label, vocab_obj, seq_len=options.seq_len)

        print("Creating Dataloader")
        val_data_loader = DataLoader(val_dataset, batch_size=32, num_workers=4)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    print("Load Pre-trained BERT model...")
    print(output_name)
    bert = torch.load(output_name, map_location=device)
    # Inference only: freeze every parameter.
    for param in bert.parameters():
        param.requires_grad = False

    if options.pretrain:
        print("Pretrain-embeddings....")
        data_iter = tqdm.tqdm(enumerate(pretrain_data_loader),
                              desc="pre-train",
                              total=len(pretrain_data_loader),
                              bar_format="{l_bar}{r_bar}")
        pretrain_embeddings = []
        for i, data in data_iter:
            data = {key: value.to(device) for key, value in data.items()}
            hrep = bert(data["bert_input"], data["segment_label"])
            # [CLS] (position 0) vector of each sequence as its embedding.
            embeddings = [h for h in hrep[:,0].cpu().detach().numpy()]
            pretrain_embeddings.extend(embeddings)
        # FIX: use a context manager — the original leaked the file handle.
        with open(embedding_file_path, "wb") as fout:
            pickle.dump(pretrain_embeddings, fout)
    else:
        print("Validation-embeddings....")
        data_iter = tqdm.tqdm(enumerate(val_data_loader),
                              desc="validation",
                              total=len(val_data_loader),
                              bar_format="{l_bar}{r_bar}")
        val_embeddings = []
        for i, data in data_iter:
            data = {key: value.to(device) for key, value in data.items()}
            hrep = bert(data["bert_input"], data["segment_label"])
            embeddings = [h for h in hrep[:,0].cpu().detach().numpy()]
            val_embeddings.extend(embeddings)
        # FIX: use a context manager — the original leaked the file handle.
        with open(embedding_file_path, "wb") as fout:
            pickle.dump(val_embeddings, fout)
class CELoss(object):
    """Shared machinery for binned calibration-error metrics.

    Subclasses call the helpers below to populate per-bin statistics
    (``bin_prop``, ``bin_acc``, ``bin_conf``, ``bin_score``) from model
    outputs and one-hot labels.
    """

    def compute_bin_boundaries(self, probabilities=np.array([])):
        """Set ``bin_lowers``/``bin_uppers`` from ``self.n_bins``.

        With no argument, bins are uniformly spaced on [0, 1]; given a
        probability vector, bins are adaptive (equal sample mass, using
        ``self.n_data``).
        """
        if probabilities.size == 0:
            # Uniform bin spacing.
            bin_boundaries = np.linspace(0, 1, self.n_bins + 1)
            self.bin_lowers = bin_boundaries[:-1]
            self.bin_uppers = bin_boundaries[1:]
        else:
            # Size of each equal-mass bin.
            bin_n = int(self.n_data / self.n_bins)

            bin_boundaries = np.array([])
            probabilities_sort = np.sort(probabilities)

            # FIX (idiom): range(0, n) -> range(n).
            for i in range(self.n_bins):
                bin_boundaries = np.append(bin_boundaries, probabilities_sort[i * bin_n])
            bin_boundaries = np.append(bin_boundaries, 1.0)

            self.bin_lowers = bin_boundaries[:-1]
            self.bin_uppers = bin_boundaries[1:]

    def get_probabilities(self, output, labels, logits):
        """Cache probabilities, confidences, predictions and accuracies.

        ``labels`` is expected one-hot; ``logits=True`` applies a softmax
        to ``output`` first.
        """
        if logits:
            self.probabilities = softmax(output, axis=1)
        else:
            self.probabilities = output

        self.labels = np.argmax(labels, axis=1)
        self.confidences = np.max(self.probabilities, axis=1)
        self.predictions = np.argmax(self.probabilities, axis=1)
        self.accuracies = np.equal(self.predictions, self.labels)

    def binary_matrices(self):
        """Build the per-class correctness matrix ``acc_matrix``."""
        idx = np.arange(self.n_data)
        pred_matrix = np.zeros([self.n_data, self.n_class])
        label_matrix = np.zeros([self.n_data, self.n_class])
        pred_matrix[idx, self.predictions] = 1
        label_matrix[idx, self.labels] = 1

        # True where prediction and label agree per (sample, class) cell.
        self.acc_matrix = np.equal(pred_matrix, label_matrix)

    def compute_bins(self, index=None):
        """Fill per-bin proportion/accuracy/confidence/|conf-acc| arrays.

        ``index=None`` uses max-probability confidences; an integer selects
        the per-class column (requires binary_matrices()).
        """
        self.bin_prop = np.zeros(self.n_bins)
        self.bin_acc = np.zeros(self.n_bins)
        self.bin_conf = np.zeros(self.n_bins)
        self.bin_score = np.zeros(self.n_bins)

        # FIX: `index == None` -> `index is None` (identity check; also
        # avoids relying on __eq__ semantics).
        if index is None:
            confidences = self.confidences
            accuracies = self.accuracies
        else:
            confidences = self.probabilities[:, index]
            accuracies = self.acc_matrix[:, index]

        for i, (bin_lower, bin_upper) in enumerate(zip(self.bin_lowers, self.bin_uppers)):
            # Membership in the half-open bin (lower, upper].
            in_bin = np.greater(confidences, bin_lower.item()) * np.less_equal(confidences, bin_upper.item())
            self.bin_prop[i] = np.mean(in_bin)

            if self.bin_prop[i].item() > 0:
                self.bin_acc[i] = np.mean(accuracies[in_bin])
                self.bin_conf[i] = np.mean(confidences[in_bin])
                self.bin_score[i] = np.abs(self.bin_conf[i] - self.bin_acc[i])


class MaxProbCELoss(CELoss):
    """Base for metrics over the maximum-probability (top-1) confidence."""

    def loss(self, output, labels, n_bins=15, logits=True):
        self.n_bins = n_bins
        super().compute_bin_boundaries()
        super().get_probabilities(output, labels, logits)
        super().compute_bins()
#http://people.cs.pitt.edu/~milos/research/AAAI_Calibration.pdf
class ECELoss(MaxProbCELoss):
    """Expected Calibration Error: bin-proportion-weighted |conf - acc|."""

    def loss(self, output, labels, n_bins = 15, logits = True):
        super().loss(output, labels, n_bins, logits)
        return (self.bin_prop * self.bin_score).sum()

class MCELoss(MaxProbCELoss):
    """Maximum Calibration Error: the worst single-bin |conf - acc|."""

    def loss(self, output, labels, n_bins = 15, logits = True):
        super().loss(output, labels, n_bins, logits)
        return self.bin_score.max()

#https://arxiv.org/abs/1905.11001
#Overconfidence Loss (Good in high risk applications where confident but wrong predictions can be especially harmful)
class OELoss(MaxProbCELoss):
    """Overconfidence Error: penalizes bins whose confidence exceeds accuracy."""

    def loss(self, output, labels, n_bins = 15, logits = True):
        super().loss(output, labels, n_bins, logits)
        overconfidence = np.clip(self.bin_conf - self.bin_acc, 0.0, None)
        return (self.bin_prop * self.bin_conf * overconfidence).sum()

#https://arxiv.org/abs/1904.01685
class SCELoss(CELoss):
    """Static Calibration Error: per-class binned error, averaged over classes."""

    def loss(self, output, labels, n_bins = 15, logits = True):
        self.n_bins = n_bins
        self.n_data = len(output)
        self.n_class = len(output[0])

        super().compute_bin_boundaries()
        super().get_probabilities(output, labels, logits)
        super().binary_matrices()

        total = 0.0
        for cls in range(self.n_class):
            super().compute_bins(cls)
            total += np.dot(self.bin_prop, self.bin_score)

        return total / self.n_class

class TACELoss(CELoss):
    """Thresholded Adaptive Calibration Error: adaptive bins per class,
    with probabilities below ``threshold`` zeroed out."""

    def loss(self, output, labels, threshold = 0.01, n_bins = 15, logits = True):
        self.n_bins = n_bins
        self.n_data = len(output)
        self.n_class = len(output[0])

        super().get_probabilities(output, labels, logits)
        self.probabilities[self.probabilities < threshold] = 0
        super().binary_matrices()

        total = 0.0
        for cls in range(self.n_class):
            # Adaptive (equal-mass) boundaries from this class's column.
            super().compute_bin_boundaries(self.probabilities[:, cls])
            super().compute_bins(cls)
            total += np.dot(self.bin_prop, self.bin_score)

        return total / self.n_class

#create TACELoss with threshold fixed at 0
class ACELoss(TACELoss):
    """Adaptive Calibration Error: TACE with the threshold fixed at 0."""

    def loss(self, output, labels, n_bins = 15, logits = True):
        return super().loss(output, labels, 0.0, n_bins, logits)
class ECE(nn.Module):
    """Expected Calibration Error over fixed uniform confidence bins.

    ``forward`` expects raw logits and one-hot labels, both of shape
    ``(n_samples, n_classes)``, and returns a 1-element tensor.
    """

    def __init__(self, n_bins=15):
        """
        n_bins (int): number of confidence interval bins
        """
        super().__init__()
        edges = torch.linspace(0, 1, n_bins + 1)
        self.bin_lowers = edges[:-1]
        self.bin_uppers = edges[1:]

    def forward(self, logits, labels):
        probs = F.softmax(logits, dim=1)
        confidences, predictions = torch.max(probs, 1)
        # Labels arrive one-hot; reduce to class indices.
        labels = torch.argmax(labels, 1)
        accuracies = predictions.eq(labels)

        ece = torch.zeros(1, device=logits.device)
        for lo, hi in zip(self.bin_lowers, self.bin_uppers):
            # Samples whose top-1 confidence falls in (lo, hi].
            in_bin = confidences.gt(lo.item()) * confidences.le(hi.item())
            prop_in_bin = in_bin.float().mean()
            if prop_in_bin.item() > 0:
                # |mean confidence - mean accuracy| weighted by bin mass.
                acc_in_bin = accuracies[in_bin].float().mean()
                conf_in_bin = confidences[in_bin].mean()
                ece += torch.abs(conf_in_bin - acc_in_bin) * prop_in_bin

        return ece
def accurate_nb(preds, labels):
    """Count correct predictions given one-hot ``labels``.

    Both arguments are 2-D arrays of shape (n_samples, n_classes); the
    predicted/target class is the argmax along axis 1.

    Returns the number of samples where they agree.
    """
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = np.argmax(labels, axis=1).flatten()
    # BUG FIX: the original immediately re-assigned
    # ``labels_flat = labels.flatten()``, discarding the argmax and comparing
    # class indices against the raw flattened one-hot matrix (a shape-mismatched,
    # broadcast comparison yielding a wrong count).
    return np.sum(pred_flat == labels_flat)
    def train(self, epoch):
        # One full pass over the training split.
        self.iteration(epoch, self.train_data)

    def val(self, epoch):
        # One full pass over the validation split (also updates save_model).
        self.iteration(epoch, self.val_data, phase="val")

    def test(self, epoch):
        # One full pass over the test split; no parameter updates.
        self.iteration(epoch, self.test_data, phase="test")

    def iteration(self, epoch, data_loader, phase="train"):
        """
        Loop over ``data_loader`` for one epoch of training, validation or
        testing. Backward/optimizer steps run only when ``phase == "train"``.
        Per-batch and end-of-epoch metrics are printed into
        ``{workspace}/logs/{code}/log_{phase}_pretrained.txt`` by redirecting
        ``sys.stdout`` for the duration of the epoch.

        :param epoch: current epoch index
        :param data_loader: torch.utils.data.DataLoader for iteration
        :param phase: "train", "val" or "test"
        :return: None
        """
        self.log_file = f"{self.workspace_name}/logs/{self.code}/log_{phase}_pretrained.txt"
        # Truncate the log at the start of the run.
        if epoch == 0:
            f = open(self.log_file, 'w')
            f.close()
        # Reset the best-loss tracker at the first validation epoch.
        if phase == "val":
            self.avg_loss = 10000
        # Setting the tqdm progress bar
        data_iter = tqdm.tqdm(enumerate(data_loader),
                              desc="EP_%s:%d" % (phase, epoch),
                              total=len(data_loader),
                              bar_format="{l_bar}{r_bar}")

        # Running totals for the masked-token objective.
        avg_loss_mask = 0.0
        total_correct_mask = 0
        total_element_mask = 0

        # Running totals for the optional same-student objective.
        avg_loss_pred = 0.0
        total_correct_pred = 0
        total_element_pred = 0

        avg_loss = 0.0

        if phase == "train":
            self.model.train()
        else:
            self.model.eval()
        # NOTE(review): stdout stays redirected into the log file for the
        # whole epoch; an exception inside would leave sys.stdout pointing at
        # the (closed) file — confirm intended.
        with open(self.log_file, 'a') as f:
            sys.stdout = f
            for i, data in data_iter:
                # 0. batch_data will be sent into the device(GPU or cpu)
                data = {key: value.to(self.device) for key, value in data.items()}

                # 1. forward: optionally also predict whether two sequences
                # come from the same student.
                if self.same_student_prediction:
                    bert_hidden_rep, mask_lm_output, same_student_output = self.model.forward(data["bert_input"], data["segment_label"], self.same_student_prediction)
                else:
                    bert_hidden_rep, mask_lm_output = self.model.forward(data["bert_input"], data["segment_label"], self.same_student_prediction)

                # 2-2. NLLLoss of predicting masked token word
                mask_loss = self.criterion(mask_lm_output.transpose(1, 2), data["bert_label"])

                # 2-3. Combine objectives when same-student prediction is on.
                if self.same_student_prediction:
                    same_student_loss = self.criterion(same_student_output, data["is_same_student"])
                    loss = same_student_loss + mask_loss
                else:
                    loss = mask_loss

                # 3. backward and optimization only in train
                if phase == "train":
                    self.optim_schedule.zero_grad()
                    loss.backward()
                    self.optim_schedule.step_and_update_lr()

                # Masked-token accuracy: label 0 marks unmasked positions, so
                # only non-zero labels count toward the denominator.
                non_zero_mask = (data["bert_label"] != 0).float()
                predictions = torch.argmax(mask_lm_output, dim=-1)
                predicted_masked = predictions*non_zero_mask
                mask_correct = ((data["bert_label"] == predicted_masked)*non_zero_mask).sum().item()

                avg_loss_mask += loss.item()
                total_correct_mask += mask_correct
                total_element_mask += non_zero_mask.sum().item()

                torch.cuda.empty_cache()
                post_fix = {
                    "epoch": epoch,
                    "iter": i,
                    "avg_loss": avg_loss_mask / (i + 1),
                    "avg_acc_mask": (total_correct_mask / total_element_mask * 100) if total_element_mask != 0 else 0,
                    "loss": loss.item()
                }

                # Same-student prediction accuracy, when enabled.
                if self.same_student_prediction:
                    correct = same_student_output.argmax(dim=-1).eq(data["is_same_student"]).sum().item()
                    avg_loss_pred += loss.item()
                    total_correct_pred += correct
                    total_element_pred += data["is_same_student"].nelement()
                    post_fix["avg_loss"] = avg_loss_pred / (i + 1)
                    post_fix["avg_acc_pred"] = total_correct_pred / total_element_pred * 100
                    post_fix["loss"] = loss.item()

                avg_loss +=loss.item()

                if i % self.log_freq == 0:
                    data_iter.write(str(post_fix))
            end_time = time.time()
            final_msg = {
                "epoch": f"EP{epoch}_{phase}",
                "avg_loss": avg_loss / len(data_iter),
                "total_masked_acc": total_correct_mask * 100.0 / total_element_mask if total_element_mask != 0 else 0,
                "time_taken_from_start": end_time - self.start_time
            }

            if self.same_student_prediction:
                final_msg["total_prediction_acc"] = total_correct_pred * 100.0 / total_element_pred

            print(final_msg)

            # Redundant close (the context manager closes f), kept as-is;
            # restore the real stdout before leaving the epoch.
            f.close()
            sys.stdout = sys.__stdout__

        # Track the best validation loss so the caller knows when to save.
        if phase == "val":
            self.save_model = False
            if self.avg_loss > (avg_loss / len(data_iter)):
                self.save_model = True
                self.avg_loss = (avg_loss / len(data_iter))

    def save(self, epoch, file_path="output/bert_trained.model"):
        """
        Saving the current BERT model on file_path

        :param epoch: current epoch number
        :param file_path: model output path which gonna be file_path+"ep%d" % epoch
        :return: final_output_path
        """
        output_path = file_path + ".ep%d" % epoch

        # Move to CPU before serializing, then back to the training device.
        torch.save(self.bert.cpu(), output_path)
        self.bert.to(self.device)
        print("EP:%d Model Saved on:" % epoch, output_path)
        return output_path
train dataset data loader + :param test_dataloader: test dataset data loader [can be None] + :param lr: learning rate of optimizer + :param betas: Adam optimizer betas + :param weight_decay: Adam optimizer weight decay param + :param with_cuda: traning with cuda + :param log_freq: logging frequency of the batch iteration + """ + + # Setup cuda device for BERT training, argument -c, --cuda should be true + cuda_condition = torch.cuda.is_available() and with_cuda + self.device = torch.device("cuda:0" if cuda_condition else "cpu") + print(with_cuda, cuda_condition, " Device used = ", self.device) + + # This BERT model will be saved every epoch + self.bert = bert + for param in self.bert.parameters(): + param.requires_grad = False + # Initialize the BERT Language Model, with BERT model + self.model = BERTForClassification(self.bert, vocab_size, num_labels).to(self.device) + + # Distributed GPU training if CUDA can detect more than 1 GPU + if with_cuda and torch.cuda.device_count() > 1: + print("Using %d GPUS for BERT" % torch.cuda.device_count()) + self.model = nn.DataParallel(self.model, device_ids=cuda_devices) + + # Setting the train and test data loader + self.train_data = train_dataloader + self.test_data = test_dataloader + + self.optim = Adam(self.model.parameters(), lr=lr, weight_decay=weight_decay) #, eps=1e-9 + # self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1) + + if num_labels == 1: + self.criterion = nn.MSELoss() + elif num_labels == 2: + self.criterion = nn.BCEWithLogitsLoss() + # self.criterion = nn.CrossEntropyLoss() + elif num_labels > 2: + self.criterion = nn.CrossEntropyLoss() + # self.criterion = nn.BCEWithLogitsLoss() + + # self.ece_criterion = ECE().to(self.device) + + self.log_freq = log_freq + self.workspace_name = workspace_name + self.finetune_task = finetune_task + self.save_model = False + self.avg_loss = 10000 + self.start_time = time.time() + self.probability_list = [] + print("Total 
Parameters:", sum([p.nelement() for p in self.model.parameters()])) + + def train(self, epoch): + self.iteration(epoch, self.train_data) + + def test(self, epoch): + self.iteration(epoch, self.test_data, train=False) + + def iteration(self, epoch, data_loader, train=True): + """ + loop over the data_loader for training or testing + if on train status, backward operation is activated + and also auto save the model every peoch + + :param epoch: current epoch index + :param data_loader: torch.utils.data.DataLoader for iteration + :param train: boolean value of is train or test + :return: None + """ + str_code = "train" if train else "test" + + self.log_file = f"{self.workspace_name}/logs/{self.finetune_task}/log_{str_code}_finetuned.txt" + + if epoch == 0: + f = open(self.log_file, 'w') + f.close() + if not train: + self.avg_loss = 10000 + + # Setting the tqdm progress bar + data_iter = tqdm.tqdm(enumerate(data_loader), + desc="EP_%s:%d" % (str_code, epoch), + total=len(data_loader), + bar_format="{l_bar}{r_bar}") + + avg_loss = 0.0 + total_correct = 0 + total_element = 0 + plabels = [] + tlabels = [] + + eval_accurate_nb = 0 + nb_eval_examples = 0 + logits_list = [] + labels_list = [] + + if train: + self.model.train() + else: + self.model.eval() + self.probability_list = [] + with open(self.log_file, 'a') as f: + sys.stdout = f + + for i, data in data_iter: + # 0. 
batch_data will be sent into the device(GPU or cpu) + data = {key: value.to(self.device) for key, value in data.items()} + if train: + h_rep, logits = self.model.forward(data["bert_input"], data["segment_label"]) + else: + with torch.no_grad(): + h_rep, logits = self.model.forward(data["bert_input"], data["segment_label"]) + # print(logits, logits.shape) + logits_list.append(logits.cpu()) + labels_list.append(data["progress_status"].cpu()) + # print(">>>>>>>>>>>>", progress_output) + # print(f"{epoch}---nelement--- {data['progress_status'].nelement()}") + # print(data["progress_status"].shape, logits.shape) + progress_loss = self.criterion(logits, data["progress_status"]) + loss = progress_loss + + if torch.cuda.device_count() > 1: + loss = loss.mean() + + # 3. backward and optimization only in train + if train: + self.optim.zero_grad() + loss.backward() + # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0) + self.optim.step() + + # progress prediction accuracy + # correct = progress_output.argmax(dim=-1).eq(data["progress_status"]).sum().item() + probs = nn.LogSoftmax(dim=-1)(logits) + self.probability_list.append(probs) + predicted_labels = torch.argmax(probs, dim=-1) + true_labels = torch.argmax(data["progress_status"], dim=-1) + plabels.extend(predicted_labels.cpu().numpy()) + tlabels.extend(true_labels.cpu().numpy()) + + # Compare predicted labels to true labels and calculate accuracy + correct = (predicted_labels == true_labels).sum().item() + avg_loss += loss.item() + total_correct += correct + # total_element += true_labels.nelement() + total_element += data["progress_status"].nelement() + # print(">>>>>>>>>>>>>>", predicted_labels, true_labels, correct, total_correct, total_element) + + # if train: + post_fix = { + "epoch": epoch, + "iter": i, + "avg_loss": avg_loss / (i + 1), + "avg_acc": total_correct / total_element * 100, + "loss": loss.item() + } +# else: +# logits = logits.detach().cpu().numpy() +# label_ids = 
data["progress_status"].to('cpu').numpy() +# tmp_eval_nb = accurate_nb(logits, label_ids) + +# eval_accurate_nb += tmp_eval_nb +# nb_eval_examples += label_ids.shape[0] + +# # total_element += data["progress_status"].nelement() +# # avg_loss += loss.item() + +# post_fix = { +# "epoch": epoch, +# "iter": i, +# "avg_loss": avg_loss / (i + 1), +# "avg_acc": tmp_eval_nb / total_element * 100, +# "loss": loss.item() +# } + + + if i % self.log_freq == 0: + data_iter.write(str(post_fix)) + + # precisions = precision_score(plabels, tlabels, average="weighted") + # recalls = recall_score(plabels, tlabels, average="weighted") + f1_scores = f1_score(plabels, tlabels, average="weighted") + # if train: + end_time = time.time() + final_msg = { + "epoch": f"EP{epoch}_{str_code}", + "avg_loss": avg_loss / len(data_iter), + "total_acc": total_correct * 100.0 / total_element, + # "precisions": precisions, + # "recalls": recalls, + "f1_scores": f1_scores, + "time_taken_from_start": end_time - self.start_time + } +# else: +# eval_accuracy = eval_accurate_nb/nb_eval_examples + +# logits_ece = torch.cat(logits_list) +# labels_ece = torch.cat(labels_list) +# ece = self.ece_criterion(logits_ece, labels_ece).item() +# end_time = time.time() +# final_msg = { +# "epoch": f"EP{epoch}_{str_code}", +# "eval_accuracy": eval_accuracy, +# "ece": ece, +# "avg_loss": avg_loss / len(data_iter), +# "precisions": precisions, +# "recalls": recalls, +# "f1_scores": f1_scores, +# "time_taken_from_start": end_time - self.start_time +# } +# if self.save_model: +# conf_hist = visualization.ConfidenceHistogram() +# plt_test = conf_hist.plot(np.array(logits_ece), np.array(labels_ece), title= f"Confidence Histogram {epoch}") +# plt_test.savefig(f"{self.workspace_name}/plots/confidence_histogram/{self.finetune_task}/conf_histogram_test_{epoch}.png",bbox_inches='tight') +# plt_test.close() + +# rel_diagram = visualization.ReliabilityDiagram() +# plt_test_2 = rel_diagram.plot(np.array(logits_ece), 
np.array(labels_ece),title=f"Reliability Diagram {epoch}") +# plt_test_2.savefig(f"{self.workspace_name}/plots/confidence_histogram/{self.finetune_task}/rel_diagram_test_{epoch}.png",bbox_inches='tight') +# plt_test_2.close() + print(final_msg) + + # print("EP%d_%s, avg_loss=" % (epoch, str_code), avg_loss / len(data_iter), "total_acc=", total_correct * 100.0 / total_element) + f.close() + sys.stdout = sys.__stdout__ + self.save_model = False + if self.avg_loss > (avg_loss / len(data_iter)): + self.save_model = True + self.avg_loss = (avg_loss / len(data_iter)) + + def iteration_1(self, epoch_idx, data): + try: + data = {key: value.to(self.device) for key, value in data.items()} + logits = self.model(data['input_ids'], data['segment_label']) + # Ensure logits is a tensor, not a tuple + loss_fct = nn.CrossEntropyLoss() + loss = loss_fct(logits, data['labels']) + + # Backpropagation and optimization + self.optim.zero_grad() + loss.backward() + self.optim.step() + + if self.log_freq > 0 and epoch_idx % self.log_freq == 0: + print(f"Epoch {epoch_idx}: Loss = {loss.item()}") + + return loss + + except Exception as e: + print(f"Error during iteration: {e}") + raise + + + + + + # plt_test.show() + # print("EP%d_%s, " % (epoch, str_code)) + + def save(self, epoch, file_path="output/bert_fine_tuned_trained.model"): + """ + Saving the current BERT model on file_path + + :param epoch: current epoch number + :param file_path: model output path which gonna be file_path+"ep%d" % epoch + :return: final_output_path + """ + if self.finetune_task: + fpath = file_path.split("/") + output_path = fpath[0]+ "/"+ fpath[1]+f"/{self.finetune_task}/" + fpath[2] + ".ep%d" % epoch + else: + output_path = file_path + ".ep%d" % epoch + torch.save(self.model.cpu(), output_path) + self.model.to(self.device) + print("EP:%d Model Saved on:" % epoch, output_path) + return output_path + + +class BERTAttention: + def __init__(self, bert: BERT, vocab_obj, train_dataloader: DataLoader, 
class BERTAttention:
    """Aggregates last-layer attention scores over a dataloader and plots a heatmap."""

    def __init__(self, bert: BERT, vocab_obj, train_dataloader: DataLoader,
                 workspace_name=None, code=None, finetune_task=None, with_cuda=True):
        """
        :param bert: trained BERT model whose attention_values are inspected
        :param vocab_obj: vocabulary object providing to_sentence() for id->token decoding
        :param train_dataloader: data loader to iterate for attention extraction
        :param workspace_name: root directory used for the output plot path
        :param code: sub-directory name used in the output plot path
        :param finetune_task: task name used in the output plot filename
        :param with_cuda: use CUDA when available
        """
        cuda_condition = torch.cuda.is_available() and with_cuda
        self.device = torch.device("cuda:0" if cuda_condition else "cpu")
        print(with_cuda, cuda_condition, " Device used = ", self.device)
        self.bert = bert.to(self.device)

        self.train_dataloader = train_dataloader
        self.workspace_name = workspace_name
        self.code = code
        self.finetune_task = finetune_task
        self.vocab_obj = vocab_obj

    def getAttention(self):
        """Average last-layer attention between known step labels and save a heatmap."""
        labels = ['PercentChange', 'NumeratorQuantity2', 'NumeratorQuantity1', 'DenominatorQuantity1',
                  'OptionalTask_1', 'EquationAnswer', 'NumeratorFactor', 'DenominatorFactor',
                  'OptionalTask_2', 'FirstRow1:1', 'FirstRow1:2', 'FirstRow2:1', 'FirstRow2:2', 'SecondRow',
                  'ThirdRow', 'FinalAnswer', 'FinalAnswerDirection']
        df_all = pd.DataFrame(0.0, index=labels, columns=labels)

        # Setting the tqdm progress bar
        data_iter = tqdm.tqdm(enumerate(self.train_dataloader),
                              desc="attention",
                              total=len(self.train_dataloader),
                              bar_format="{l_bar}{r_bar}")
        count = 0
        for i, data in data_iter:
            data = {key: value.to(self.device) for key, value in data.items()}
            # Forward pass populates self.bert.attention_values as a side effect.
            a = self.bert.forward(data["bert_input"], data["segment_label"])
            # NOTE(review): this sums segment_label over the whole batch, yet is
            # used below as a per-sample sequence length — confirm intent.
            non_zero = np.sum(data["segment_label"].cpu().detach().numpy())

            # Last Transformer layer, rearranged to (head, batch, seq, seq).
            last_layer = self.bert.attention_values[-1].transpose(1, 0, 2, 3)
            # BUGFIX(readability): the old unpack `head, d_model, s, s = ...`
            # bound `s` twice and then reused `s` as a token loop variable.
            head, d_model, _, _ = last_layer.shape

            for d in range(d_model):
                # Decode token ids to step names, dropping [CLS]/[SEP] positions.
                seq_labels = self.vocab_obj.to_sentence(
                    data["bert_input"].cpu().detach().numpy().tolist()[d])[1:non_zero - 1]
                # Keep the LAST occurrence index of each known label.
                indices_to_choose = defaultdict(int)
                for k, tok in enumerate(seq_labels):
                    if tok in labels:
                        indices_to_choose[tok] = k
                indices_chosen = list(indices_to_choose.values())
                selected_seq_labels = [tok for l, tok in enumerate(seq_labels) if l in indices_chosen]

                for h in range(head):
                    # Attention submatrix restricted to the chosen label positions.
                    df_cm = pd.DataFrame(last_layer[h][d][indices_chosen, :][:, indices_chosen],
                                         index=selected_seq_labels, columns=selected_seq_labels)
                    df_all = df_all.add(df_cm, fill_value=0)
                    count += 1

        print(f"Count of total : {count, head * self.train_dataloader.dataset.len}")
        df_all = df_all.div(count)
        df_all = df_all.reindex(index=labels, columns=labels)
        sns.heatmap(df_all, annot=False)
        plt.title("Attentions") #Probabilities
        plt.xlabel("Steps")
        plt.ylabel("Steps")
        plt.grid(True)
        plt.tick_params(axis='x', bottom=False, top=True, labelbottom=False, labeltop=True, labelrotation=90)
        plt.savefig(f"{self.workspace_name}/plots/{self.code}/{self.finetune_task}_attention_scores.png", bbox_inches='tight')
        plt.show()
        plt.close()
class ModelWithTemperature(nn.Module):
    """
    A thin decorator, which wraps a model with temperature scaling
    model (nn.Module):
        A classification neural network
    NB: Output of the neural network should be the classification logits,
        NOT the softmax (or log softmax)!
    """
    def __init__(self, model):
        super(ModelWithTemperature, self).__init__()
        self.model = model
        # Initialized at 1.5; tuned on a validation set by set_temperature().
        self.temperature = nn.Parameter(torch.ones(1) * 1.5)

    def forward(self, input_ids, token_type_ids, attention_mask):
        logits = self.model(input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask)[0]
        return self.temperature_scale(logits)

    def temperature_scale(self, logits):
        """
        Perform temperature scaling on logits
        """
        # Expand temperature to match the size of logits
        temperature = self.temperature.unsqueeze(1).expand(logits.size(0), logits.size(1))
        return logits / temperature

    def set_temperature(self, valid_loader, args):
        """
        Tune the temperature of the model (using the validation set).
        We're going to set it to optimize NLL.
        valid_loader (DataLoader): validation set loader
        """
        nll_criterion = nn.CrossEntropyLoss()
        ece_criterion = ECE().to(args.device)

        # First: collect all the logits and labels for the validation set
        logits_list = []
        labels_list = []
        with torch.no_grad():
            for step, batch in enumerate(valid_loader):
                batch = tuple(t.to(args.device) for t in batch)
                b_input_ids, b_input_mask, b_labels = batch
                logits = self.model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0]
                logits_list.append(logits)
                labels_list.append(b_labels)
        logits = torch.cat(logits_list)
        labels = torch.cat(labels_list)

        # Calculate NLL and ECE before temperature scaling
        before_temperature_nll = nll_criterion(logits, labels).item()
        before_temperature_ece = ece_criterion(logits, labels).item()
        print('Before temperature - NLL: %.3f, ECE: %.3f' % (before_temperature_nll, before_temperature_ece))

        # Next: optimize the temperature w.r.t. NLL
        optimizer = optim.LBFGS([self.temperature], lr=0.01, max_iter=50)

        def nll_closure():
            # BUGFIX: LBFGS re-invokes the closure during its line search;
            # gradients must be zeroed on every call or they accumulate.
            # (Also renamed from `eval`, which shadowed the builtin.)
            optimizer.zero_grad()
            loss = nll_criterion(self.temperature_scale(logits), labels)
            loss.backward()
            return loss

        optimizer.step(nll_closure)

        # Calculate NLL and ECE after temperature scaling
        after_temperature_nll = nll_criterion(self.temperature_scale(logits), labels).item()
        after_temperature_ece = ece_criterion(self.temperature_scale(logits), labels).item()
        print('Optimal temperature: %.3f' % self.temperature.item())
        print('After temperature - NLL: %.3f, ECE: %.3f' % (after_temperature_nll, after_temperature_ece))

        return self


class ECE(nn.Module):
    """Expected Calibration Error over equal-width confidence bins (takes logits)."""

    def __init__(self, n_bins=15):
        """
        n_bins (int): number of confidence interval bins
        """
        super(ECE, self).__init__()
        bin_boundaries = torch.linspace(0, 1, n_bins + 1)
        self.bin_lowers = bin_boundaries[:-1]
        self.bin_uppers = bin_boundaries[1:]

    def forward(self, logits, labels):
        softmaxes = F.softmax(logits, dim=1)
        confidences, predictions = torch.max(softmaxes, 1)
        accuracies = predictions.eq(labels)

        ece = torch.zeros(1, device=logits.device)
        for bin_lower, bin_upper in zip(self.bin_lowers, self.bin_uppers):
            # Calculated |confidence - accuracy| in each bin
            in_bin = confidences.gt(bin_lower.item()) * confidences.le(bin_upper.item())
            prop_in_bin = in_bin.float().mean()
            if prop_in_bin.item() > 0:
                accuracy_in_bin = accuracies[in_bin].float().mean()
                avg_confidence_in_bin = confidences[in_bin].mean()
                ece += torch.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin

        return ece
class ECE_v2(nn.Module):
    """Expected Calibration Error computed directly from softmax probabilities."""

    def __init__(self, n_bins=15):
        """
        n_bins (int): number of confidence interval bins
        """
        super(ECE_v2, self).__init__()
        edges = torch.linspace(0, 1, n_bins + 1)
        self.bin_lowers = edges[:-1]
        self.bin_uppers = edges[1:]

    def forward(self, softmaxes, labels):
        confidences, predictions = torch.max(softmaxes, 1)
        accuracies = predictions.eq(labels)
        ece = torch.zeros(1, device=softmaxes.device)

        for lo, hi in zip(self.bin_lowers, self.bin_uppers):
            # |confidence - accuracy| within each bin, weighted by the bin's mass
            in_bin = confidences.gt(lo.item()) * confidences.le(hi.item())
            prop_in_bin = in_bin.float().mean()
            if prop_in_bin.item() > 0:
                acc_in_bin = accuracies[in_bin].float().mean()
                conf_in_bin = confidences[in_bin].mean()
                ece += torch.abs(conf_in_bin - acc_in_bin) * prop_in_bin
        return ece


def accurate_nb(preds, labels):
    """Return how many rows of `preds` have their argmax equal to the label."""
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat)


def set_seed(args):
    """Seed the python, numpy and torch RNGs from args.seed for reproducibility."""
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)


def apply_dropout(m):
    """Force nn.Dropout modules into train mode (used for MC-dropout at eval time)."""
    if type(m) == nn.Dropout:
        m.train()
parser.add_argument('--eps_in', default=0.0001, type=float, help="Perturbation size of in-domain adversarial training") + + args = parser.parse_args() + + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + args.device = device + set_seed(args) + + outf = 'test/'+args.model+'-'+str(args.index) + if not os.path.isdir(outf): + os.makedirs(outf) + + if args.model == 'base': + dirname = '{}/BERT-base-{}'.format(args.in_dataset, args.index) + pretrained_dir = './model_save/{}'.format(dirname) + # Load a trained model and vocabulary that you have fine-tuned + model = BertForSequenceClassification.from_pretrained(pretrained_dir) + model.to(args.device) + print('Load Tekenizer') + + elif args.model == 'mc-dropout': + dirname = '{}/BERT-base-{}'.format(args.in_dataset, args.index) + pretrained_dir = './model_save/{}'.format(dirname) + # Load a trained model and vocabulary that you have fine-tuned + model = BertForSequenceClassification.from_pretrained(pretrained_dir) + model.to(args.device) + + elif args.model == 'temperature': + dirname = '{}/BERT-base-{}'.format(args.in_dataset, args.index) + pretrained_dir = './model_save/{}'.format(dirname) + orig_model = BertForSequenceClassification.from_pretrained(pretrained_dir) + orig_model.to(args.device) + model = ModelWithTemperature(orig_model) + model.to(args.device) + + elif args.model == 'manifold-smoothing': + dirname = '{}/BERT-mf-{}-{}-{}-{}'.format(args.in_dataset, args.index, args.eps_in, args.eps_y, args.eps_out) + print(dirname) + pretrained_dir = './model_save/{}'.format(dirname) + model = BertForSequenceClassification.from_pretrained(pretrained_dir) + model.to(args.device) + + + if args.saved_dataset == 'n': + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) + train_sentences, val_sentences, test_sentences, train_labels, val_labels, test_labels = load_dataset(args.in_dataset) + _, _, nt_test_sentences, _, _, nt_test_labels = load_dataset(args.out_dataset) + + 
val_input_ids = [] + test_input_ids = [] + nt_test_input_ids = [] + + if args.in_dataset == '20news' or args.in_dataset == '20news-15': + MAX_LEN = 150 + else: + MAX_LEN = 256 + + for sent in val_sentences: + encoded_sent = tokenizer.encode( + sent, # Sentence to encode. + add_special_tokens = True, # Add '[CLS]' and '[SEP]' + truncation= True, + max_length = MAX_LEN, # Truncate all sentences. + #return_tensors = 'pt', # Return pytorch tensors. + ) + # Add the encoded sentence to the list. + val_input_ids.append(encoded_sent) + + + for sent in test_sentences: + encoded_sent = tokenizer.encode( + sent, # Sentence to encode. + add_special_tokens = True, # Add '[CLS]' and '[SEP]' + truncation= True, + max_length = MAX_LEN, # Truncate all sentences. + #return_tensors = 'pt', # Return pytorch tensors. + ) + # Add the encoded sentence to the list. + test_input_ids.append(encoded_sent) + + for sent in nt_test_sentences: + encoded_sent = tokenizer.encode( + sent, + add_special_tokens = True, + truncation= True, + max_length = MAX_LEN, + ) + nt_test_input_ids.append(encoded_sent) + + # Pad our input tokens + val_input_ids = pad_sequences(val_input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post") + test_input_ids = pad_sequences(test_input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post") + nt_test_input_ids = pad_sequences(nt_test_input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post") + + val_attention_masks = [] + test_attention_masks = [] + nt_test_attention_masks = [] + + for seq in val_input_ids: + seq_mask = [float(i>0) for i in seq] + val_attention_masks.append(seq_mask) + for seq in test_input_ids: + seq_mask = [float(i>0) for i in seq] + test_attention_masks.append(seq_mask) + for seq in nt_test_input_ids: + seq_mask = [float(i>0) for i in seq] + nt_test_attention_masks.append(seq_mask) + + + val_inputs = torch.tensor(val_input_ids) + val_labels = torch.tensor(val_labels) + val_masks = 
torch.tensor(val_attention_masks) + + test_inputs = torch.tensor(test_input_ids) + test_labels = torch.tensor(test_labels) + test_masks = torch.tensor(test_attention_masks) + + nt_test_inputs = torch.tensor(nt_test_input_ids) + nt_test_labels = torch.tensor(nt_test_labels) + nt_test_masks = torch.tensor(nt_test_attention_masks) + + val_data = TensorDataset(val_inputs, val_masks, val_labels) + test_data = TensorDataset(test_inputs, test_masks, test_labels) + nt_test_data = TensorDataset(nt_test_inputs, nt_test_masks, nt_test_labels) + + dataset_dir = 'dataset/test' + if not os.path.exists(dataset_dir): + os.makedirs(dataset_dir) + torch.save(val_data, dataset_dir+'/{}_val_in_domain.pt'.format(args.in_dataset)) + torch.save(test_data, dataset_dir+'/{}_test_in_domain.pt'.format(args.in_dataset)) + torch.save(nt_test_data, dataset_dir+'/{}_test_out_of_domain.pt'.format(args.out_dataset)) + + else: + dataset_dir = 'dataset/test' + val_data = torch.load(dataset_dir+'/{}_val_in_domain.pt'.format(args.in_dataset)) + test_data = torch.load(dataset_dir+'/{}_test_in_domain.pt'.format(args.in_dataset)) + nt_test_data = torch.load(dataset_dir+'/{}_test_out_of_domain.pt'.format(args.out_dataset)) + + + + + +######## saved dataset + test_sampler = SequentialSampler(test_data) + test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=args.eval_batch_size) + + nt_test_sampler = SequentialSampler(nt_test_data) + nt_test_dataloader = DataLoader(nt_test_data, sampler=nt_test_sampler, batch_size=args.eval_batch_size) + val_sampler = SequentialSampler(val_data) + val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=args.eval_batch_size) + + if args.model == 'temperature': + model.set_temperature(val_dataloader, args) + + model.eval() + + if args.model == 'mc-dropout': + model.apply(apply_dropout) + + correct = 0 + total = 0 + output_list = [] + labels_list = [] + +##### validation dat + with torch.no_grad(): + for step, batch in 
enumerate(val_dataloader): + batch = tuple(t.to(args.device) for t in batch) + b_input_ids, b_input_mask, b_labels = batch + total += b_labels.shape[0] + batch_output = 0 + for j in range(args.eva_iter): + if args.model == 'temperature': + current_batch = model(input_ids=b_input_ids, token_type_ids=None, attention_mask=b_input_mask) #logits + else: + current_batch = model(input_ids=b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0] #logits + batch_output = batch_output + F.softmax(current_batch, dim=1) + batch_output = batch_output/args.eva_iter + output_list.append(batch_output) + labels_list.append(b_labels) + score, predicted = batch_output.max(1) + correct += predicted.eq(b_labels).sum().item() + + ###calculate accuracy and ECE + val_eval_accuracy = correct/total + print("Val Accuracy: {}".format(val_eval_accuracy)) + ece_criterion = ECE_v2().to(args.device) + softmaxes_ece = torch.cat(output_list) + labels_ece = torch.cat(labels_list) + val_ece = ece_criterion(softmaxes_ece, labels_ece).item() + print('ECE on Val data: {}'.format(val_ece)) + +#### Test data + correct = 0 + total = 0 + output_list = [] + labels_list = [] + predict_list = [] + true_list = [] + true_list_ood = [] + predict_mis = [] + predict_in = [] + score_list = [] + correct_index_all = [] + ## test on in-distribution test set + with torch.no_grad(): + for step, batch in enumerate(test_dataloader): + batch = tuple(t.to(args.device) for t in batch) + b_input_ids, b_input_mask, b_labels = batch + total += b_labels.shape[0] + batch_output = 0 + for j in range(args.eva_iter): + if args.model == 'temperature': + current_batch = model(input_ids=b_input_ids, token_type_ids=None, attention_mask=b_input_mask) #logits + else: + current_batch = model(input_ids=b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0] #logits + batch_output = batch_output + F.softmax(current_batch, dim=1) + batch_output = batch_output/args.eva_iter + output_list.append(batch_output) + 
labels_list.append(b_labels) + score, predicted = batch_output.max(1) + + correct += predicted.eq(b_labels).sum().item() + + correct_index = (predicted == b_labels) + correct_index_all.append(correct_index) + score_list.append(score) + + ###calcutae accuracy + eval_accuracy = correct/total + print("Test Accuracy: {}".format(eval_accuracy)) + + ##calculate ece + ece_criterion = ECE_v2().to(args.device) + softmaxes_ece = torch.cat(output_list) + labels_ece = torch.cat(labels_list) + ece = ece_criterion(softmaxes_ece, labels_ece).item() + print('ECE on Test data: {}'.format(ece)) + + #confidence for in-distribution data + score_in_array = torch.cat(score_list) + #indices of data that are classified correctly + correct_array = torch.cat(correct_index_all) + label_array = torch.cat(labels_list) + +### test on out-of-distribution data + predict_ood = [] + score_ood_list = [] + true_list_ood = [] + with torch.no_grad(): + for step, batch in enumerate(nt_test_dataloader): + batch = tuple(t.to(args.device) for t in batch) + b_input_ids, b_input_mask, b_labels = batch + batch_output = 0 + for j in range(args.eva_iter): + if args.model == 'temperature': + current_batch = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask) + else: + current_batch = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0] + batch_output = batch_output + F.softmax(current_batch, dim=1) + batch_output = batch_output/args.eva_iter + score_out, _ = batch_output.max(1) + + score_ood_list.append(score_out) + + score_ood_array = torch.cat(score_ood_list) + + + + label_array = label_array.cpu().numpy() + score_ood_array = score_ood_array.cpu().numpy() + score_in_array = score_in_array.cpu().numpy() + correct_array = correct_array.cpu().numpy() + + + + + ####### calculate NBAUCC for detection task + predict_o = np.zeros(len(score_in_array)+len(score_ood_array)) + true_o = np.ones(len(score_in_array)+len(score_ood_array)) + true_o[:len(score_in_array)] = 0 ## 
def _load_20news(categories):
    """Fetch 20newsgroups and split its train portion 80/20 into train/val.

    :param categories: list of newsgroup names to keep, or None for all.
    :return: (train_sentences, val_sentences, test_sentences,
              train_labels, val_labels, test_labels)
    """
    VALIDATION_SPLIT = 0.8
    newsgroups_train = fetch_20newsgroups('dataset/20news', subset='train', shuffle=True,
                                          categories=categories, random_state=0)
    print(newsgroups_train.target_names)
    print(len(newsgroups_train.data))

    newsgroups_test = fetch_20newsgroups('dataset/20news', subset='test', shuffle=False,
                                         categories=categories)
    print(len(newsgroups_test.data))

    train_len = int(VALIDATION_SPLIT * len(newsgroups_train.data))
    return (newsgroups_train.data[:train_len],
            newsgroups_train.data[train_len:],
            newsgroups_test.data,
            newsgroups_train.target[:train_len],
            newsgroups_train.target[train_len:],
            newsgroups_test.target)


def _load_wos(keep=None):
    """Read the Web-of-Science X/Y text files, optionally filtering by label.

    :param keep: predicate on the integer label; None keeps every document.
    :return: (documents, labels) as parallel lists.
    """
    with open('./dataset/WebOfScience/WOS46985/X.txt', 'r') as read_file:
        x_all = [str(x) for x in read_file.readlines()]
    print(len(x_all))

    with open('./dataset/WebOfScience/WOS46985/Y.txt', 'r') as read_file:
        y_all = [int(y) for y in read_file.readlines()]
    print(len(y_all))
    print(max(y_all), min(y_all))

    if keep is None:
        return x_all, y_all
    x_in, y_in = [], []
    for x, y in zip(x_all, y_all):
        if keep(y):
            x_in.append(x)
            y_in.append(y)
    return x_in, y_in


def load_dataset(dataset):
    """Load train/val/test splits for the named dataset.

    Supported names: 'sst', '20news', '20news-15', '20news-5',
    'wos', 'wos-100', 'wos-34', 'agnews'.

    For the out-of-distribution-only datasets ('20news-5', 'wos-34') the
    train/val entries of the returned tuple are None; only the test split
    is populated.

    :return: (train_sentences, val_sentences, test_sentences,
              train_labels, val_labels, test_labels)
    :raises ValueError: for an unknown dataset name (previously this fell
        through and crashed with UnboundLocalError at the return).
    """
    if dataset == 'sst':
        df_train = pd.read_csv("./dataset/sst/SST-2/train.tsv", delimiter='\t', header=0)
        df_val = pd.read_csv("./dataset/sst/SST-2/dev.tsv", delimiter='\t', header=0)
        df_test = pd.read_csv("./dataset/sst/SST-2/sst-test.tsv", delimiter='\t',
                              header=None, names=['sentence', 'label'])

        train_sentences = df_train.sentence.values
        val_sentences = df_val.sentence.values
        test_sentences = df_test.sentence.values
        train_labels = df_train.label.values
        val_labels = df_val.label.values
        test_labels = df_test.label.values

    elif dataset == '20news':
        (train_sentences, val_sentences, test_sentences,
         train_labels, val_labels, test_labels) = _load_20news(categories=None)

    elif dataset == '20news-15':
        # The 15 in-distribution newsgroups.
        cats = ['alt.atheism',
                'comp.graphics',
                'comp.os.ms-windows.misc',
                'comp.sys.ibm.pc.hardware',
                'comp.sys.mac.hardware',
                'comp.windows.x',
                'rec.autos',
                'rec.motorcycles',
                'rec.sport.baseball',
                'rec.sport.hockey',
                'misc.forsale',
                'sci.crypt',
                'sci.electronics',
                'sci.med',
                'sci.space']
        (train_sentences, val_sentences, test_sentences,
         train_labels, val_labels, test_labels) = _load_20news(categories=cats)

    elif dataset == '20news-5':
        # The 5 held-out newsgroups: test split only (used as OOD data).
        cats = ['soc.religion.christian',
                'talk.politics.guns',
                'talk.politics.mideast',
                'talk.politics.misc',
                'talk.religion.misc']
        newsgroups_test = fetch_20newsgroups('dataset/20news', subset='test',
                                             shuffle=False, categories=cats)
        print(newsgroups_test.target_names)
        print(len(newsgroups_test.data))

        train_sentences = None
        val_sentences = None
        test_sentences = newsgroups_test.data
        train_labels = None
        val_labels = None
        test_labels = newsgroups_test.target

    elif dataset in ('wos', 'wos-100', 'wos-34'):
        TESTING_SPLIT = 0.6
        VALIDATION_SPLIT = 0.8
        if dataset == 'wos':
            x_in, y_in = _load_wos()
        elif dataset == 'wos-100':
            # in-distribution: labels 0..99
            x_in, y_in = _load_wos(keep=lambda y: y in range(100))
        else:
            # wos-34: held-out classes (labels outside 0..99), test only
            x_in, y_in = _load_wos(keep=lambda y: y not in range(100))

        train_val_len = int(TESTING_SPLIT * len(x_in))
        train_len = int(VALIDATION_SPLIT * train_val_len)

        if dataset == 'wos-34':
            train_sentences = None
            val_sentences = None
            train_labels = None
            val_labels = None
            test_sentences = x_in[train_val_len:]
            test_labels = y_in[train_val_len:]
            print(len(test_labels))
        else:
            train_sentences = x_in[:train_len]
            val_sentences = x_in[train_len:train_val_len]
            test_sentences = x_in[train_val_len:]
            train_labels = y_in[:train_len]
            val_labels = y_in[train_len:train_val_len]
            test_labels = y_in[train_val_len:]
            print(len(train_labels))
            print(len(val_labels))
            print(len(test_labels))

    elif dataset == 'agnews':
        VALIDATION_SPLIT = 0.8

        train_df = pd.read_csv('./dataset/agnews/train.csv', header=None)
        train_df.rename(columns={0: 'label', 1: 'title', 2: 'sentence'}, inplace=True)
        print(train_df.dtypes)
        # CSV class ids are 1-based; shift to 0-based.
        train_in_df_sentence = [str(s) for s in train_df.sentence.values]
        train_in_df_label = [label - 1 for label in train_df.label.values]

        test_df = pd.read_csv('./dataset/agnews/test.csv', header=None)
        test_df.rename(columns={0: 'label', 1: 'title', 2: 'sentence'}, inplace=True)
        test_in_df_sentence = [str(s) for s in test_df.sentence.values]
        test_in_df_label = [label - 1 for label in test_df.label.values]

        train_len = int(VALIDATION_SPLIT * len(train_in_df_sentence))

        train_sentences = train_in_df_sentence[:train_len]
        val_sentences = train_in_df_sentence[train_len:]
        test_sentences = test_in_df_sentence
        train_labels = train_in_df_label[:train_len]
        val_labels = train_in_df_label[train_len:]
        test_labels = test_in_df_label
        print(len(train_sentences))
        print(len(val_sentences))
        print(len(test_sentences))

    else:
        raise ValueError('Unknown dataset: {!r}'.format(dataset))

    return train_sentences, val_sentences, test_sentences, train_labels, val_labels, test_labels
+ plt.axvline(x=conf, color='tab:grey', linestyle='--', linewidth = 3) + if acc > conf: + plt.text(acc+0.03,0.9,'Accuracy',rotation=90,fontsize=11) + plt.text(conf-0.07,0.9,'Avg. Confidence',rotation=90, fontsize=11) + else: + plt.text(acc-0.07,0.9,'Accuracy',rotation=90,fontsize=11) + plt.text(conf+0.03,0.9,'Avg. Confidence',rotation=90, fontsize=11) + + plt.ylabel('% of Samples',fontsize=13) + plt.xlabel('Confidence',fontsize=13) + plt.tight_layout() + if title is not None: + plt.title(title,fontsize=16) + return plt + +class ReliabilityDiagram(metrics.MaxProbCELoss): + + def plot(self, output, labels, n_bins = 15, logits = True, title = None): + super().loss(output, labels, n_bins, logits) + + #computations + delta = 1.0/n_bins + x = np.arange(0,1,delta) + mid = np.linspace(delta/2,1-delta/2,n_bins) + error = np.abs(np.subtract(mid,self.bin_acc)) + + plt.rcParams["font.family"] = "serif" + #size and axis limits + plt.figure(figsize=(3,3)) + plt.xlim(0,1) + plt.ylim(0,1) + #plot grid + plt.grid(color='tab:grey', linestyle=(0, (1, 5)), linewidth=1,zorder=0) + #plot bars and identity line + plt.bar(x, self.bin_acc, color = 'b', width=delta,align='edge',edgecolor = 'k',label='Outputs',zorder=5) + plt.bar(x, error, bottom=np.minimum(self.bin_acc,mid), color = 'mistyrose', alpha=0.5, width=delta,align='edge',edgecolor = 'r',hatch='/',label='Gap',zorder=10) + ident = [0.0, 1.0] + plt.plot(ident,ident,linestyle='--',color='tab:grey',zorder=15) + #labels and legend + plt.ylabel('Accuracy',fontsize=13) + plt.xlabel('Confidence',fontsize=13) + plt.legend(loc='upper left',framealpha=1.0,fontsize='medium') + if title is not None: + plt.title(title,fontsize=16) + plt.tight_layout() + + return plt diff --git a/src/req_changes.py b/src/req_changes.py new file mode 100644 index 0000000000000000000000000000000000000000..2a3c5e31026b33b97d9340c29c25f4fe30da97e2 --- /dev/null +++ b/src/req_changes.py @@ -0,0 +1,17 @@ +import yaml + +# Load the environment.yml file +with 
open('src/environment.yml') as file: + env_data = yaml.safe_load(file) + +# Open the requirements.txt file for writing +with open('src/requirements.txt', 'w') as req_file: + # Iterate over the dependencies + for dep in env_data['dependencies']: + if isinstance(dep, str): + # Write conda package as is (optional: convert to pip package name if known) + req_file.write(dep + '\n') + elif isinstance(dep, dict) and 'pip' in dep: + # Write pip packages as is + for pip_dep in dep['pip']: + req_file.write(pip_dep + '\n') diff --git a/src/requirements.txt b/src/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..55c7bc7c473d6437dc472b78d072b331e6d12e1e --- /dev/null +++ b/src/requirements.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b630fe650f761dd962091bf49f4e95cb855eb917c8376070879bd10053575130 +size 123 diff --git a/src/seq_model.py b/src/seq_model.py new file mode 100644 index 0000000000000000000000000000000000000000..49516a87a643cb959876ef5fe096e4f4df22f7e5 --- /dev/null +++ b/src/seq_model.py @@ -0,0 +1,43 @@ +import torch.nn as nn + +from .bert import BERT + + +class BERTSM(nn.Module): + """ + BERT Sequence Model + Masked Sequence Model + """ + + def __init__(self, bert: BERT, vocab_size): + """ + :param bert: BERT model which should be trained + :param vocab_size: total vocab size for masked_lm + """ + + super().__init__() + self.bert = bert + self.mask_lm = MaskedSequenceModel(self.bert.hidden, vocab_size) + + def forward(self, x, segment_label): + x = self.bert(x, segment_label) + return self.mask_lm(x), x[:, 0] + + +class MaskedSequenceModel(nn.Module): + """ + predicting origin token from masked input sequence + n-class classification problem, n-class = vocab_size + """ + + def __init__(self, hidden, vocab_size): + """ + :param hidden: output size of BERT model + :param vocab_size: total vocab size + """ + super().__init__() + self.linear = nn.Linear(hidden, vocab_size) + self.softmax = 
class ModelWithTemperature(nn.Module):
    """
    Thin wrapper that applies temperature scaling to a classifier.

    The wrapped model must return classification logits (NOT softmax or
    log-softmax probabilities) as the first element of its output.
    """

    def __init__(self, model):
        super(ModelWithTemperature, self).__init__()
        self.model = model
        # Single learnable temperature, initialised to 1.5.
        self.temperature = nn.Parameter(torch.ones(1) * 1.5)

    def forward(self, input_ids, token_type_ids, attention_mask):
        # Run the wrapped classifier, then divide its logits by T.
        raw_logits = self.model(input_ids, token_type_ids=token_type_ids,
                                attention_mask=attention_mask)[0]
        return self.temperature_scale(raw_logits)

    def temperature_scale(self, logits):
        """Divide every logit by the temperature, broadcast to logit shape."""
        t = self.temperature.unsqueeze(1).expand(logits.size(0), logits.size(1))
        return logits / t

    def set_temperature(self, valid_loader, args):
        """
        Tune the temperature on the validation set by minimising NLL.

        :param valid_loader: DataLoader yielding (input_ids, mask, labels)
        :param args: namespace providing `device`
        :return: self, with `temperature` optimised in place
        """
        nll_criterion = nn.CrossEntropyLoss()
        ece_criterion = ECE().to(args.device)

        # First pass: collect logits and labels for the whole validation set.
        collected_logits = []
        collected_labels = []
        with torch.no_grad():
            for batch in valid_loader:
                b_input_ids, b_input_mask, b_labels = (t.to(args.device) for t in batch)
                out = self.model(b_input_ids, token_type_ids=None,
                                 attention_mask=b_input_mask)[0]
                collected_logits.append(out)
                collected_labels.append(b_labels)
        logits = torch.cat(collected_logits)
        labels = torch.cat(collected_labels)

        # Report calibration before tuning.
        before_temperature_nll = nll_criterion(logits, labels).item()
        before_temperature_ece = ece_criterion(logits, labels).item()
        print('Before temperature - NLL: %.3f, ECE: %.3f' % (before_temperature_nll, before_temperature_ece))

        # LBFGS requires a closure that re-evaluates the loss.
        optimizer = optim.LBFGS([self.temperature], lr=0.01, max_iter=50)

        def closure():
            loss = nll_criterion(self.temperature_scale(logits), labels)
            loss.backward()
            return loss

        optimizer.step(closure)

        # Report calibration after tuning.
        after_temperature_nll = nll_criterion(self.temperature_scale(logits), labels).item()
        after_temperature_ece = ece_criterion(self.temperature_scale(logits), labels).item()
        print('Optimal temperature: %.3f' % self.temperature.item())
        print('After temperature - NLL: %.3f, ECE: %.3f' % (after_temperature_nll, after_temperature_ece))

        return self
= predictions.eq(labels) + + ece = torch.zeros(1, device=logits.device) + for bin_lower, bin_upper in zip(self.bin_lowers, self.bin_uppers): + # Calculated |confidence - accuracy| in each bin + in_bin = confidences.gt(bin_lower.item()) * confidences.le(bin_upper.item()) + prop_in_bin = in_bin.float().mean() + if prop_in_bin.item() > 0: + accuracy_in_bin = accuracies[in_bin].float().mean() + avg_confidence_in_bin = confidences[in_bin].mean() + ece += torch.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin + + return ece + + +class ECE_v2(nn.Module): + def __init__(self, n_bins=15): + """ + n_bins (int): number of confidence interval bins + """ + super(ECE_v2, self).__init__() + bin_boundaries = torch.linspace(0, 1, n_bins + 1) + self.bin_lowers = bin_boundaries[:-1] + self.bin_uppers = bin_boundaries[1:] + + def forward(self, softmaxes, labels): + confidences, predictions = torch.max(softmaxes, 1) + accuracies = predictions.eq(labels) + ece = torch.zeros(1, device=softmaxes.device) + + for bin_lower, bin_upper in zip(self.bin_lowers, self.bin_uppers): + # Calculated |confidence - accuracy| in each bin + in_bin = confidences.gt(bin_lower.item()) * confidences.le(bin_upper.item()) + prop_in_bin = in_bin.float().mean() + if prop_in_bin.item() > 0: + accuracy_in_bin = accuracies[in_bin].float().mean() + avg_confidence_in_bin = confidences[in_bin].mean() + ece += torch.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin + return ece + +def accurate_nb(preds, labels): + pred_flat = np.argmax(preds, axis=1).flatten() + labels_flat = labels.flatten() + return np.sum(pred_flat == labels_flat) + + +def set_seed(args): + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + +def apply_dropout(m): + if type(m) == nn.Dropout: + m.train() + + +def main(): + + parser = argparse.ArgumentParser(description='Test code - measure the detection peformance') + parser.add_argument('--eva_iter', default=1, type=int, help='number of passes for 
mc-dropout when evaluation') + parser.add_argument('--model', type=str, choices=['base', 'manifold-smoothing', 'mc-dropout','temperature'], default='base') + parser.add_argument('--seed', type=int, default=0, help='random seed for test') + parser.add_argument("--epochs", default=10, type=int, help="Number of epochs for training.") + parser.add_argument('--index', type=int, default=0, help='random seed you used during training') + parser.add_argument('--in_dataset', required=True, help='target dataset: 20news') + parser.add_argument('--out_dataset', required=True, help='out-of-dist dataset') + parser.add_argument('--eval_batch_size', type=int, default=32) + parser.add_argument('--saved_dataset', type=str, default='n') + parser.add_argument('--eps_out', default=0.001, type=float, help="Perturbation size of out-of-domain adversarial training") + parser.add_argument("--eps_y", default=0.1, type=float, help="Perturbation size of label") + parser.add_argument('--eps_in', default=0.0001, type=float, help="Perturbation size of in-domain adversarial training") + + args = parser.parse_args() + + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + args.device = device + set_seed(args) + + outf = 'test/'+args.model+'-'+str(args.index) + if not os.path.isdir(outf): + os.makedirs(outf) + + if args.model == 'base': + dirname = '{}/BERT-base-{}'.format(args.in_dataset, args.index) + pretrained_dir = './model_save/{}'.format(dirname) + # Load a trained model and vocabulary that you have fine-tuned + model = BertForSequenceClassification.from_pretrained(pretrained_dir) + model.to(args.device) + print('Load Tekenizer') + + elif args.model == 'mc-dropout': + dirname = '{}/BERT-base-{}'.format(args.in_dataset, args.index) + pretrained_dir = './model_save/{}'.format(dirname) + # Load a trained model and vocabulary that you have fine-tuned + model = BertForSequenceClassification.from_pretrained(pretrained_dir) + model.to(args.device) + + elif args.model == 
'temperature': + dirname = '{}/BERT-base-{}'.format(args.in_dataset, args.index) + pretrained_dir = './model_save/{}'.format(dirname) + orig_model = BertForSequenceClassification.from_pretrained(pretrained_dir) + orig_model.to(args.device) + model = ModelWithTemperature(orig_model) + model.to(args.device) + + elif args.model == 'manifold-smoothing': + dirname = '{}/BERT-mf-{}-{}-{}-{}'.format(args.in_dataset, args.index, args.eps_in, args.eps_y, args.eps_out) + print(dirname) + pretrained_dir = './model_save/{}'.format(dirname) + model = BertForSequenceClassification.from_pretrained(pretrained_dir) + model.to(args.device) + + + if args.saved_dataset == 'n': + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) + train_sentences, val_sentences, test_sentences, train_labels, val_labels, test_labels = load_dataset(args.in_dataset) + _, _, nt_test_sentences, _, _, nt_test_labels = load_dataset(args.out_dataset) + + val_input_ids = [] + test_input_ids = [] + nt_test_input_ids = [] + + if args.in_dataset == '20news' or args.in_dataset == '20news-15': + MAX_LEN = 150 + else: + MAX_LEN = 256 + + for sent in val_sentences: + encoded_sent = tokenizer.encode( + sent, # Sentence to encode. + add_special_tokens = True, # Add '[CLS]' and '[SEP]' + truncation= True, + max_length = MAX_LEN, # Truncate all sentences. + #return_tensors = 'pt', # Return pytorch tensors. + ) + # Add the encoded sentence to the list. + val_input_ids.append(encoded_sent) + + + for sent in test_sentences: + encoded_sent = tokenizer.encode( + sent, # Sentence to encode. + add_special_tokens = True, # Add '[CLS]' and '[SEP]' + truncation= True, + max_length = MAX_LEN, # Truncate all sentences. + #return_tensors = 'pt', # Return pytorch tensors. + ) + # Add the encoded sentence to the list. 
+ test_input_ids.append(encoded_sent) + + for sent in nt_test_sentences: + encoded_sent = tokenizer.encode( + sent, + add_special_tokens = True, + truncation= True, + max_length = MAX_LEN, + ) + nt_test_input_ids.append(encoded_sent) + + # Pad our input tokens + val_input_ids = pad_sequences(val_input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post") + test_input_ids = pad_sequences(test_input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post") + nt_test_input_ids = pad_sequences(nt_test_input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post") + + val_attention_masks = [] + test_attention_masks = [] + nt_test_attention_masks = [] + + for seq in val_input_ids: + seq_mask = [float(i>0) for i in seq] + val_attention_masks.append(seq_mask) + for seq in test_input_ids: + seq_mask = [float(i>0) for i in seq] + test_attention_masks.append(seq_mask) + for seq in nt_test_input_ids: + seq_mask = [float(i>0) for i in seq] + nt_test_attention_masks.append(seq_mask) + + + val_inputs = torch.tensor(val_input_ids) + val_labels = torch.tensor(val_labels) + val_masks = torch.tensor(val_attention_masks) + + test_inputs = torch.tensor(test_input_ids) + test_labels = torch.tensor(test_labels) + test_masks = torch.tensor(test_attention_masks) + + nt_test_inputs = torch.tensor(nt_test_input_ids) + nt_test_labels = torch.tensor(nt_test_labels) + nt_test_masks = torch.tensor(nt_test_attention_masks) + + val_data = TensorDataset(val_inputs, val_masks, val_labels) + test_data = TensorDataset(test_inputs, test_masks, test_labels) + nt_test_data = TensorDataset(nt_test_inputs, nt_test_masks, nt_test_labels) + + dataset_dir = 'dataset/test' + if not os.path.exists(dataset_dir): + os.makedirs(dataset_dir) + torch.save(val_data, dataset_dir+'/{}_val_in_domain.pt'.format(args.in_dataset)) + torch.save(test_data, dataset_dir+'/{}_test_in_domain.pt'.format(args.in_dataset)) + torch.save(nt_test_data, 
dataset_dir+'/{}_test_out_of_domain.pt'.format(args.out_dataset)) + + else: + dataset_dir = 'dataset/test' + val_data = torch.load(dataset_dir+'/{}_val_in_domain.pt'.format(args.in_dataset)) + test_data = torch.load(dataset_dir+'/{}_test_in_domain.pt'.format(args.in_dataset)) + nt_test_data = torch.load(dataset_dir+'/{}_test_out_of_domain.pt'.format(args.out_dataset)) + + + + + +######## saved dataset + test_sampler = SequentialSampler(test_data) + test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=args.eval_batch_size) + + nt_test_sampler = SequentialSampler(nt_test_data) + nt_test_dataloader = DataLoader(nt_test_data, sampler=nt_test_sampler, batch_size=args.eval_batch_size) + val_sampler = SequentialSampler(val_data) + val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=args.eval_batch_size) + + if args.model == 'temperature': + model.set_temperature(val_dataloader, args) + + model.eval() + + if args.model == 'mc-dropout': + model.apply(apply_dropout) + + correct = 0 + total = 0 + output_list = [] + labels_list = [] + +##### validation dat + with torch.no_grad(): + for step, batch in enumerate(val_dataloader): + batch = tuple(t.to(args.device) for t in batch) + b_input_ids, b_input_mask, b_labels = batch + total += b_labels.shape[0] + batch_output = 0 + for j in range(args.eva_iter): + if args.model == 'temperature': + current_batch = model(input_ids=b_input_ids, token_type_ids=None, attention_mask=b_input_mask) #logits + else: + current_batch = model(input_ids=b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0] #logits + batch_output = batch_output + F.softmax(current_batch, dim=1) + batch_output = batch_output/args.eva_iter + output_list.append(batch_output) + labels_list.append(b_labels) + score, predicted = batch_output.max(1) + correct += predicted.eq(b_labels).sum().item() + + ###calculate accuracy and ECE + val_eval_accuracy = correct/total + print("Val Accuracy: {}".format(val_eval_accuracy)) + 
ece_criterion = ECE_v2().to(args.device) + softmaxes_ece = torch.cat(output_list) + labels_ece = torch.cat(labels_list) + val_ece = ece_criterion(softmaxes_ece, labels_ece).item() + print('ECE on Val data: {}'.format(val_ece)) + +#### Test data + correct = 0 + total = 0 + output_list = [] + labels_list = [] + predict_list = [] + true_list = [] + true_list_ood = [] + predict_mis = [] + predict_in = [] + score_list = [] + correct_index_all = [] + ## test on in-distribution test set + with torch.no_grad(): + for step, batch in enumerate(test_dataloader): + batch = tuple(t.to(args.device) for t in batch) + b_input_ids, b_input_mask, b_labels = batch + total += b_labels.shape[0] + batch_output = 0 + for j in range(args.eva_iter): + if args.model == 'temperature': + current_batch = model(input_ids=b_input_ids, token_type_ids=None, attention_mask=b_input_mask) #logits + else: + current_batch = model(input_ids=b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0] #logits + batch_output = batch_output + F.softmax(current_batch, dim=1) + batch_output = batch_output/args.eva_iter + output_list.append(batch_output) + labels_list.append(b_labels) + score, predicted = batch_output.max(1) + + correct += predicted.eq(b_labels).sum().item() + + correct_index = (predicted == b_labels) + correct_index_all.append(correct_index) + score_list.append(score) + + ###calcutae accuracy + eval_accuracy = correct/total + print("Test Accuracy: {}".format(eval_accuracy)) + + ##calculate ece + ece_criterion = ECE_v2().to(args.device) + softmaxes_ece = torch.cat(output_list) + labels_ece = torch.cat(labels_list) + ece = ece_criterion(softmaxes_ece, labels_ece).item() + print('ECE on Test data: {}'.format(ece)) + + #confidence for in-distribution data + score_in_array = torch.cat(score_list) + #indices of data that are classified correctly + correct_array = torch.cat(correct_index_all) + label_array = torch.cat(labels_list) + +### test on out-of-distribution data + predict_ood = [] + 
score_ood_list = [] + true_list_ood = [] + with torch.no_grad(): + for step, batch in enumerate(nt_test_dataloader): + batch = tuple(t.to(args.device) for t in batch) + b_input_ids, b_input_mask, b_labels = batch + batch_output = 0 + for j in range(args.eva_iter): + if args.model == 'temperature': + current_batch = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask) + else: + current_batch = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0] + batch_output = batch_output + F.softmax(current_batch, dim=1) + batch_output = batch_output/args.eva_iter + score_out, _ = batch_output.max(1) + + score_ood_list.append(score_out) + + score_ood_array = torch.cat(score_ood_list) + + + + label_array = label_array.cpu().numpy() + score_ood_array = score_ood_array.cpu().numpy() + score_in_array = score_in_array.cpu().numpy() + correct_array = correct_array.cpu().numpy() + + + + + ####### calculate NBAUCC for detection task + predict_o = np.zeros(len(score_in_array)+len(score_ood_array)) + true_o = np.ones(len(score_in_array)+len(score_ood_array)) + true_o[:len(score_in_array)] = 0 ## in-distribution data as false, ood data as positive + true_mis = np.ones(len(score_in_array)) + true_mis[correct_array] = 0 ##true instances as false, misclassified instances as positive + predict_mis = np.zeros(len(score_in_array)) + + + + ood_sum = 0 + mis_sum = 0 + + ood_sum_list = [] + mis_sum_list = [] + +#### upper bound of the threshold tau for NBAUCC + stop_points = [0.50, 1.] + + for threshold in np.arange(0., 1.01, 0.02): + predict_ood_index1 = (score_in_array < threshold) + predict_ood_index2 = (score_ood_array < threshold) + predict_ood_index = np.concatenate((predict_ood_index1, predict_ood_index2), axis=0) + predict_o[predict_ood_index] = 1 + predict_mis[score_in_array