Commit 8dac844
Parent(s): 887060c
Add other files
This view is limited to 50 files because it contains too many changes. See raw diff.
- .DS_Store +0 -0
- CustomBERTModel.py +33 -0
- Untitled.ipynb +0 -0
- __pycache__/metrics.cpython-312.pyc +0 -0
- __pycache__/recalibration.cpython-312.pyc +0 -0
- __pycache__/visualization.cpython-312.pyc +0 -0
- data_preprocessor.py +170 -0
- hint_fine_tuning.py +382 -0
- main.py +322 -0
- metrics.py +149 -0
- new_fine_tuning/.DS_Store +0 -0
- new_fine_tuning/README.md +197 -0
- new_fine_tuning/__pycache__/metrics.cpython-312.pyc +0 -0
- new_fine_tuning/__pycache__/recalibration.cpython-312.pyc +0 -0
- new_fine_tuning/__pycache__/visualization.cpython-312.pyc +0 -0
- new_hint_fine_tuned.py +131 -0
- new_test_saved_finetuned_model.py +613 -0
- plot.png +0 -0
- prepare_pretraining_input_vocab_file.py +0 -0
- ratio_proportion_change3/finetuning/test.txt +3 -0
- ratio_proportion_change3/finetuning/test_in.txt +3 -0
- ratio_proportion_change3/finetuning/test_in_info.txt +3 -0
- ratio_proportion_change3/finetuning/test_in_label.txt +3 -0
- ratio_proportion_change3/finetuning/test_label.txt +3 -0
- ratio_proportion_change3/finetuning/testr_in_label.txt +3 -0
- ratio_proportion_change3/finetuning/testr_label.txt +3 -0
- ratio_proportion_change3/finetuning/train.txt +3 -0
- ratio_proportion_change3/finetuning/train_in.txt +3 -0
- ratio_proportion_change3/finetuning/train_in_info.txt +3 -0
- ratio_proportion_change3/finetuning/train_in_label.txt +3 -0
- ratio_proportion_change3/finetuning/train_info.txt +3 -0
- ratio_proportion_change3/finetuning/train_label.txt +3 -0
- ratio_proportion_change3/finetuning/trainr_in_label.txt +3 -0
- ratio_proportion_change3/finetuning/trainr_label.txt +3 -0
- ratio_proportion_change3/logs/masked/log_test_10per_finetuned.txt +3 -0
- ratio_proportion_change3/logs/masked/log_test_FS_finetuned.txt +3 -0
- ratio_proportion_change3/logs/masked/log_test_IS_finetuned.txt +3 -0
- ratio_proportion_change3/logs/masked/log_test_pretrained.txt +3 -0
- ratio_proportion_change3/logs/masked/log_train_10per_finetuned.txt +3 -0
- ratio_proportion_change3/logs/masked/log_train_FS_finetuned.txt +3 -0
- ratio_proportion_change3/logs/masked/log_train_IS_finetuned.txt +3 -0
- ratio_proportion_change3/logs/masked/log_train_pretrained.txt +3 -0
- ratio_proportion_change3/output/FS/train.txt +3 -0
- ratio_proportion_change3/output/FS/train_label.txt +3 -0
- ratio_proportion_change3/output/IS/train.txt +3 -0
- ratio_proportion_change3/output/IS/train_label.txt +3 -0
- ratio_proportion_change3/output/correctness/bert_fine_tuned.model.ep48 +0 -0
- ratio_proportion_change3/output/correctness/test.txt +3 -0
- ratio_proportion_change3/output/correctness/test_label.txt +3 -0
- ratio_proportion_change3/output/effectiveness/bert_fine_tuned.model.ep28 +0 -0
.DS_Store
ADDED
Binary file (6.15 kB).
CustomBERTModel.py
ADDED
@@ -0,0 +1,33 @@
import torch
import torch.nn as nn
from src.bert import BERT

class CustomBERTModel(nn.Module):
    def __init__(self, vocab_size, output_dim, pre_trained_model_path):
        super(CustomBERTModel, self).__init__()
        hidden_size = 768
        self.bert = BERT(vocab_size=vocab_size, hidden=hidden_size, n_layers=4, attn_heads=8, dropout=0.1)

        # Load the pre-trained model's state_dict
        checkpoint = torch.load(pre_trained_model_path, map_location=torch.device('cpu'))
        if isinstance(checkpoint, dict):
            self.bert.load_state_dict(checkpoint)
        else:
            raise TypeError(f"Expected state_dict, got {type(checkpoint)} instead.")

        # Fully connected layer with input size 768 (matching BERT hidden size)
        self.fc = nn.Linear(hidden_size, output_dim)

    def forward(self, sequence, segment_info):
        sequence = sequence.to(next(self.parameters()).device)
        segment_info = segment_info.to(sequence.device)

        x = self.bert(sequence, segment_info)
        print(f"BERT output shape: {x.shape}")

        cls_embeddings = x[:, 0]  # Extract CLS token embeddings
        print(f"CLS Embeddings shape: {cls_embeddings.shape}")

        logits = self.fc(cls_embeddings)  # Pass tensor of size (batch_size, 768) to the fully connected layer

        return logits
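A minimal usage sketch for the class above, assuming the repository's `src.bert.BERT` is importable and that a compatible state_dict checkpoint exists at the hypothetical path shown; the dummy token ids stand in for a real tokenized sequence:

```python
# Sketch only: assumes src/ is on PYTHONPATH and the checkpoint path below exists.
import torch
from CustomBERTModel import CustomBERTModel

model = CustomBERTModel(
    vocab_size=1000,                      # placeholder; use len(vocab.vocab) in practice
    output_dim=2,                         # binary classification head
    pre_trained_model_path="bert_trained.seq_encoder.model.ep68",  # hypothetical checkpoint
)
model.eval()

sequence = torch.randint(0, 1000, (4, 128))   # (batch_size, seq_len) dummy token ids
segment_info = torch.zeros_like(sequence)     # single-segment input

with torch.no_grad():
    logits = model(sequence, segment_info)    # (batch_size, output_dim)
print(logits.shape)
```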
Untitled.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
__pycache__/metrics.cpython-312.pyc
ADDED
Binary file (9.14 kB).
__pycache__/recalibration.cpython-312.pyc
ADDED
Binary file (5.49 kB).
__pycache__/visualization.cpython-312.pyc
ADDED
Binary file (5.26 kB).
data_preprocessor.py
ADDED
@@ -0,0 +1,170 @@
import time
import pandas as pd

import sys

class DataPreprocessor:
    def __init__(self, input_file_path):
        self.input_file_path = input_file_path
        self.unique_students = None
        self.unique_problems = None
        self.unique_prob_hierarchy = None
        self.unique_steps = None
        self.unique_kcs = None

    def analyze_dataset(self):
        file_iterator = self.load_file_iterator()

        start_time = time.time()
        self.unique_students = {"st"}
        self.unique_problems = {"pr"}
        self.unique_prob_hierarchy = {"ph"}
        self.unique_kcs = {"kc"}
        for chunk_data in file_iterator:
            for student_id, std_groups in chunk_data.groupby('Anon Student Id'):
                self.unique_students.update({student_id})
                prob_hierarchy = std_groups.groupby('Level (Workspace Id)')
                for hierarchy, hierarchy_groups in prob_hierarchy:
                    self.unique_prob_hierarchy.update({hierarchy})
                    prob_name = hierarchy_groups.groupby('Problem Name')
                    for problem_name, prob_name_groups in prob_name:
                        self.unique_problems.update({problem_name})
                        sub_skills = prob_name_groups['KC Model(MATHia)']
                        for a in sub_skills:
                            if str(a) != "nan":
                                temp = a.split("~~")
                                for kc in temp:
                                    self.unique_kcs.update({kc})
        self.unique_students.remove("st")
        self.unique_problems.remove("pr")
        self.unique_prob_hierarchy.remove("ph")
        self.unique_kcs.remove("kc")
        end_time = time.time()
        print("Time Taken to analyze dataset = ", end_time - start_time)
        print("Length of unique students->", len(self.unique_students))
        print("Length of unique problems->", len(self.unique_problems))
        print("Length of unique problem hierarchy->", len(self.unique_prob_hierarchy))
        print("Length of Unique Knowledge components ->", len(self.unique_kcs))

    def analyze_dataset_by_section(self, workspace_name):
        file_iterator = self.load_file_iterator()

        start_time = time.time()
        self.unique_students = {"st"}
        self.unique_problems = {"pr"}
        self.unique_prob_hierarchy = {"ph"}
        self.unique_steps = {"s"}
        self.unique_kcs = {"kc"}
        # with open("workspace_info.txt", 'a') as f:
        #     sys.stdout = f
        for chunk_data in file_iterator:
            for student_id, std_groups in chunk_data.groupby('Anon Student Id'):
                prob_hierarchy = std_groups.groupby('Level (Workspace Id)')
                for hierarchy, hierarchy_groups in prob_hierarchy:
                    if workspace_name == hierarchy:
                        # print("Workspace : ", hierarchy)
                        self.unique_students.update({student_id})
                        self.unique_prob_hierarchy.update({hierarchy})
                        prob_name = hierarchy_groups.groupby('Problem Name')
                        for problem_name, prob_name_groups in prob_name:
                            self.unique_problems.update({problem_name})
                            step_names = prob_name_groups['Step Name']
                            sub_skills = prob_name_groups['KC Model(MATHia)']
                            for step in step_names:
                                if str(step) != "nan":
                                    self.unique_steps.update({step})
                            for a in sub_skills:
                                if str(a) != "nan":
                                    temp = a.split("~~")
                                    for kc in temp:
                                        self.unique_kcs.update({kc})
        self.unique_problems.remove("pr")
        self.unique_prob_hierarchy.remove("ph")
        self.unique_steps.remove("s")
        self.unique_kcs.remove("kc")
        end_time = time.time()
        print("Time Taken to analyze dataset = ", end_time - start_time)
        print("Workspace-> ", workspace_name)
        print("Length of unique students->", len(self.unique_students))
        print("Length of unique problems->", len(self.unique_problems))
        print("Length of unique problem hierarchy->", len(self.unique_prob_hierarchy))
        print("Length of unique step names ->", len(self.unique_steps))
        print("Length of unique knowledge components ->", len(self.unique_kcs))
        # f.close()
        # sys.stdout = sys.__stdout__

    def analyze_dataset_by_school(self, workspace_name, school_id=None):
        file_iterator = self.load_file_iterator(sep=",")

        start_time = time.time()
        self.unique_schools = set()
        self.unique_class = set()
        self.unique_students = set()
        self.unique_problems = set()
        self.unique_steps = set()
        self.unique_kcs = set()
        self.unique_actions = set()
        self.unique_outcomes = set()
        self.unique_new_steps_w_action_attempt = set()
        self.unique_new_steps_w_kcs = set()
        self.unique_new_steps_w_action_attempt_kcs = set()

        for chunk_data in file_iterator:
            for school, school_group in chunk_data.groupby('CF (Anon School Id)'):
                # if school and school == school_id:
                self.unique_schools.add(school)
                for class_id, class_group in school_group.groupby('CF (Anon Class Id)'):
                    self.unique_class.add(class_id)
                    for student_id, std_group in class_group.groupby('Anon Student Id'):
                        self.unique_students.add(student_id)
                        for prob, prob_group in std_group.groupby('Problem Name'):
                            self.unique_problems.add(prob)

                            step_names = set(prob_group['Step Name'])
                            sub_skills = set(prob_group['KC Model(MATHia)'])
                            actions = set(prob_group['Action'])
                            outcomes = set(prob_group['Outcome'])

                            self.unique_steps.update(step_names)
                            self.unique_kcs.update(sub_skills)
                            self.unique_actions.update(actions)
                            self.unique_outcomes.update(outcomes)

                            for step in step_names:
                                if pd.isna(step):
                                    step_group = prob_group[pd.isna(prob_group['Step Name'])]
                                else:
                                    step_group = prob_group[prob_group['Step Name'] == step]

                                for kc in set(step_group['KC Model(MATHia)']):
                                    new_step = f"{step}:{kc}"
                                    self.unique_new_steps_w_kcs.add(new_step)

                                for action, action_group in step_group.groupby('Action'):
                                    for attempt, attempt_group in action_group.groupby('Attempt At Step'):
                                        new_step = f"{step}:{action}:{attempt}"
                                        self.unique_new_steps_w_action_attempt.add(new_step)

                                        for kc in set(attempt_group["KC Model(MATHia)"]):
                                            new_step = f"{step}:{action}:{attempt}:{kc}"
                                            self.unique_new_steps_w_action_attempt_kcs.add(new_step)

        end_time = time.time()
        print("Time Taken to analyze dataset = ", end_time - start_time)
        print("Workspace-> ", workspace_name)
        print("Length of unique students->", len(self.unique_students))
        print("Length of unique problems->", len(self.unique_problems))
        print("Length of unique classes->", len(self.unique_class))
        print("Length of unique step names ->", len(self.unique_steps))
        print("Length of unique knowledge components ->", len(self.unique_kcs))
        print("Length of unique actions ->", len(self.unique_actions))
        print("Length of unique outcomes ->", len(self.unique_outcomes))
        print("Length of unique new step names with actions and attempts ->", len(self.unique_new_steps_w_action_attempt))
        print("Length of unique new step names with actions, attempts and kcs ->", len(self.unique_new_steps_w_action_attempt_kcs))
        print("Length of unique new step names with kcs ->", len(self.unique_new_steps_w_kcs))

    def load_file_iterator(self, sep="\t"):
        chunk_iterator = pd.read_csv(self.input_file_path, sep=sep, header=0, iterator=True, chunksize=1000000)
        return chunk_iterator
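A short usage sketch for `DataPreprocessor`, assuming a tab-separated MATHia-style log export at the hypothetical path `dataset/transactions.tsv` with the column names the class references (`Anon Student Id`, `Level (Workspace Id)`, `Problem Name`, `KC Model(MATHia)`, and so on):

```python
# Sketch only: the input path and workspace name are placeholders.
from data_preprocessor import DataPreprocessor

preprocessor = DataPreprocessor(input_file_path="dataset/transactions.tsv")

# Whole-dataset statistics (students, problems, workspaces, knowledge components).
preprocessor.analyze_dataset()

# Statistics restricted to one workspace, matched against 'Level (Workspace Id)'.
preprocessor.analyze_dataset_by_section("ratio_proportion_change3")
```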
hint_fine_tuning.py
ADDED
@@ -0,0 +1,382 @@
import argparse
import os
import sys
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split, TensorDataset
from src.dataset import TokenizerDataset
from src.bert import BERT
from src.pretrainer import BERTFineTuneTrainer1
from src.vocab import Vocab
import pandas as pd


# class CustomBERTModel(nn.Module):
#     def __init__(self, vocab_size, output_dim, pre_trained_model_path):
#         super(CustomBERTModel, self).__init__()
#         hidden_size = 768
#         self.bert = BERT(vocab_size=vocab_size, hidden=hidden_size, n_layers=12, attn_heads=12, dropout=0.1)
#         checkpoint = torch.load(pre_trained_model_path, map_location=torch.device('cpu'))
#         if isinstance(checkpoint, dict):
#             self.bert.load_state_dict(checkpoint)
#         elif isinstance(checkpoint, BERT):
#             self.bert = checkpoint
#         else:
#             raise TypeError(f"Expected state_dict or BERT instance, got {type(checkpoint)} instead.")
#         self.fc = nn.Linear(hidden_size, output_dim)

#     def forward(self, sequence, segment_info):
#         sequence = sequence.to(next(self.parameters()).device)
#         segment_info = segment_info.to(sequence.device)

#         if sequence.size(0) == 0 or sequence.size(1) == 0:
#             raise ValueError("Input sequence tensor has 0 elements. Check data preprocessing.")

#         x = self.bert(sequence, segment_info)
#         print(f"BERT output shape: {x.shape}")

#         if x.size(0) == 0 or x.size(1) == 0:
#             raise ValueError("BERT output tensor has 0 elements. Check input dimensions.")

#         cls_embeddings = x[:, 0]
#         logits = self.fc(cls_embeddings)
#         return logits

# class CustomBERTModel(nn.Module):
#     def __init__(self, vocab_size, output_dim, pre_trained_model_path):
#         super(CustomBERTModel, self).__init__()
#         hidden_size = 764  # Ensure this is 768
#         self.bert = BERT(vocab_size=vocab_size, hidden=hidden_size, n_layers=12, attn_heads=12, dropout=0.1)

#         # Load the pre-trained model's state_dict
#         checkpoint = torch.load(pre_trained_model_path, map_location=torch.device('cpu'))
#         if isinstance(checkpoint, dict):
#             self.bert.load_state_dict(checkpoint)
#         else:
#             raise TypeError(f"Expected state_dict, got {type(checkpoint)} instead.")

#         # Fully connected layer with input size 768
#         self.fc = nn.Linear(hidden_size, output_dim)

#     def forward(self, sequence, segment_info):
#         sequence = sequence.to(next(self.parameters()).device)
#         segment_info = segment_info.to(sequence.device)

#         x = self.bert(sequence, segment_info)
#         print(f"BERT output shape: {x.shape}")  # Should output (batch_size, seq_len, 768)

#         cls_embeddings = x[:, 0]  # Extract CLS token embeddings
#         print(f"CLS Embeddings shape: {cls_embeddings.shape}")  # Should output (batch_size, 768)

#         logits = self.fc(cls_embeddings)  # Should now pass a tensor of size (batch_size, 768) to `fc`

#         return logits


# for test
class CustomBERTModel(nn.Module):
    def __init__(self, vocab_size, output_dim, pre_trained_model_path):
        super(CustomBERTModel, self).__init__()
        self.hidden = 764  # Ensure this is defined correctly
        self.bert = BERT(vocab_size=vocab_size, hidden=self.hidden, n_layers=12, attn_heads=12, dropout=0.1)

        # Load the pre-trained model's state_dict
        checkpoint = torch.load(pre_trained_model_path, map_location=torch.device('cpu'))
        if isinstance(checkpoint, dict):
            self.bert.load_state_dict(checkpoint)
        else:
            raise TypeError(f"Expected state_dict, got {type(checkpoint)} instead.")

        self.fc = nn.Linear(self.hidden, output_dim)

    def forward(self, sequence, segment_info):
        x = self.bert(sequence, segment_info)
        cls_embeddings = x[:, 0]  # Extract CLS token embeddings
        logits = self.fc(cls_embeddings)  # Pass to fully connected layer
        return logits

def preprocess_labels(label_csv_path):
    try:
        labels_df = pd.read_csv(label_csv_path)
        labels = labels_df['last_hint_class'].values.astype(int)
        return torch.tensor(labels, dtype=torch.long)
    except Exception as e:
        print(f"Error reading dataset file: {e}")
        return None


def preprocess_data(data_path, vocab, max_length=128):
    try:
        with open(data_path, 'r') as f:
            sequences = f.readlines()
    except Exception as e:
        print(f"Error reading data file: {e}")
        return None, None

    if len(sequences) == 0:
        raise ValueError(f"No sequences found in data file {data_path}. Check the file content.")

    tokenized_sequences = []

    for sequence in sequences:
        sequence = sequence.strip()
        if sequence:
            encoded = vocab.to_seq(sequence, seq_len=max_length)
            encoded = encoded[:max_length] + [vocab.vocab.get('[PAD]', 0)] * (max_length - len(encoded))
            segment_label = [0] * max_length

            tokenized_sequences.append({
                'input_ids': torch.tensor(encoded),
                'segment_label': torch.tensor(segment_label)
            })

    if not tokenized_sequences:
        raise ValueError("Tokenization resulted in an empty list. Check the sequences and tokenization logic.")

    tokenized_sequences = [t for t in tokenized_sequences if len(t['input_ids']) == max_length]

    if not tokenized_sequences:
        raise ValueError("All tokenized sequences are of unexpected length. This suggests an issue with the tokenization logic.")

    input_ids = torch.cat([t['input_ids'].unsqueeze(0) for t in tokenized_sequences], dim=0)
    segment_labels = torch.cat([t['segment_label'].unsqueeze(0) for t in tokenized_sequences], dim=0)

    print(f"Input IDs shape: {input_ids.shape}")
    print(f"Segment labels shape: {segment_labels.shape}")

    return input_ids, segment_labels


def collate_fn(batch):
    inputs = []
    labels = []
    segment_labels = []

    for item in batch:
        if item is None:
            continue

        if isinstance(item, dict):
            inputs.append(item['input_ids'].unsqueeze(0))
            labels.append(item['label'].unsqueeze(0))
            segment_labels.append(item['segment_label'].unsqueeze(0))

    if len(inputs) == 0 or len(segment_labels) == 0:
        print("Empty batch encountered. Returning None to skip this batch.")
        return None

    try:
        inputs = torch.cat(inputs, dim=0)
        labels = torch.cat(labels, dim=0)
        segment_labels = torch.cat(segment_labels, dim=0)
    except Exception as e:
        print(f"Error concatenating tensors: {e}")
        return None

    return {
        'input': inputs,
        'label': labels,
        'segment_label': segment_labels
    }

def custom_collate_fn(batch):
    processed_batch = collate_fn(batch)

    if processed_batch is None or len(processed_batch['input']) == 0:
        # Return a valid batch with at least one element instead of an empty one
        return {
            'input': torch.zeros((1, 128), dtype=torch.long),
            'label': torch.zeros((1,), dtype=torch.long),
            'segment_label': torch.zeros((1, 128), dtype=torch.long)
        }

    return processed_batch


def train_without_progress_status(trainer, epoch, shuffle):
    for epoch_idx in range(epoch):
        print(f"EP_train:{epoch_idx}:")
        for batch in trainer.train_data:
            if batch is None:
                continue

            # Check if batch is a string (indicating an issue)
            if isinstance(batch, str):
                print(f"Error: Received a string instead of a dictionary in batch: {batch}")
                raise ValueError(f"Unexpected string in batch: {batch}")

            # Validate the batch structure before passing to iteration
            if isinstance(batch, dict):
                # Verify that all expected keys are present and that the values are tensors
                if all(key in batch for key in ['input_ids', 'segment_label', 'labels']):
                    if all(isinstance(batch[key], torch.Tensor) for key in batch):
                        try:
                            print(f"Batch Structure: {batch}")  # Debugging batch before iteration
                            trainer.iteration(epoch_idx, batch)
                        except Exception as e:
                            print(f"Error during batch processing: {e}")
                            sys.stdout.flush()
                            raise e  # Propagate the exception for better debugging
                    else:
                        print(f"Error: Expected all values in batch to be tensors, but got: {batch}")
                        raise ValueError("Batch contains non-tensor values.")
                else:
                    print(f"Error: Batch missing expected keys. Batch keys: {batch.keys()}")
                    raise ValueError("Batch does not contain expected keys.")
            else:
                print(f"Error: Expected batch to be a dictionary but got {type(batch)} instead.")
                raise ValueError(f"Invalid batch structure: {batch}")

# def main(opt):
#     # device = torch.device("cpu")
#     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#     vocab = Vocab(opt.vocab_file)
#     vocab.load_vocab()

#     input_ids, segment_labels = preprocess_data(opt.data_path, vocab, max_length=128)
#     labels = preprocess_labels(opt.dataset)

#     if input_ids is None or segment_labels is None or labels is None:
#         print("Error in preprocessing data. Exiting.")
#         return

#     dataset = TensorDataset(input_ids, segment_labels, torch.tensor(labels, dtype=torch.long))
#     val_size = len(dataset) - int(0.8 * len(dataset))
#     val_dataset, train_dataset = random_split(dataset, [val_size, len(dataset) - val_size])

#     train_dataloader = DataLoader(
#         train_dataset,
#         batch_size=32,
#         shuffle=True,
#         collate_fn=custom_collate_fn
#     )
#     val_dataloader = DataLoader(
#         val_dataset,
#         batch_size=32,
#         shuffle=False,
#         collate_fn=custom_collate_fn
#     )

#     custom_model = CustomBERTModel(
#         vocab_size=len(vocab.vocab),
#         output_dim=2,
#         pre_trained_model_path=opt.pre_trained_model_path
#     ).to(device)

#     trainer = BERTFineTuneTrainer1(
#         bert=custom_model.bert,
#         vocab_size=len(vocab.vocab),
#         train_dataloader=train_dataloader,
#         test_dataloader=val_dataloader,
#         lr=5e-5,
#         num_labels=2,
#         with_cuda=torch.cuda.is_available(),
#         log_freq=10,
#         workspace_name=opt.output_dir,
#         log_folder_path=opt.log_folder_path
#     )

#     trainer.train(epoch=20)

#     # os.makedirs(opt.output_dir, exist_ok=True)
#     # output_model_file = os.path.join(opt.output_dir, 'fine_tuned_model.pth')
#     # torch.save(custom_model.state_dict(), output_model_file)
#     # print(f'Model saved to {output_model_file}')

#     os.makedirs(opt.output_dir, exist_ok=True)
#     output_model_file = os.path.join(opt.output_dir, 'fine_tuned_model_2.pth')
#     torch.save(custom_model, output_model_file)
#     print(f'Model saved to {output_model_file}')


def main(opt):
    # Set device to GPU if available, otherwise use CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    print(torch.cuda.is_available())  # Should return True if GPU is available
    print(torch.cuda.device_count())

    # Load vocabulary
    vocab = Vocab(opt.vocab_file)
    vocab.load_vocab()

    # Preprocess data and labels
    input_ids, segment_labels = preprocess_data(opt.data_path, vocab, max_length=128)
    labels = preprocess_labels(opt.dataset)

    if input_ids is None or segment_labels is None or labels is None:
        print("Error in preprocessing data. Exiting.")
        return

    # Transfer tensors to the correct device (GPU/CPU)
    input_ids = input_ids.to(device)
    segment_labels = segment_labels.to(device)
    labels = torch.tensor(labels, dtype=torch.long).to(device)

    # Create TensorDataset and split into train and validation sets
    dataset = TensorDataset(input_ids, segment_labels, labels)
    val_size = len(dataset) - int(0.8 * len(dataset))
    val_dataset, train_dataset = random_split(dataset, [val_size, len(dataset) - val_size])

    # Create DataLoaders for training and validation
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=32,
        shuffle=True,
        collate_fn=custom_collate_fn
    )
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=32,
        shuffle=False,
        collate_fn=custom_collate_fn
    )

    # Initialize custom BERT model and move it to the device
    custom_model = CustomBERTModel(
        vocab_size=len(vocab.vocab),
        output_dim=2,
        pre_trained_model_path=opt.pre_trained_model_path
    ).to(device)

    # Initialize the fine-tuning trainer
    trainer = BERTFineTuneTrainer1(
        bert=custom_model.bert,
        vocab_size=len(vocab.vocab),
        train_dataloader=train_dataloader,
        test_dataloader=val_dataloader,
        lr=5e-5,
        num_labels=2,
        with_cuda=torch.cuda.is_available(),
        log_freq=10,
        workspace_name=opt.output_dir,
        log_folder_path=opt.log_folder_path
    )

    # Train the model
    trainer.train(epoch=20)

    # Save the model to the specified output directory
    # os.makedirs(opt.output_dir, exist_ok=True)
    # output_model_file = os.path.join(opt.output_dir, 'fine_tuned_model_2.pth')
    # torch.save(custom_model.state_dict(), output_model_file)
    # print(f'Model saved to {output_model_file}')
    os.makedirs(opt.output_dir, exist_ok=True)
    output_model_file = os.path.join(opt.output_dir, 'fine_tuned_model_2.pth')
    torch.save(custom_model, output_model_file)
    print(f'Model saved to {output_model_file}')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Fine-tune BERT model.')
    parser.add_argument('--dataset', type=str, default='/home/jupyter/bert/dataset/hint_based/ratio_proportion_change_3/er/er_train.csv', help='Path to the dataset file.')
    parser.add_argument('--data_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/gt/er.txt', help='Path to the input sequence file.')
    parser.add_argument('--output_dir', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/output/hint_classification', help='Directory to save the fine-tuned model.')
    parser.add_argument('--pre_trained_model_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/output/pretrain:1800ms:64hs:4l:8a:50s:64b:1000e:-5lr/bert_trained.seq_encoder.model.ep68', help='Path to the pre-trained BERT model.')
    parser.add_argument('--vocab_file', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/pretraining/vocab.txt', help='Path to the vocabulary file.')
    parser.add_argument('--log_folder_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/logs/oct_logs', help='Path to the folder for saving logs.')

    opt = parser.parse_args()
    main(opt)
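The collate helpers above skip `None` items and fall back to a one-element zero batch when everything in a batch is filtered out. A minimal sketch of that behavior with dummy tensors, assuming the repo's `src` package is on `PYTHONPATH` so `hint_fine_tuning` imports cleanly:

```python
# Sketch only: requires the repo's src/ package to be importable.
import torch
from hint_fine_tuning import custom_collate_fn

item = {
    "input_ids": torch.randint(0, 100, (128,)),          # one tokenized sequence
    "segment_label": torch.zeros(128, dtype=torch.long),  # single-segment labels
    "label": torch.tensor(1),                              # class label
}

batch = custom_collate_fn([item, None, item])    # None items are skipped
print(batch["input"].shape, batch["label"].shape)  # torch.Size([2, 128]) torch.Size([2])

empty = custom_collate_fn([None])                # falls back to a 1-element zero batch
print(empty["input"].shape)                      # torch.Size([1, 128])
```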
main.py
ADDED
@@ -0,0 +1,322 @@
import argparse

from torch.utils.data import DataLoader
import torch
import torch.nn as nn

from src.bert import BERT
from src.pretrainer import BERTTrainer, BERTFineTuneTrainer, BERTAttention
from src.dataset import PretrainerDataset, TokenizerDataset
from src.vocab import Vocab

import time
import os
import tqdm
import pickle

def train():
    parser = argparse.ArgumentParser()

    parser.add_argument('-workspace_name', type=str, default=None)
    parser.add_argument('-code', type=str, default=None, help="folder for pretraining outputs and logs")
    parser.add_argument('-finetune_task', type=str, default=None, help="folder inside finetuning")
    parser.add_argument("-attention", type=bool, default=False, help="analyse attention scores")
    parser.add_argument("-diff_test_folder", type=bool, default=False, help="use for different test folder")
    parser.add_argument("-embeddings", type=bool, default=False, help="get and analyse embeddings")
    parser.add_argument('-embeddings_file_name', type=str, default=None, help="file name of embeddings")
    parser.add_argument("-pretrain", type=bool, default=False, help="pretraining: true, or false")
    # parser.add_argument('-opts', nargs='+', type=str, default=None, help='List of optional steps')
    parser.add_argument("-max_mask", type=int, default=0.15, help="% of input tokens selected for masking")
    # parser.add_argument("-p", "--pretrain_dataset", type=str, default="pretraining/pretrain.txt", help="pretraining dataset for bert")
    # parser.add_argument("-pv", "--pretrain_val_dataset", type=str, default="pretraining/test.txt", help="pretraining validation dataset for bert")
    # default="finetuning/test.txt",
    parser.add_argument("-vocab_path", type=str, default="pretraining/vocab.txt", help="built vocab model path with bert-vocab")

    parser.add_argument("-train_dataset_path", type=str, default="train.txt", help="fine tune train dataset for progress classifier")
    parser.add_argument("-val_dataset_path", type=str, default="val.txt", help="test set for evaluate fine tune train set")
    parser.add_argument("-test_dataset_path", type=str, default="test.txt", help="test set for evaluate fine tune train set")
    parser.add_argument("-num_labels", type=int, default=2, help="Number of labels")
    parser.add_argument("-train_label_path", type=str, default="train_label.txt", help="fine tune train dataset for progress classifier")
    parser.add_argument("-val_label_path", type=str, default="val_label.txt", help="test set for evaluate fine tune train set")
    parser.add_argument("-test_label_path", type=str, default="test_label.txt", help="test set for evaluate fine tune train set")
    ##### change Checkpoint for finetuning
    parser.add_argument("-pretrained_bert_checkpoint", type=str, default=None, help="checkpoint of saved pretrained bert model")  #."output_feb09/bert_trained.model.ep40"
    parser.add_argument('-check_epoch', type=int, default=None)

    parser.add_argument("-hs", "--hidden", type=int, default=64, help="hidden size of transformer model")  #64
    parser.add_argument("-l", "--layers", type=int, default=4, help="number of layers")  #4
    parser.add_argument("-a", "--attn_heads", type=int, default=4, help="number of attention heads")  #8
    parser.add_argument("-s", "--seq_len", type=int, default=50, help="maximum sequence length")

    parser.add_argument("-b", "--batch_size", type=int, default=500, help="number of batch_size")  #64
    parser.add_argument("-e", "--epochs", type=int, default=50)  #1501, help="number of epochs") #501
    # Use 50 for pretrain, and 10 for fine tune
    parser.add_argument("-w", "--num_workers", type=int, default=4, help="dataloader worker size")

    # Later run with cuda
    parser.add_argument("--with_cuda", type=bool, default=True, help="training with CUDA: true, or false")
    parser.add_argument("--log_freq", type=int, default=10, help="printing loss every n iter: setting n")
    # parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=None, help="CUDA device ids")
    # parser.add_argument("--on_memory", type=bool, default=False, help="Loading on memory: true or false")

    parser.add_argument("--dropout", type=float, default=0.1, help="dropout of network")
    parser.add_argument("--lr", type=float, default=1e-05, help="learning rate of adam")  #1e-3
    parser.add_argument("--adam_weight_decay", type=float, default=0.01, help="weight_decay of adam")
    parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value")
    parser.add_argument("--adam_beta2", type=float, default=0.98, help="adam first beta value")  #0.999

    parser.add_argument("-o", "--output_path", type=str, default="bert_trained.seq_encoder.model", help="ex)output/bert.model")
    # parser.add_argument("-o", "--output_path", type=str, default="output/bert_fine_tuned.model", help="ex)output/bert.model")

    args = parser.parse_args()
    for k, v in vars(args).items():
        if 'path' in k:
            if v:
                if k == "output_path":
                    if args.code:
                        setattr(args, f"{k}", args.workspace_name+f"/output/{args.code}/"+v)
                    elif args.finetune_task:
                        setattr(args, f"{k}", args.workspace_name+f"/output/{args.finetune_task}/"+v)
                    else:
                        setattr(args, f"{k}", args.workspace_name+"/output/"+v)
                elif k != "vocab_path":
                    if args.pretrain:
                        setattr(args, f"{k}", args.workspace_name+"/pretraining/"+v)
                    else:
                        if args.code:
                            setattr(args, f"{k}", args.workspace_name+f"/{args.code}/"+v)
                        elif args.finetune_task:
                            if args.diff_test_folder and "test" in k:
                                setattr(args, f"{k}", args.workspace_name+f"/finetuning/"+v)
                            else:
                                setattr(args, f"{k}", args.workspace_name+f"/finetuning/{args.finetune_task}/"+v)
                        else:
                            setattr(args, f"{k}", args.workspace_name+"/finetuning/"+v)
                else:
                    setattr(args, f"{k}", args.workspace_name+"/"+v)

        print(f"args.{k} : {getattr(args, f'{k}')}")

    print("Loading Vocab", args.vocab_path)
    vocab_obj = Vocab(args.vocab_path)
    vocab_obj.load_vocab()
    print("Vocab Size: ", len(vocab_obj.vocab))

    if args.attention:
        print(f"Attention aggregate...... code: {args.code}, dataset: {args.finetune_task}")
        if args.code:
            new_folder = f"{args.workspace_name}/plots/{args.code}/"
            if not os.path.exists(new_folder):
                os.makedirs(new_folder)

        train_dataset = TokenizerDataset(args.train_dataset_path, None, vocab_obj, seq_len=args.seq_len)
        train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
        print("Load Pre-trained BERT model")
        cuda_condition = torch.cuda.is_available() and args.with_cuda
        device = torch.device("cuda:0" if cuda_condition else "cpu")
        bert = torch.load(args.pretrained_bert_checkpoint, map_location=device)
        trainer = BERTAttention(bert, vocab_obj, train_dataloader=train_data_loader, workspace_name=args.workspace_name, code=args.code, finetune_task=args.finetune_task)
        trainer.getAttention()

    elif args.embeddings:
        print("Get embeddings... and cluster... ")
        train_dataset = TokenizerDataset(args.test_dataset_path, None, vocab_obj, seq_len=args.seq_len)
        train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
        print("Load Pre-trained BERT model")
        cuda_condition = torch.cuda.is_available() and args.with_cuda
        device = torch.device("cuda:0" if cuda_condition else "cpu")
        bert = torch.load(args.pretrained_bert_checkpoint).to(device)
        available_gpus = list(range(torch.cuda.device_count()))
        if torch.cuda.device_count() > 1:
            print("Using %d GPUS for BERT" % torch.cuda.device_count())
            bert = nn.DataParallel(bert, device_ids=available_gpus)

        data_iter = tqdm.tqdm(enumerate(train_data_loader),
                              desc="Model: %s" % (args.pretrained_bert_checkpoint.split("/")[-1]),
                              total=len(train_data_loader), bar_format="{l_bar}{r_bar}")
        all_embeddings = []
        for i, data in data_iter:
            data = {key: value.to(device) for key, value in data.items()}
            embedding = bert(data["input"], data["segment_label"])
            # print(embedding.shape, embedding[:, 0].shape)
            embeddings = [h for h in embedding[:, 0].cpu().detach().numpy()]
            all_embeddings.extend(embeddings)

        new_emb_folder = f"{args.workspace_name}/embeddings"
        if not os.path.exists(new_emb_folder):
            os.makedirs(new_emb_folder)
        pickle.dump(all_embeddings, open(f"{new_emb_folder}/{args.embeddings_file_name}.pkl", "wb"))
    else:
        if args.pretrain:
            print("Pre-training......")
            print("Loading Pretraining Train Dataset", args.train_dataset_path)
            print(f"Workspace: {args.workspace_name}")
            pretrain_dataset = PretrainerDataset(args.train_dataset_path, vocab_obj, seq_len=args.seq_len, max_mask=args.max_mask)

            print("Loading Pretraining Validation Dataset", args.val_dataset_path)
            pretrain_valid_dataset = PretrainerDataset(args.val_dataset_path, vocab_obj, seq_len=args.seq_len, max_mask=args.max_mask) \
                if args.val_dataset_path is not None else None

            print("Loading Pretraining Test Dataset", args.test_dataset_path)
            pretrain_test_dataset = PretrainerDataset(args.test_dataset_path, vocab_obj, seq_len=args.seq_len, max_mask=args.max_mask) \
                if args.test_dataset_path is not None else None

            print("Creating Dataloader")
            pretrain_data_loader = DataLoader(pretrain_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
            pretrain_val_data_loader = DataLoader(pretrain_valid_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
                if pretrain_valid_dataset is not None else None
            pretrain_test_data_loader = DataLoader(pretrain_test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
                if pretrain_test_dataset is not None else None

            print("Building BERT model")
            bert = BERT(len(vocab_obj.vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads, dropout=args.dropout)

            if args.pretrained_bert_checkpoint:
                print(f"BERT model : {args.pretrained_bert_checkpoint}")
                bert = torch.load(args.pretrained_bert_checkpoint)

            new_log_folder = f"{args.workspace_name}/logs"
            new_output_folder = f"{args.workspace_name}/output"
            if args.code:  # is sent almost all the time
                new_log_folder = f"{args.workspace_name}/logs/{args.code}"
                new_output_folder = f"{args.workspace_name}/output/{args.code}"

            if not os.path.exists(new_log_folder):
                os.makedirs(new_log_folder)
            if not os.path.exists(new_output_folder):
                os.makedirs(new_output_folder)

            print(f"Creating BERT Trainer .... masking: True, max_mask: {args.max_mask}")
            trainer = BERTTrainer(bert, len(vocab_obj.vocab), train_dataloader=pretrain_data_loader,
                                  val_dataloader=pretrain_val_data_loader, test_dataloader=pretrain_test_data_loader,
                                  lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
                                  with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq,
                                  log_folder_path=new_log_folder)

            start_time = time.time()
            print(f'Pretraining Starts, Time: {time.strftime("%D %T", time.localtime(start_time))}')
            # if need to pretrain from a check-point, need :check_epoch
            repoch = range(args.check_epoch, args.epochs) if args.check_epoch else range(args.epochs)
            counter = 0
            patience = 20
            for epoch in repoch:
                print(f'Training Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}')
                trainer.train(epoch)
                print(f'Training Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n')

                if pretrain_val_data_loader is not None:
                    print(f'Validation Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}')
                    trainer.val(epoch)
                    print(f'Validation Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n')

                if trainer.save_model:  # or epoch%10 == 0 and epoch > 4
                    trainer.save(epoch, args.output_path)
                    counter = 0
                    if pretrain_test_data_loader is not None:
                        print(f'Test Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}')
                        trainer.test(epoch)
                        print(f'Test Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n')
                else:
                    counter += 1
                    if counter >= patience:
                        print(f"Early stopping at epoch {epoch}")
                        break

            end_time = time.time()
            print("Time Taken to pretrain model = ", end_time - start_time)
            print(f'Pretraining Ends, Time: {time.strftime("%D %T", time.localtime(end_time))}')
        else:
            print("Fine Tuning......")
            print("Loading Train Dataset", args.train_dataset_path)
            train_dataset = TokenizerDataset(args.train_dataset_path, args.train_label_path, vocab_obj, seq_len=args.seq_len)

            # print("Loading Validation Dataset", args.val_dataset_path)
            # val_dataset = TokenizerDataset(args.val_dataset_path, args.val_label_path, vocab_obj, seq_len=args.seq_len) \
            #     if args.val_dataset_path is not None else None

            print("Loading Test Dataset", args.test_dataset_path)
            test_dataset = TokenizerDataset(args.test_dataset_path, args.test_label_path, vocab_obj, seq_len=args.seq_len) \
                if args.test_dataset_path is not None else None

            print("Creating Dataloader...")
            train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
            # val_data_loader = DataLoader(val_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
            #     if val_dataset is not None else None
            test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
                if test_dataset is not None else None

            print("Load Pre-trained BERT model")
            # bert = BERT(len(vocab_obj.vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads)
            cuda_condition = torch.cuda.is_available() and args.with_cuda
            device = torch.device("cuda:0" if cuda_condition else "cpu")
            bert = torch.load(args.pretrained_bert_checkpoint, map_location=device)

            # if args.finetune_task == "SL":
            #     if args.workspace_name == "ratio_proportion_change4":
            #         num_labels = 9
            #     elif args.workspace_name == "ratio_proportion_change3":
            #         num_labels = 9
            #     elif args.workspace_name == "scale_drawings_3":
            #         num_labels = 9
            #     elif args.workspace_name == "sales_tax_discounts_two_rates":
            #         num_labels = 3
            # else:
            #     num_labels = 2
            # # num_labels = 1
            # print(f"Number of Labels : {args.num_labels}")
            new_log_folder = f"{args.workspace_name}/logs"
            new_output_folder = f"{args.workspace_name}/output"
            if args.finetune_task:  # is sent almost all the time
                new_log_folder = f"{args.workspace_name}/logs/{args.finetune_task}"
                new_output_folder = f"{args.workspace_name}/output/{args.finetune_task}"

            if not os.path.exists(new_log_folder):
                os.makedirs(new_log_folder)
            if not os.path.exists(new_output_folder):
                os.makedirs(new_output_folder)

            print("Creating BERT Fine Tune Trainer")
            trainer = BERTFineTuneTrainer(bert, len(vocab_obj.vocab),
                                          train_dataloader=train_data_loader, test_dataloader=test_data_loader,
                                          lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
                                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq,
                                          workspace_name=args.workspace_name, num_labels=args.num_labels, log_folder_path=new_log_folder)

            print("Fine-tune training Start....")
            start_time = time.time()
            repoch = range(args.check_epoch, args.epochs) if args.check_epoch else range(args.epochs)
            counter = 0
            patience = 10
            for epoch in repoch:
                print(f'Training Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}')
                trainer.train(epoch)
                print(f'Training Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n')

                if test_data_loader is not None:
                    print(f'Test Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}')
                    trainer.test(epoch)
                    # pickle.dump(trainer.probability_list, open(f"{args.workspace_name}/output/aaai/change4_mid_prob_{epoch}.pkl","wb"))
                    print(f'Test Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n')

                # if val_data_loader is not None:
                #     print(f'Validation Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}')
                #     trainer.val(epoch)
                #     print(f'Validation Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n')

                if trainer.save_model:  # or epoch%10 == 0
                    trainer.save(epoch, args.output_path)
                    counter = 0
                else:
                    counter += 1
                    if counter >= patience:
                        print(f"Early stopping at epoch {epoch}")
                        break

            end_time = time.time()
            print("Time Taken to fine-tune model = ", end_time - start_time)
            print(f'Pretraining Ends, Time: {time.strftime("%D %T", time.localtime(end_time))}')


if __name__ == "__main__":
    train()
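The path-rewriting loop near the top of `train()` prefixes every `*_path` argument with the workspace layout. The standalone sketch below restates that rule in simplified form (it omits the `-code` and `-diff_test_folder` branches); the workspace and task names are illustrative, not script defaults:

```python
# Sketch only: a simplified restatement of the path-prefixing rule in train().
def resolve_path(workspace, key, value, pretrain=False, finetune_task=None):
    if key == "output_path":
        sub = f"output/{finetune_task}/" if finetune_task else "output/"
        return f"{workspace}/{sub}{value}"
    if key == "vocab_path":
        return f"{workspace}/{value}"
    if pretrain:
        return f"{workspace}/pretraining/{value}"
    if finetune_task:
        return f"{workspace}/finetuning/{finetune_task}/{value}"
    return f"{workspace}/finetuning/{value}"

# e.g. fine-tuning a hypothetical task "hint" inside ratio_proportion_change3:
print(resolve_path("ratio_proportion_change3", "train_dataset_path", "train.txt",
                   finetune_task="hint"))
# ratio_proportion_change3/finetuning/hint/train.txt
print(resolve_path("ratio_proportion_change3", "vocab_path", "pretraining/vocab.txt"))
# ratio_proportion_change3/pretraining/vocab.txt
```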
metrics.py
ADDED
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
from scipy.special import softmax
|
3 |
+
|
4 |
+
|
5 |
+
class CELoss(object):
|
6 |
+
|
7 |
+
    def compute_bin_boundaries(self, probabilities=np.array([])):

        # uniform bin spacing
        if probabilities.size == 0:
            bin_boundaries = np.linspace(0, 1, self.n_bins + 1)
            self.bin_lowers = bin_boundaries[:-1]
            self.bin_uppers = bin_boundaries[1:]
        else:
            # size of bins
            bin_n = int(self.n_data / self.n_bins)

            bin_boundaries = np.array([])

            probabilities_sort = np.sort(probabilities)

            for i in range(0, self.n_bins):
                bin_boundaries = np.append(bin_boundaries, probabilities_sort[i * bin_n])
            bin_boundaries = np.append(bin_boundaries, 1.0)

            self.bin_lowers = bin_boundaries[:-1]
            self.bin_uppers = bin_boundaries[1:]

    def get_probabilities(self, output, labels, logits):
        # If not probabilities, apply softmax!
        if logits:
            self.probabilities = softmax(output, axis=1)
        else:
            self.probabilities = output

        self.labels = labels
        self.confidences = np.max(self.probabilities, axis=1)
        self.predictions = np.argmax(self.probabilities, axis=1)
        self.accuracies = np.equal(self.predictions, labels)

    def binary_matrices(self):
        idx = np.arange(self.n_data)
        # make matrices of zeros
        pred_matrix = np.zeros([self.n_data, self.n_class])
        label_matrix = np.zeros([self.n_data, self.n_class])
        # self.acc_matrix = np.zeros([self.n_data, self.n_class])
        pred_matrix[idx, self.predictions] = 1
        label_matrix[idx, self.labels] = 1

        self.acc_matrix = np.equal(pred_matrix, label_matrix)

    def compute_bins(self, index=None):
        self.bin_prop = np.zeros(self.n_bins)
        self.bin_acc = np.zeros(self.n_bins)
        self.bin_conf = np.zeros(self.n_bins)
        self.bin_score = np.zeros(self.n_bins)

        if index is None:
            confidences = self.confidences
            accuracies = self.accuracies
        else:
            confidences = self.probabilities[:, index]
            accuracies = self.acc_matrix[:, index]

        for i, (bin_lower, bin_upper) in enumerate(zip(self.bin_lowers, self.bin_uppers)):
            # Calculate |confidence - accuracy| in each bin
            in_bin = np.greater(confidences, bin_lower.item()) * np.less_equal(confidences, bin_upper.item())
            self.bin_prop[i] = np.mean(in_bin)

            if self.bin_prop[i].item() > 0:
                self.bin_acc[i] = np.mean(accuracies[in_bin])
                self.bin_conf[i] = np.mean(confidences[in_bin])
                self.bin_score[i] = np.abs(self.bin_conf[i] - self.bin_acc[i])


class MaxProbCELoss(CELoss):
    def loss(self, output, labels, n_bins=15, logits=True):
        self.n_bins = n_bins
        super().compute_bin_boundaries()
        super().get_probabilities(output, labels, logits)
        super().compute_bins()


# http://people.cs.pitt.edu/~milos/research/AAAI_Calibration.pdf
class ECELoss(MaxProbCELoss):

    def loss(self, output, labels, n_bins=15, logits=True):
        super().loss(output, labels, n_bins, logits)
        return np.dot(self.bin_prop, self.bin_score)


class MCELoss(MaxProbCELoss):

    def loss(self, output, labels, n_bins=15, logits=True):
        super().loss(output, labels, n_bins, logits)
        return np.max(self.bin_score)


# https://arxiv.org/abs/1905.11001
# Overconfidence Loss (good in high-risk applications where confident but wrong predictions can be especially harmful)
class OELoss(MaxProbCELoss):

    def loss(self, output, labels, n_bins=15, logits=True):
        super().loss(output, labels, n_bins, logits)
        return np.dot(self.bin_prop, self.bin_conf * np.maximum(self.bin_conf - self.bin_acc, np.zeros(self.n_bins)))


# https://arxiv.org/abs/1904.01685
class SCELoss(CELoss):

    def loss(self, output, labels, n_bins=15, logits=True):
        sce = 0.0
        self.n_bins = n_bins
        self.n_data = len(output)
        self.n_class = len(output[0])

        super().compute_bin_boundaries()
        super().get_probabilities(output, labels, logits)
        super().binary_matrices()

        for i in range(self.n_class):
            super().compute_bins(i)
            sce += np.dot(self.bin_prop, self.bin_score)

        return sce / self.n_class


class TACELoss(CELoss):

    def loss(self, output, labels, threshold=0.01, n_bins=15, logits=True):
        tace = 0.0
        self.n_bins = n_bins
        self.n_data = len(output)
        self.n_class = len(output[0])

        super().get_probabilities(output, labels, logits)
        self.probabilities[self.probabilities < threshold] = 0
        super().binary_matrices()

        for i in range(self.n_class):
            super().compute_bin_boundaries(self.probabilities[:, i])
            super().compute_bins(i)
            tace += np.dot(self.bin_prop, self.bin_score)

        return tace / self.n_class


# create TACELoss with threshold fixed at 0
class ACELoss(TACELoss):

    def loss(self, output, labels, n_bins=15, logits=True):
        return super().loss(output, labels, 0.0, n_bins, logits)
new_fine_tuning/.DS_Store
ADDED
Binary file (6.15 kB). View file
new_fine_tuning/README.md
ADDED
@@ -0,0 +1,197 @@
## Pre-training Data

### ratio_proportion_change3 : Calculating Percent Change and Final Amounts
> clear;python3 prepare_pretraining_input_vocab_file.py -analyze_dataset_by_section True -workspace_name ratio_proportion_change3 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -pretrain True -train_file_path pretraining/pretrain1000.txt -train_info_path pretraining/pretrain1000_info.txt -test_file_path pretraining/test1000.txt -test_info_path pretraining/test1000_info.txt

> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -pretrain True -train_file_path pretraining/pretrain2000.txt -train_info_path pretraining/pretrain2000_info.txt -test_file_path pretraining/test2000.txt -test_info_path pretraining/test2000_info.txt

#### Test simple
> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -code full -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path full.txt -train_info_path full_info.txt

> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -code gt -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path er.txt -train_info_path er_info.txt -test_file_path me.txt -test_info_path me_info.txt

> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -code correct -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path correct.txt -train_info_path correct_info.txt -test_file_path incorrect.txt -test_info_path incorrect_info.txt -final_step FinalAnswer

> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -code progress -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path graduated.txt -train_info_path graduated_info.txt -test_file_path promoted.txt -test_info_path promoted_info.txt

### ratio_proportion_change4 : Using Percents and Percent Change
> clear;python3 prepare_pretraining_input_vocab_file.py -analyze_dataset_by_section True -workspace_name ratio_proportion_change4 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor NumeratorLabel1 DenominatorLabel1 -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -pretrain True -train_file_path pretraining/pretrain1000.txt -train_info_path pretraining/pretrain1000_info.txt -test_file_path pretraining/test1000.txt -test_info_path pretraining/test1000_info.txt

> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor NumeratorLabel1 DenominatorLabel1 -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -pretrain True -train_file_path pretraining/pretrain2000.txt -train_info_path pretraining/pretrain2000_info.txt -test_file_path pretraining/test2000.txt -test_info_path pretraining/test2000_info.txt

#### Test simple
> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -code full -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path full.txt -train_info_path full_info.txt

> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -code gt -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path er.txt -train_info_path er_info.txt -test_file_path me.txt -test_info_path me_info.txt

> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -code correct -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path correct.txt -train_info_path correct_info.txt -test_file_path incorrect.txt -test_info_path incorrect_info.txt -final_step FinalAnswer

> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -code progress -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path graduated.txt -train_info_path graduated_info.txt -test_file_path promoted.txt -test_info_path promoted_info.txt

## Pretraining

### ratio_proportion_change3 : Calculating Percent Change and Final Amounts
> clear;python3 src/main.py -workspace_name ratio_proportion_change3_1920 -code pretrain1000 --pretrain_dataset pretraining/pretrain1000.txt --pretrain_val_dataset pretraining/test1000.txt
> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000 --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt

#### Test simple models
> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 1 --attn_heads 1

> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 1 --attn_heads 2

> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 2 --attn_heads 2

> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 2 --attn_heads 4

> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 4 --attn_heads 4

> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 4 --attn_heads 8


### ratio_proportion_change4 : Using Percents and Percent Change
> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain1000 --pretrain_dataset pretraining/pretrain1000.txt --pretrain_val_dataset pretraining/test1000.txt
> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000 --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt

#### Test simple models
> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_1l1h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 1 --attn_heads 1

> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_1l2h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 1 --attn_heads 2

> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_2l2h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 2 --attn_heads 2

> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_2l4h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 2 --attn_heads 4

> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_4l4h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 4 --attn_heads 4

> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_4l8h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 4 --attn_heads 8


## Preparing Fine Tuning Data

### ratio_proportion_change3 : Calculating Percent Change and Final Amounts
> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -final_step FinalAnswer

> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task check2 --train_dataset finetuning/check2/train.txt --test_dataset finetuning/check2/test.txt --train_label finetuning/check2/train_label.txt --test_label finetuning/check2/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279 --epochs 51

#### Attention Head Check
<!-- > PercentChange NumeratorQuantity2 NumeratorQuantity1 DenominatorQuantity1 OptionalTask_1 EquationAnswer NumeratorFactor EquationAnswer NumeratorFactor EquationAnswer NumeratorFactor DenominatorFactor NumeratorFactor DenominatorFactor NumeratorFactor DenominatorFactor FirstRow1:2 FirstRow1:1 FirstRow2:1 FirstRow2:2 FirstRow2:1 SecondRow ThirdRow FinalAnswerDirection ThirdRow FinalAnswer -->

> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task er ;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task correct ;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task promoted
> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task er;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task correct;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task promoted
> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task er;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task correct;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task promoted
> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task er;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task correct;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task promoted
<!-- > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep923 --attention True -->
> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task er;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task correct;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task promoted
clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset full/full_attn.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task full
> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task er;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task correct;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task promoted

<!-- PercentChange NumeratorQuantity2 NumeratorQuantity1 DenominatorQuantity1 OptionalTask_2 FirstRow2:1 FirstRow2:2 FirstRow1:1 SecondRow ThirdRow FinalAnswer FinalAnswerDirection --> me

<!-- PercentChange NumeratorQuantity2 NumeratorQuantity1 DenominatorQuantity1 OptionalTask_1 DenominatorFactor NumeratorFactor OptionalTask_2 EquationAnswer FirstRow1:1 FirstRow1:2 FirstRow2:2 FirstRow2:1 FirstRow1:2 SecondRow ThirdRow FinalAnswer --> er

> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset pretraining/attention_train.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep273 --attention True

<!-- PercentChange NumeratorQuantity2 NumeratorQuantity1 DenominatorQuantity1 OptionalTask_1 DenominatorFactor NumeratorFactor OptionalTask_2 EquationAnswer FirstRow1:1 FirstRow1:2 FirstRow2:2 FirstRow2:1 FirstRow1:2 SecondRow ThirdRow FinalAnswer -->

> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset pretraining/attention_train.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep1021 --attention True


### ratio_proportion_change4 : Using Percents and Percent Change
> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor NumeratorLabel1 DenominatorLabel1 -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -final_step FinalAnswer

### scale_drawings_3 : Calculating Measurements Using a Scale
> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name scale_drawings_3 -opt_step1 opt1-check opt1-ratio-L-n opt1-ratio-L-d opt1-ratio-R-n opt1-ratio-R-d opt1-me2-top-3 opt1-me2-top-4 opt1-me2-top-2 opt1-me2-top-1 opt1-me2-middle-1 opt1-me2-bottom-1 -opt_step2 opt2-check opt2-ratio-L-n opt2-ratio-L-d opt2-ratio-R-n opt2-ratio-R-d opt2-me2-top-3 opt2-me2-top-4 opt2-me2-top-1 opt2-me2-top-2 opt2-me2-middle-1 opt2-me2-bottom-1 -final_step unk-value1 unk-value2

### sales_tax_discounts_two_rates : Solving Problems with Both Sales Tax and Discounts
> clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name sales_tax_discounts_two_rates -opt_step1 optionalTaskGn salestaxFactor2 discountFactor2 multiplyOrderStatementGn -final_step totalCost1


# Fine Tuning Pre-trained model

## ratio_proportion_change3 : Calculating Percent Change and Final Amounts
> Selected Pretrained model: **ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279**
> New **bert/ratio_proportion_change3/output/pretrain2000/bert_trained.seq_encoder.model.ep731**

### 10per
> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task 10per --train_dataset finetuning/10per/train.txt --test_dataset finetuning/10per/test.txt --train_label finetuning/10per/train_label.txt --test_label finetuning/10per/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000/bert_trained.seq_encoder.model.ep731 --epochs 51

### IS
> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task IS --train_dataset finetuning/IS/train.txt --test_dataset finetuning/FS/train.txt --train_label finetuning/IS/train_label.txt --test_label finetuning/FS/train_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000/bert_trained.seq_encoder.model.ep731 --epochs 51

### FS
> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task FS --train_dataset finetuning/FS/train.txt --test_dataset finetuning/IS/train.txt --train_label finetuning/FS/train_label.txt --test_label finetuning/IS/train_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000/bert_trained.seq_encoder.model.ep731 --epochs 51

### correctness
> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task correctness --train_dataset finetuning/correctness/train.txt --test_dataset finetuning/correctness/test.txt --train_label finetuning/correctness/train_label.txt --test_label finetuning/correctness/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279 --epochs 51

### SL
> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task SL --train_dataset finetuning/SL/train.txt --test_dataset finetuning/SL/test.txt --train_label finetuning/SL/train_label.txt --test_label finetuning/SL/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279 --epochs 51

### effectiveness
> clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task effectiveness --train_dataset finetuning/effectiveness/train.txt --test_dataset finetuning/effectiveness/test.txt --train_label finetuning/effectiveness/train_label.txt --test_label finetuning/effectiveness/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279 --epochs 51


## ratio_proportion_change4 : Using Percents and Percent Change
> Selected Pretrained model: **ratio_proportion_change4/output/bert_trained.seq_encoder.model.ep287**
### 10per
> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -finetune_task 10per --train_dataset finetuning/10per/train.txt --test_dataset finetuning/10per/test.txt --train_label finetuning/10per/train_label.txt --test_label finetuning/10per/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change4/output/bert_trained.seq_encoder.model.ep287 --epochs 51

### IS

### FS

### correctness
> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -finetune_task correctness --train_dataset finetuning/correctness/train.txt --test_dataset finetuning/correctness/test.txt --train_label finetuning/correctness/train_label.txt --test_label finetuning/correctness/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change4/output/bert_trained.seq_encoder.model.ep287 --epochs 51

### SL
> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -finetune_task SL --train_dataset finetuning/SL/train.txt --test_dataset finetuning/SL/test.txt --train_label finetuning/SL/train_label.txt --test_label finetuning/SL/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change4/output/bert_trained.seq_encoder.model.ep287 --epochs 51

### effectiveness
> clear;python3 src/main.py -workspace_name ratio_proportion_change4 -finetune_task effectiveness --train_dataset finetuning/effectiveness/train.txt --test_dataset finetuning/effectiveness/test.txt --train_label finetuning/effectiveness/train_label.txt --test_label finetuning/effectiveness/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change4/output/bert_trained.seq_encoder.model.ep287 --epochs 51


## scale_drawings_3 : Calculating Measurements Using a Scale
> Selected Pretrained model: **scale_drawings_3/output/bert_trained.seq_encoder.model.ep252**
### 10per
> clear;python3 src/main.py -workspace_name scale_drawings_3 -finetune_task 10per --train_dataset finetuning/10per/train.txt --test_dataset finetuning/10per/test.txt --train_label finetuning/10per/train_label.txt --test_label finetuning/10per/test_label.txt --pretrained_bert_checkpoint scale_drawings_3/output/bert_trained.seq_encoder.model.ep252 --epochs 51

### IS

### FS

### correctness
> clear;python3 src/main.py -workspace_name scale_drawings_3 -finetune_task correctness --train_dataset finetuning/correctness/train.txt --test_dataset finetuning/correctness/test.txt --train_label finetuning/correctness/train_label.txt --test_label finetuning/correctness/test_label.txt --pretrained_bert_checkpoint scale_drawings_3/output/bert_trained.seq_encoder.model.ep252 --epochs 51

### SL
> clear;python3 src/main.py -workspace_name scale_drawings_3 -finetune_task SL --train_dataset finetuning/SL/train.txt --test_dataset finetuning/SL/test.txt --train_label finetuning/SL/train_label.txt --test_label finetuning/SL/test_label.txt --pretrained_bert_checkpoint scale_drawings_3/output/bert_trained.seq_encoder.model.ep252 --epochs 51

### effectiveness

## sales_tax_discounts_two_rates : Solving Problems with Both Sales Tax and Discounts
> Selected Pretrained model: **sales_tax_discounts_two_rates/output/bert_trained.seq_encoder.model.ep255**

### 10per
> clear;python3 src/main.py -workspace_name sales_tax_discounts_two_rates -finetune_task 10per --train_dataset finetuning/10per/train.txt --test_dataset finetuning/10per/test.txt --train_label finetuning/10per/train_label.txt --test_label finetuning/10per/test_label.txt --pretrained_bert_checkpoint sales_tax_discounts_two_rates/output/bert_trained.seq_encoder.model.ep255 --epochs 51

### IS

### FS

### correctness
> clear;python3 src/main.py -workspace_name sales_tax_discounts_two_rates -finetune_task correctness --train_dataset finetuning/correctness/train.txt --test_dataset finetuning/correctness/test.txt --train_label finetuning/correctness/train_label.txt --test_label finetuning/correctness/test_label.txt --pretrained_bert_checkpoint sales_tax_discounts_two_rates/output/bert_trained.seq_encoder.model.ep255 --epochs 51

### SL

### effectiveness
new_fine_tuning/__pycache__/metrics.cpython-312.pyc
ADDED
Binary file (9.16 kB). View file
new_fine_tuning/__pycache__/recalibration.cpython-312.pyc
ADDED
Binary file (5.51 kB). View file
new_fine_tuning/__pycache__/visualization.cpython-312.pyc
ADDED
Binary file (5.28 kB). View file
new_hint_fine_tuned.py
ADDED
@@ -0,0 +1,131 @@
import argparse
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split, TensorDataset
from src.dataset import TokenizerDataset
from src.bert import BERT
from src.pretrainer import BERTFineTuneTrainer1
from src.vocab import Vocab
from CustomBERTModel import CustomBERTModel  # defined in CustomBERTModel.py at the repo root; missing from the original listing
import pandas as pd


def preprocess_labels(label_csv_path):
    try:
        labels_df = pd.read_csv(label_csv_path)
        labels = labels_df['last_hint_class'].values.astype(int)
        return torch.tensor(labels, dtype=torch.long)
    except Exception as e:
        print(f"Error reading dataset file: {e}")
        return None


def preprocess_data(data_path, vocab, max_length=128):
    try:
        with open(data_path, 'r') as f:
            sequences = f.readlines()
    except Exception as e:
        print(f"Error reading data file: {e}")
        return None, None

    tokenized_sequences = []
    for sequence in sequences:
        sequence = sequence.strip()
        if sequence:
            # Encode, truncate and pad each action sequence to max_length token ids.
            encoded = vocab.to_seq(sequence, seq_len=max_length)
            encoded = encoded[:max_length] + [vocab.vocab.get('[PAD]', 0)] * (max_length - len(encoded))
            segment_label = [0] * max_length

            tokenized_sequences.append({
                'input_ids': torch.tensor(encoded),
                'segment_label': torch.tensor(segment_label)
            })

    input_ids = torch.cat([t['input_ids'].unsqueeze(0) for t in tokenized_sequences], dim=0)
    segment_labels = torch.cat([t['segment_label'].unsqueeze(0) for t in tokenized_sequences], dim=0)

    print(f"Input IDs shape: {input_ids.shape}")
    print(f"Segment labels shape: {segment_labels.shape}")

    return input_ids, segment_labels


def custom_collate_fn(batch):
    # Each item coming from the TensorDataset built in main() is an (input_ids, segment_label, label)
    # tuple, so unpack positionally and return the dict layout the fine-tune trainer expects.
    inputs = torch.stack([item[0] for item in batch], dim=0)
    segment_labels = torch.stack([item[1] for item in batch], dim=0)
    labels = torch.stack([item[2] for item in batch], dim=0)

    return {
        'input': inputs,
        'label': labels,
        'segment_label': segment_labels
    }


def main(opt):
    # Set device to GPU if available, otherwise use CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load vocabulary
    vocab = Vocab(opt.vocab_file)
    vocab.load_vocab()

    # Preprocess data and labels
    input_ids, segment_labels = preprocess_data(opt.data_path, vocab, max_length=50)  # using sequence length 50
    labels = preprocess_labels(opt.dataset)

    if input_ids is None or segment_labels is None or labels is None:
        print("Error in preprocessing data. Exiting.")
        return

    # Create TensorDataset and split into train and validation sets
    dataset = TensorDataset(input_ids, segment_labels, labels)
    val_size = len(dataset) - int(0.8 * len(dataset))
    val_dataset, train_dataset = random_split(dataset, [val_size, len(dataset) - val_size])

    # Create DataLoaders for training and validation
    train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn)
    val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=custom_collate_fn)

    # Initialize custom BERT model and move it to the device
    custom_model = CustomBERTModel(
        vocab_size=len(vocab.vocab),
        output_dim=2,
        pre_trained_model_path=opt.pre_trained_model_path
    ).to(device)

    # Initialize the fine-tuning trainer
    trainer = BERTFineTuneTrainer1(
        bert=custom_model,
        vocab_size=len(vocab.vocab),
        train_dataloader=train_dataloader,
        test_dataloader=val_dataloader,
        lr=1e-5,  # learning rate 10^-5 as specified
        num_labels=2,
        with_cuda=torch.cuda.is_available(),
        log_freq=10,
        workspace_name=opt.output_dir,
        log_folder_path=opt.log_folder_path
    )

    # Train the model
    trainer.train(epoch=20)

    # Save the model
    os.makedirs(opt.output_dir, exist_ok=True)
    output_model_file = os.path.join(opt.output_dir, 'fine_tuned_model_3.pth')
    torch.save(custom_model, output_model_file)
    print(f'Model saved to {output_model_file}')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Fine-tune BERT model.')
    parser.add_argument('--dataset', type=str, default='/home/jupyter/bert/dataset/hint_based/ratio_proportion_change_3/er/er_train.csv', help='Path to the label CSV file.')
    parser.add_argument('--data_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/gt/er.txt', help='Path to the input sequence file.')
    parser.add_argument('--output_dir', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/output/hint_classification', help='Directory to save the fine-tuned model.')
    parser.add_argument('--pre_trained_model_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/output/pretrain:1800ms:64hs:4l:8a:50s:64b:1000e:-5lr/bert_trained.seq_encoder.model.ep68', help='Path to the pre-trained BERT model.')
    parser.add_argument('--vocab_file', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/pretraining/vocab.txt', help='Path to the vocabulary file.')
    parser.add_argument('--log_folder_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/logs/oct', help='Path to the folder for saving logs.')

    opt = parser.parse_args()
    main(opt)
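The script above pickles the entire fine-tuned module with `torch.save(custom_model, ...)`, so it can be reloaded directly for inference. A minimal sketch (assumption: run from the repository root so that `CustomBERTModel` and the `src/` package are importable, which unpickling a full module requires):

    import torch

    # 'fine_tuned_model_3.pth' is the file written into --output_dir by new_hint_fine_tuned.py
    model = torch.load('fine_tuned_model_3.pth', map_location='cpu')
    model.eval()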
new_test_saved_finetuned_model.py
ADDED
@@ -0,0 +1,613 @@
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
from torch.optim import Adam
|
6 |
+
from torch.utils.data import DataLoader
|
7 |
+
import pickle
|
8 |
+
print("here1",os.getcwd())
|
9 |
+
from src.dataset import TokenizerDataset, TokenizerDatasetForCalibration
|
10 |
+
from src.vocab import Vocab
|
11 |
+
print("here3",os.getcwd())
|
12 |
+
from src.bert import BERT
|
13 |
+
from src.seq_model import BERTSM
|
14 |
+
from src.classifier_model import BERTForClassification, BERTForClassificationWithFeats
|
15 |
+
# from src.new_finetuning.optim_schedule import ScheduledOptim
|
16 |
+
import metrics, recalibration, visualization
|
17 |
+
from recalibration import ModelWithTemperature
|
18 |
+
import tqdm
|
19 |
+
import sys
|
20 |
+
import time
|
21 |
+
import numpy as np
|
22 |
+
|
23 |
+
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, roc_curve, roc_auc_score
|
24 |
+
import matplotlib.pyplot as plt
|
25 |
+
import seaborn as sns
|
26 |
+
import pandas as pd
|
27 |
+
from collections import defaultdict
|
28 |
+
print("here3",os.getcwd())
|
29 |
+
class BERTFineTuneTrainer:
|
30 |
+
|
31 |
+
def __init__(self, bertFinetunedClassifierwithFeats: BERT, #BERTForClassificationWithFeats
|
32 |
+
vocab_size: int, test_dataloader: DataLoader = None,
|
33 |
+
lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=10000,
|
34 |
+
with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, workspace_name=None,
|
35 |
+
num_labels=2, log_folder_path: str = None):
|
36 |
+
"""
|
37 |
+
:param bert: BERT model which you want to train
|
38 |
+
:param vocab_size: total word vocab size
|
39 |
+
:param test_dataloader: test dataset data loader [can be None]
|
40 |
+
:param lr: learning rate of optimizer
|
41 |
+
:param betas: Adam optimizer betas
|
42 |
+
:param weight_decay: Adam optimizer weight decay param
|
43 |
+
:param with_cuda: traning with cuda
|
44 |
+
:param log_freq: logging frequency of the batch iteration
|
45 |
+
"""
|
46 |
+
|
47 |
+
# Setup cuda device for BERT training, argument -c, --cuda should be true
|
48 |
+
# cuda_condition = torch.cuda.is_available() and with_cuda
|
49 |
+
# self.device = torch.device("cuda:0" if cuda_condition else "cpu")
|
50 |
+
self.device = torch.device("cpu") #torch.device("cuda:0" if cuda_condition else "cpu")
|
51 |
+
# print(cuda_condition, " Device used = ", self.device)
|
52 |
+
print(" Device used = ", self.device)
|
53 |
+
|
54 |
+
# available_gpus = list(range(torch.cuda.device_count()))
|
55 |
+
|
56 |
+
# This BERT model will be saved every epoch
|
57 |
+
self.model = bertFinetunedClassifierwithFeats.to("cpu")
|
58 |
+
print(self.model.parameters())
|
59 |
+
for param in self.model.parameters():
|
60 |
+
param.requires_grad = False
|
61 |
+
# Initialize the BERT Language Model, with BERT model
|
62 |
+
# self.model = BERTForClassification(self.bert, vocab_size, num_labels).to(self.device)
|
63 |
+
# self.model = BERTForClassificationWithFeats(self.bert, num_labels, 8).to(self.device)
|
64 |
+
# self.model = bertFinetunedClassifierwithFeats
|
65 |
+
# print(self.model.bert.parameters())
|
66 |
+
# for param in self.model.bert.parameters():
|
67 |
+
# param.requires_grad = False
|
68 |
+
# BERTForClassificationWithFeats(self.bert, num_labels, 18).to(self.device)
|
69 |
+
|
70 |
+
# self.model = BERTForClassificationWithFeats(self.bert, num_labels, 1).to(self.device)
|
71 |
+
# Distributed GPU training if CUDA can detect more than 1 GPU
|
72 |
+
# if with_cuda and torch.cuda.device_count() > 1:
|
73 |
+
# print("Using %d GPUS for BERT" % torch.cuda.device_count())
|
74 |
+
# self.model = nn.DataParallel(self.model, device_ids=available_gpus)
|
75 |
+
|
76 |
+
# Setting the train, validation and test data loader
|
77 |
+
# self.train_data = train_dataloader
|
78 |
+
# self.val_data = val_dataloader
|
79 |
+
self.test_data = test_dataloader
|
80 |
+
|
81 |
+
# self.optim = Adam(self.model.parameters(), lr=lr, weight_decay=weight_decay) #, eps=1e-9
|
82 |
+
self.optim = Adam(self.model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay)
|
83 |
+
# self.optim_schedule = ScheduledOptim(self.optim, self.model.bert.hidden, n_warmup_steps=warmup_steps)
|
84 |
+
# self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1)
|
85 |
+
self.criterion = nn.CrossEntropyLoss()
|
86 |
+
|
87 |
+
# if num_labels == 1:
|
88 |
+
# self.criterion = nn.MSELoss()
|
89 |
+
# elif num_labels == 2:
|
90 |
+
# self.criterion = nn.BCEWithLogitsLoss()
|
91 |
+
# # self.criterion = nn.CrossEntropyLoss()
|
92 |
+
# elif num_labels > 2:
|
93 |
+
# self.criterion = nn.CrossEntropyLoss()
|
94 |
+
# self.criterion = nn.BCEWithLogitsLoss()
|
95 |
+
|
96 |
+
|
97 |
+
self.log_freq = log_freq
|
98 |
+
self.log_folder_path = log_folder_path
|
99 |
+
# self.workspace_name = workspace_name
|
100 |
+
# self.finetune_task = finetune_task
|
101 |
+
# self.save_model = False
|
102 |
+
# self.avg_loss = 10000
|
103 |
+
self.start_time = time.time()
|
104 |
+
# self.probability_list = []
|
105 |
+
for fi in ['test']: #'val',
|
106 |
+
f = open(self.log_folder_path+f"/log_{fi}_finetuned.txt", 'w')
|
107 |
+
f.close()
|
108 |
+
print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()]))
|
109 |
+
|
110 |
+
# def train(self, epoch):
|
111 |
+
# self.iteration(epoch, self.train_data)
|
112 |
+
|
113 |
+
# def val(self, epoch):
|
114 |
+
# self.iteration(epoch, self.val_data, phase="val")
|
115 |
+
|
116 |
+
def test(self, epoch):
|
117 |
+
# if epoch == 0:
|
118 |
+
# self.avg_loss = 10000
|
119 |
+
self.iteration(epoch, self.test_data, phase="test")
|
120 |
+
|
121 |
+
def iteration(self, epoch, data_loader, phase="train"):
|
122 |
+
"""
|
123 |
+
loop over the data_loader for training or testing
|
124 |
+
if on train status, backward operation is activated
|
125 |
+
and also auto save the model every peoch
|
126 |
+
|
127 |
+
:param epoch: current epoch index
|
128 |
+
:param data_loader: torch.utils.data.DataLoader for iteration
|
129 |
+
:param train: boolean value of is train or test
|
130 |
+
:return: None
|
131 |
+
"""
|
132 |
+
|
133 |
+
# Setting the tqdm progress bar
|
134 |
+
data_iter = tqdm.tqdm(enumerate(data_loader),
|
135 |
+
desc="EP_%s:%d" % (phase, epoch),
|
136 |
+
total=len(data_loader),
|
137 |
+
bar_format="{l_bar}{r_bar}")
|
138 |
+
|
139 |
+
avg_loss = 0.0
|
140 |
+
total_correct = 0
|
141 |
+
total_element = 0
|
142 |
+
plabels = []
|
143 |
+
tlabels = []
|
144 |
+
probabs = []
|
145 |
+
positive_class_probs=[]
|
146 |
+
if phase == "train":
|
147 |
+
self.model.train()
|
148 |
+
else:
|
149 |
+
self.model.eval()
|
150 |
+
# self.probability_list = []
|
151 |
+
|
152 |
+
with open(self.log_folder_path+f"/log_{phase}_finetuned.txt", 'a') as f:
|
153 |
+
sys.stdout = f
|
154 |
+
for i, data in data_iter:
|
155 |
+
# 0. batch_data will be sent into the device(GPU or cpu)
|
156 |
+
data = {key: value.to(self.device) for key, value in data.items()}
|
157 |
+
if phase == "train":
|
158 |
+
                    logits = self.model.forward(data["input"], data["segment_label"], data["feat"])
                else:
                    with torch.no_grad():
                        logits = self.model.forward(data["input"].cpu(), data["segment_label"].cpu(), data["feat"].cpu())

                logits = logits.cpu()
                loss = self.criterion(logits, data["label"])
                # if torch.cuda.device_count() > 1:
                #     loss = loss.mean()

                # 3. backward and optimization only in train
                # if phase == "train":
                #     self.optim_schedule.zero_grad()
                #     loss.backward()
                #     self.optim_schedule.step_and_update_lr()

                # prediction accuracy
                probs = nn.Softmax(dim=-1)(logits)  # class probabilities
                probabs.extend(probs.detach().cpu().numpy().tolist())
                predicted_labels = torch.argmax(probs, dim=-1)  # predicted class index per example
                # self.probability_list.append(probs)
                # true_labels = torch.argmax(data["label"], dim=-1)
                plabels.extend(predicted_labels.cpu().numpy())
                tlabels.extend(data['label'].cpu().numpy())
                positive_class_probs = [prob[1] for prob in probabs]
                # Compare predicted labels to true labels and count correct predictions
                correct = (data['label'] == predicted_labels).sum().item()

                avg_loss += loss.item()
                total_correct += correct
                # total_element += true_labels.nelement()
                total_element += data["label"].nelement()
                # print(">>>>>>>>>>>>>>", predicted_labels, true_labels, correct, total_correct, total_element)

                post_fix = {
                    "epoch": epoch,
                    "iter": i,
                    "avg_loss": avg_loss / (i + 1),
                    "avg_acc": total_correct / total_element * 100 if total_element != 0 else 0,
                    "loss": loss.item()
                }
                if i % self.log_freq == 0:
                    data_iter.write(str(post_fix))

            precisions = precision_score(tlabels, plabels, average="weighted", zero_division=0)
            recalls = recall_score(tlabels, plabels, average="weighted")
            f1_scores = f1_score(tlabels, plabels, average="weighted")
            cmatrix = confusion_matrix(tlabels, plabels)
            end_time = time.time()
            auc_score = roc_auc_score(tlabels, positive_class_probs)
            final_msg = {
                "avg_loss": avg_loss / len(data_iter),
                "total_acc": total_correct * 100.0 / total_element,
                "precisions": precisions,
                "recalls": recalls,
                "f1_scores": f1_scores,
                # "confusion_matrix": f"{cmatrix}",
                # "true_labels": f"{tlabels}",
                # "predicted_labels": f"{plabels}",
                "time_taken_from_start": end_time - self.start_time,
                "auc_score": auc_score
            }
            with open("result.txt", 'w') as file:
                for key, value in final_msg.items():
                    file.write(f"{key}: {value}\n")
            print(final_msg)
            fpr, tpr, thresholds = roc_curve(tlabels, positive_class_probs)
            with open("roc_data.pkl", "wb") as roc_file:  # separate handle so the log file `f` is not shadowed
                pickle.dump((fpr, tpr, thresholds), roc_file)
            f.close()
            with open(self.log_folder_path + f"/log_{phase}_finetuned_info.txt", 'a') as f1:
                sys.stdout = f1
                final_msg = {
                    "epoch": f"EP{epoch}_{phase}",
                    "confusion_matrix": f"{cmatrix}",
                    "true_labels": f"{tlabels if epoch == 0 else ''}",
                    "predicted_labels": f"{plabels}",
                    "probabilities": f"{probabs}",
                    "time_taken_from_start": end_time - self.start_time
                }
                print(final_msg)
                f1.close()
                sys.stdout = sys.__stdout__
        sys.stdout = sys.__stdout__

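The evaluation loop above persists its summary metrics to result.txt and the ROC inputs (fpr, tpr, thresholds) to roc_data.pkl. A minimal sketch of reading those artifacts back for plotting is shown below; it assumes matplotlib is available, and the output file name roc_plot.png is illustrative only, not something this commit produces.

# Illustrative only: consume the artifacts written by iteration() above.
import pickle
import matplotlib.pyplot as plt

with open("roc_data.pkl", "rb") as f:
    fpr, tpr, thresholds = pickle.load(f)

plt.plot(fpr, tpr, label="fine-tuned classifier")
plt.plot([0, 1], [0, 1], linestyle="--", label="chance")
plt.xlabel("False positive rate")
plt.ylabel("True positive rate")
plt.legend()
plt.savefig("roc_plot.png")  # illustrative output path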
class BERTFineTuneCalibratedTrainer:

    def __init__(self, bertFinetunedClassifierwithFeats: BERT,  # BERTForClassificationWithFeats
                 vocab_size: int, test_dataloader: DataLoader = None,
                 lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=10000,
                 with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, workspace_name=None,
                 num_labels=2, log_folder_path: str = None):
        """
        :param bertFinetunedClassifierwithFeats: fine-tuned BERT classifier (with features) to evaluate
        :param vocab_size: total word vocab size
        :param test_dataloader: test dataset data loader [can be None]
        :param lr: learning rate of optimizer
        :param betas: Adam optimizer betas
        :param weight_decay: Adam optimizer weight decay param
        :param with_cuda: training with cuda
        :param log_freq: logging frequency of the batch iteration
        """

        # Setup cuda device for BERT training, argument -c, --cuda should be true
        cuda_condition = torch.cuda.is_available() and with_cuda
        self.device = torch.device("cuda:0" if cuda_condition else "cpu")
        print(cuda_condition, " Device used = ", self.device)

        # available_gpus = list(range(torch.cuda.device_count()))

        # This BERT model will be saved every epoch
        self.model = bertFinetunedClassifierwithFeats
        print(self.model.parameters())
        for param in self.model.parameters():
            param.requires_grad = False
        # Initialize the BERT Language Model, with BERT model
        # self.model = BERTForClassification(self.bert, vocab_size, num_labels).to(self.device)
        # self.model = BERTForClassificationWithFeats(self.bert, num_labels, 8).to(self.device)
        # self.model = bertFinetunedClassifierwithFeats
        # print(self.model.bert.parameters())
        # for param in self.model.bert.parameters():
        #     param.requires_grad = False
        # BERTForClassificationWithFeats(self.bert, num_labels, 18).to(self.device)

        # self.model = BERTForClassificationWithFeats(self.bert, num_labels, 1).to(self.device)
        # Distributed GPU training if CUDA can detect more than 1 GPU
        # if with_cuda and torch.cuda.device_count() > 1:
        #     print("Using %d GPUS for BERT" % torch.cuda.device_count())
        #     self.model = nn.DataParallel(self.model, device_ids=available_gpus)

        # Setting the train, validation and test data loader
        # self.train_data = train_dataloader
        # self.val_data = val_dataloader
        self.test_data = test_dataloader

        # self.optim = Adam(self.model.parameters(), lr=lr, weight_decay=weight_decay)  # , eps=1e-9
        self.optim = Adam(self.model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay)
        # self.optim_schedule = ScheduledOptim(self.optim, self.model.bert.hidden, n_warmup_steps=warmup_steps)
        # self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1)
        self.criterion = nn.CrossEntropyLoss()

        # if num_labels == 1:
        #     self.criterion = nn.MSELoss()
        # elif num_labels == 2:
        #     self.criterion = nn.BCEWithLogitsLoss()
        #     # self.criterion = nn.CrossEntropyLoss()
        # elif num_labels > 2:
        #     self.criterion = nn.CrossEntropyLoss()
        #     self.criterion = nn.BCEWithLogitsLoss()

        self.log_freq = log_freq
        self.log_folder_path = log_folder_path
        # self.workspace_name = workspace_name
        # self.finetune_task = finetune_task
        # self.save_model = False
        # self.avg_loss = 10000
        self.start_time = time.time()
        # self.probability_list = []
        for fi in ['test']:  # 'val',
            f = open(self.log_folder_path + f"/log_{fi}_finetuned.txt", 'w')
            f.close()
        print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()]))

    # def train(self, epoch):
    #     self.iteration(epoch, self.train_data)

    # def val(self, epoch):
    #     self.iteration(epoch, self.val_data, phase="val")

    def test(self, epoch):
        # if epoch == 0:
        #     self.avg_loss = 10000
        self.iteration(epoch, self.test_data, phase="test")

    def iteration(self, epoch, data_loader, phase="train"):
        """
        loop over the data_loader for training or testing
        if on train status, backward operation is activated
        and also auto save the model every epoch

        :param epoch: current epoch index
        :param data_loader: torch.utils.data.DataLoader for iteration
        :param phase: one of 'train', 'val' or 'test'
        :return: None
        """

        # Setting the tqdm progress bar
        data_iter = tqdm.tqdm(enumerate(data_loader),
                              desc="EP_%s:%d" % (phase, epoch),
                              total=len(data_loader),
                              bar_format="{l_bar}{r_bar}")

        avg_loss = 0.0
        total_correct = 0
        total_element = 0
        plabels = []
        tlabels = []
        probabs = []

        if phase == "train":
            self.model.train()
        else:
            self.model.eval()
        # self.probability_list = []

        with open(self.log_folder_path + f"/log_{phase}_finetuned.txt", 'a') as f:
            sys.stdout = f
            for i, data in data_iter:
                # 0. batch_data will be sent into the device (GPU or CPU)
                # print(data_pair[0])
                data = {key: value.to(self.device) for key, value in data[0].items()}
                # print(f"data : {data}")
                # data = {key: value.to(self.device) for key, value in data.items()}

                # if phase == "train":
                #     logits = self.model.forward(data["input"], data["segment_label"], data["feat"])
                # else:
                with torch.no_grad():
                    # logits = self.model.forward(data["input"], data["segment_label"], data["feat"])
                    logits = self.model.forward(data)

                loss = self.criterion(logits, data["label"])
                if torch.cuda.device_count() > 1:
                    loss = loss.mean()

                # 3. backward and optimization only in train
                # if phase == "train":
                #     self.optim_schedule.zero_grad()
                #     loss.backward()
                #     self.optim_schedule.step_and_update_lr()

                # prediction accuracy
                probs = nn.Softmax(dim=-1)(logits)  # class probabilities
                probabs.extend(probs.detach().cpu().numpy().tolist())
                predicted_labels = torch.argmax(probs, dim=-1)  # predicted class index per example
                # self.probability_list.append(probs)
                # true_labels = torch.argmax(data["label"], dim=-1)
                plabels.extend(predicted_labels.cpu().numpy())
                tlabels.extend(data['label'].cpu().numpy())
                positive_class_probs = [prob[1] for prob in probabs]

                # Compare predicted labels to true labels and count correct predictions
                correct = (data['label'] == predicted_labels).sum().item()

                avg_loss += loss.item()
                total_correct += correct
                # total_element += true_labels.nelement()
                total_element += data["label"].nelement()
                # print(">>>>>>>>>>>>>>", predicted_labels, true_labels, correct, total_correct, total_element)

                post_fix = {
                    "epoch": epoch,
                    "iter": i,
                    "avg_loss": avg_loss / (i + 1),
                    "avg_acc": total_correct / total_element * 100 if total_element != 0 else 0,
                    "loss": loss.item()
                }
                if i % self.log_freq == 0:
                    data_iter.write(str(post_fix))

            precisions = precision_score(tlabels, plabels, average="weighted", zero_division=0)
            recalls = recall_score(tlabels, plabels, average="weighted")
            f1_scores = f1_score(tlabels, plabels, average="weighted")
            cmatrix = confusion_matrix(tlabels, plabels)
            auc_score = roc_auc_score(tlabels, positive_class_probs)
            end_time = time.time()
            final_msg = {
                "avg_loss": avg_loss / len(data_iter),
                "total_acc": total_correct * 100.0 / total_element,
                "precisions": precisions,
                "recalls": recalls,
                "f1_scores": f1_scores,
                "auc_score": auc_score,
                # "confusion_matrix": f"{cmatrix}",
                # "true_labels": f"{tlabels}",
                # "predicted_labels": f"{plabels}",
                "time_taken_from_start": end_time - self.start_time
            }
            with open("result.txt", 'w') as file:
                for key, value in final_msg.items():
                    file.write(f"{key}: {value}\n")
            with open("plabels.txt", "w") as file:
                file.write(str(plabels))  # file.write expects a string, not a list

            print(final_msg)
            fpr, tpr, thresholds = roc_curve(tlabels, positive_class_probs)
            f.close()
            with open(self.log_folder_path + f"/log_{phase}_finetuned_info.txt", 'a') as f1:
                sys.stdout = f1
                final_msg = {
                    "confusion_matrix": f"{cmatrix}",
                    "true_labels": f"{tlabels if epoch == 0 else ''}",
                    "predicted_labels": f"{plabels}",
                    "probabilities": f"{probabs}",
                    "time_taken_from_start": end_time - self.start_time
                }
                print(final_msg)
                f1.close()
                sys.stdout = sys.__stdout__
        sys.stdout = sys.__stdout__

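A rough usage sketch for this class follows; it is not part of the commit. The vocab, dataset and checkpoint paths, the batch size, and the use of TokenizerDatasetForCalibration (referenced only in a comment inside train() below) are assumptions, and the log folder is expected to exist.

# Minimal sketch, assuming the calibration dataset variant and a CPU-only setup.
vocab_obj = Vocab("ratio_proportion_change3/pretraining/vocab.txt")  # hypothetical vocab location
vocab_obj.load_vocab()
dataset = TokenizerDatasetForCalibration("ratio_proportion_change3/finetuning/test.txt",
                                         "ratio_proportion_change3/finetuning/test_label.txt",
                                         vocab_obj, seq_len=128)
loader = DataLoader(dataset, batch_size=32)
classifier = torch.load("ratio_proportion_change3/output/correctness/bert_fine_tuned.model.ep48",
                        map_location=torch.device("cpu"))
calibrated_trainer = BERTFineTuneCalibratedTrainer(classifier, vocab_size=len(vocab_obj.vocab),
                                                   test_dataloader=loader, with_cuda=False,
                                                   log_folder_path="ratio_proportion_change3/logs/correctness")
calibrated_trainer.test(0)  # one evaluation pass; writes log_test_finetuned.txt, result.txt and plabels.txt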
def train():
    parser = argparse.ArgumentParser()

    parser.add_argument('-workspace_name', type=str, default=None)
    parser.add_argument('-code', type=str, default=None, help="folder for pretraining outputs and logs")
    parser.add_argument('-finetune_task', type=str, default=None, help="folder inside finetuning")
    parser.add_argument("-attention", type=bool, default=False, help="analyse attention scores")
    parser.add_argument("-diff_test_folder", type=bool, default=False, help="use for different test folder")
    parser.add_argument("-embeddings", type=bool, default=False, help="get and analyse embeddings")
    parser.add_argument('-embeddings_file_name', type=str, default=None, help="file name of embeddings")
    parser.add_argument("-pretrain", type=bool, default=False, help="pretraining: true, or false")
    # parser.add_argument('-opts', nargs='+', type=str, default=None, help='List of optional steps')
    parser.add_argument("-max_mask", type=float, default=0.15, help="fraction of input tokens selected for masking")
    # parser.add_argument("-p", "--pretrain_dataset", type=str, default="pretraining/pretrain.txt", help="pretraining dataset for bert")
    # parser.add_argument("-pv", "--pretrain_val_dataset", type=str, default="pretraining/test.txt", help="pretraining validation dataset for bert")
    # default="finetuning/test.txt",
    parser.add_argument("-vocab_path", type=str, default="pretraining/vocab.txt", help="built vocab model path with bert-vocab")

    parser.add_argument("-train_dataset_path", type=str, default="train.txt", help="fine-tuning train dataset for the progress classifier")
    parser.add_argument("-val_dataset_path", type=str, default="val.txt", help="validation set for evaluating the fine-tuned model")
    parser.add_argument("-test_dataset_path", type=str, default="test.txt", help="test set for evaluating the fine-tuned model")
    parser.add_argument("-num_labels", type=int, default=2, help="number of labels")
    parser.add_argument("-train_label_path", type=str, default="train_label.txt", help="labels for the fine-tuning train dataset")
    parser.add_argument("-val_label_path", type=str, default="val_label.txt", help="labels for the validation set")
    parser.add_argument("-test_label_path", type=str, default="test_label.txt", help="labels for the test set")
    ##### change Checkpoint for finetuning
    parser.add_argument("-pretrained_bert_checkpoint", type=str, default=None, help="checkpoint of saved pretrained bert model")
    parser.add_argument("-finetuned_bert_classifier_checkpoint", type=str, default=None, help="checkpoint of saved finetuned bert model")  # "output_feb09/bert_trained.model.ep40"
    parser.add_argument('-check_epoch', type=int, default=None)

    parser.add_argument("-hs", "--hidden", type=int, default=64, help="hidden size of transformer model")  # 64
    parser.add_argument("-l", "--layers", type=int, default=4, help="number of layers")  # 4
    parser.add_argument("-a", "--attn_heads", type=int, default=4, help="number of attention heads")  # 8
    parser.add_argument("-s", "--seq_len", type=int, default=128, help="maximum sequence length")

    parser.add_argument("-b", "--batch_size", type=int, default=500, help="number of batch_size")  # 64
    parser.add_argument("-e", "--epochs", type=int, default=1, help="number of epochs")  # 1501 / 501
    # Use 50 for pretrain, and 10 for fine tune
    parser.add_argument("-w", "--num_workers", type=int, default=0, help="dataloader worker size")

    # Later run with cuda
    parser.add_argument("--with_cuda", type=bool, default=False, help="training with CUDA: true, or false")
    parser.add_argument("--log_freq", type=int, default=10, help="printing loss every n iter: setting n")
    # parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=None, help="CUDA device ids")
    # parser.add_argument("--on_memory", type=bool, default=False, help="Loading on memory: true or false")

    parser.add_argument("--dropout", type=float, default=0.1, help="dropout of network")
    parser.add_argument("--lr", type=float, default=1e-05, help="learning rate of adam")  # 1e-3
    parser.add_argument("--adam_weight_decay", type=float, default=0.01, help="weight_decay of adam")
    parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value")
    parser.add_argument("--adam_beta2", type=float, default=0.98, help="adam second beta value")  # 0.999

    parser.add_argument("-o", "--output_path", type=str, default="bert_trained.seq_encoder.model", help="ex) output/bert.model")
    # parser.add_argument("-o", "--output_path", type=str, default="output/bert_fine_tuned.model", help="ex) output/bert.model")

    args = parser.parse_args()
    for k, v in vars(args).items():
        if 'path' in k:
            if v:
                if k == "output_path":
                    if args.code:
                        setattr(args, f"{k}", args.workspace_name + f"/output/{args.code}/" + v)
                    elif args.finetune_task:
                        setattr(args, f"{k}", args.workspace_name + f"/output/{args.finetune_task}/" + v)
                    else:
                        setattr(args, f"{k}", args.workspace_name + "/output/" + v)
                elif k != "vocab_path":
                    if args.pretrain:
                        setattr(args, f"{k}", args.workspace_name + "/pretraining/" + v)
                    else:
                        if args.code:
                            setattr(args, f"{k}", args.workspace_name + f"/{args.code}/" + v)
                        elif args.finetune_task:
                            if args.diff_test_folder and "test" in k:
                                setattr(args, f"{k}", args.workspace_name + f"/finetuning/" + v)
                            else:
                                setattr(args, f"{k}", args.workspace_name + f"/finetuning/{args.finetune_task}/" + v)
                        else:
                            setattr(args, f"{k}", args.workspace_name + "/finetuning/" + v)
                else:
                    setattr(args, f"{k}", args.workspace_name + "/" + v)

            print(f"args.{k} : {getattr(args, f'{k}')}")

    print("Loading Vocab", args.vocab_path)
    vocab_obj = Vocab(args.vocab_path)
    vocab_obj.load_vocab()
    print("Vocab Size: ", len(vocab_obj.vocab))

    print("Testing using finetuned model......")
    print("Loading Test Dataset", args.test_dataset_path)
    test_dataset = TokenizerDataset(args.test_dataset_path, args.test_label_path, vocab_obj, seq_len=args.seq_len)
    # test_dataset = TokenizerDatasetForCalibration(args.test_dataset_path, args.test_label_path, vocab_obj, seq_len=args.seq_len)

    print("Creating Dataloader...")
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers)

    print("Load fine-tuned BERT classifier model with feats")
    # cuda_condition = torch.cuda.is_available() and args.with_cuda
    device = torch.device("cpu")  # torch.device("cuda:0" if cuda_condition else "cpu")
    finetunedBERTclassifier = torch.load(args.finetuned_bert_classifier_checkpoint, map_location=device)
    if isinstance(finetunedBERTclassifier, torch.nn.DataParallel):
        finetunedBERTclassifier = finetunedBERTclassifier.module

    new_log_folder = f"{args.workspace_name}/logs"
    new_output_folder = f"{args.workspace_name}/output"
    if args.finetune_task:  # is sent almost all the time
        new_log_folder = f"{args.workspace_name}/logs/{args.finetune_task}"
        new_output_folder = f"{args.workspace_name}/output/{args.finetune_task}"

    if not os.path.exists(new_log_folder):
        os.makedirs(new_log_folder)
    if not os.path.exists(new_output_folder):
        os.makedirs(new_output_folder)

    print("Creating BERT Fine Tuned Test Trainer")
    trainer = BERTFineTuneTrainer(finetunedBERTclassifier,
                                  len(vocab_obj.vocab), test_dataloader=test_data_loader,
                                  lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
                                  with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq,
                                  workspace_name=args.workspace_name, num_labels=args.num_labels, log_folder_path=new_log_folder)

    # trainer = BERTFineTuneCalibratedTrainer(finetunedBERTclassifier,
    #                                         len(vocab_obj.vocab), test_dataloader=test_data_loader,
    #                                         lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
    #                                         with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq,
    #                                         workspace_name=args.workspace_name, num_labels=args.num_labels, log_folder_path=new_log_folder)
    print("Testing fine-tuned model Start....")
    start_time = time.time()
    repoch = range(args.check_epoch, args.epochs) if args.check_epoch else range(args.epochs)
    counter = 0
    # patience = 10
    for epoch in repoch:
        print(f'Test Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}')
        trainer.test(epoch)
        # pickle.dump(trainer.probability_list, open(f"{args.workspace_name}/output/aaai/change4_mid_prob_{epoch}.pkl", "wb"))
        print(f'Test Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n')
    end_time = time.time()
    print("Time Taken to test fine-tuned model = ", end_time - start_time)
    print(f'Testing Ends, Time: {time.strftime("%D %T", time.localtime(end_time))}')


if __name__ == "__main__":
    train()
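For reference, a typical command line for this script might look like the following. The script name new_test_saved_finetuned_model.py matches the file added in this commit, and the workspace, task and checkpoint values are examples only:

python new_test_saved_finetuned_model.py \
    -workspace_name ratio_proportion_change3 \
    -finetune_task correctness \
    -finetuned_bert_classifier_checkpoint ratio_proportion_change3/output/correctness/bert_fine_tuned.model.ep48 \
    -test_dataset_path test.txt \
    -test_label_path test_label.txt \
    --batch_size 500 --log_freq 10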
plot.png
ADDED
Binary image file (preview omitted).

prepare_pretraining_input_vocab_file.py
ADDED
The diff for this file is too large to render. See raw diff.

The remaining added files are Git LFS pointers (each diff is +3 lines: "version https://git-lfs.github.com/spec/v1", the object id, and the size in bytes), plus two binary model checkpoints:

ratio_proportion_change3/finetuning/test.txt  ADDED  oid sha256:da39d07824b2cfc3a41445694ff65018b1ffdf3e9b844d464cdba3c0ad6a8b87  size 6876678
ratio_proportion_change3/finetuning/test_in.txt  ADDED  oid sha256:e5029b0f965c4f6f1d5dd981778daf0b8f0f778dd71ecad7eb984e8461fa85b9  size 1318665
ratio_proportion_change3/finetuning/test_in_info.txt  ADDED  oid sha256:044b445c06dbdecb8663e5db8d6f270799240f1b433a169c335c15e566dbba20  size 1660506
ratio_proportion_change3/finetuning/test_in_label.txt  ADDED  oid sha256:3c035490a97515200f23348bf01bd3c16def88046a7c2215d9ef169ffc089d0d  size 17202
ratio_proportion_change3/finetuning/test_label.txt  ADDED  oid sha256:c6fee24daa1523d1a8d7615c415fac559d0bf85ace5ab18d9db1a8dff533ff68  size 79424
ratio_proportion_change3/finetuning/testr_in_label.txt  ADDED  oid sha256:b95faf33529a8cdbcedfca3853be88f917e730c79261731c4860f0d57909f13f  size 97701
ratio_proportion_change3/finetuning/testr_label.txt  ADDED  oid sha256:29519e69e1ec480ae0440e23dcb57f97bbb33cdd9b91d18e5e999d3e7e58288c  size 549160
ratio_proportion_change3/finetuning/train.txt  ADDED  oid sha256:1b397618386eb7cd21cf59251b4d48c1880330477c3186375a039047f181beae  size 775465
ratio_proportion_change3/finetuning/train_in.txt  ADDED  oid sha256:5b935dcf7dbbe3ad66c2616ae3e6c342d9d1b162c4931c7a291386c5ce609ce0  size 1656785
ratio_proportion_change3/finetuning/train_in_info.txt  ADDED  oid sha256:b5663b5706417ba65ec10abecf405f5644dfa637683fe1198ea937b8838cba6a  size 2411977
ratio_proportion_change3/finetuning/train_in_label.txt  ADDED  oid sha256:1e38fd99af6313174626b81cad3f5a6b6e88711f9f66f57cb5c3b0e6bc2e8b4c  size 17202
ratio_proportion_change3/finetuning/train_info.txt  ADDED  oid sha256:9968e038b75a633b4957602e37d57b7c0cb561f9ae3c2b17ad0f9eb48b554c21  size 1080190
ratio_proportion_change3/finetuning/train_label.txt  ADDED  oid sha256:de505197183cefe6a1c5ff4f5cd8e07dc14ed1b601951d7c3e02947d603e58c6  size 8932
ratio_proportion_change3/finetuning/trainr_in_label.txt  ADDED  oid sha256:95e450636dcb476a258439c94249f1078e9186bfe00d8e70da7b9c339f4f728c  size 129011
ratio_proportion_change3/finetuning/trainr_label.txt  ADDED  oid sha256:0294122c85237764e51d69d2efc5233d2c3a0d1027b31b4f510ca68bd6e46bc1  size 61542
ratio_proportion_change3/logs/masked/log_test_10per_finetuned.txt  ADDED  oid sha256:7d42d75c9a38be298f8ee1f022a544fe49804b72979a734b42aea08f7b31fb52  size 671476
ratio_proportion_change3/logs/masked/log_test_FS_finetuned.txt  ADDED  oid sha256:858fc5350a9bf0c75d46b8af1dc3b0f310bab1a0afa92ca8bca1e829b57d0b73  size 149839
ratio_proportion_change3/logs/masked/log_test_IS_finetuned.txt  ADDED  oid sha256:c66fcb29fc9f3d92bed511d4a91530ad79a13860b93e418f0b8c6c1be0e54169  size 149828
ratio_proportion_change3/logs/masked/log_test_pretrained.txt  ADDED  oid sha256:df1193ce0490717b442303f51da68869c6419f461ce5044b5a275b40e7bfb368  size 1055582
ratio_proportion_change3/logs/masked/log_train_10per_finetuned.txt  ADDED  oid sha256:bf58b6b3ba0d0a9562cfd510ce1a7bff20a4bb0ee1faa907397314333d26dcd2  size 88900
ratio_proportion_change3/logs/masked/log_train_FS_finetuned.txt  ADDED  oid sha256:e92c0a8722c7b21b36f5028493692ccf32b473c20d3f6027d54e5fd822960432  size 167286
ratio_proportion_change3/logs/masked/log_train_IS_finetuned.txt  ADDED  oid sha256:644994580015b35979dce25d0e2b3be7b6ef6d02193a1b0ea6d10411412c5495  size 167148
ratio_proportion_change3/logs/masked/log_train_pretrained.txt  ADDED  oid sha256:6095cbd3be17925bc64b05902281c01c2c3255df63ea2e5cd48b5d402c06033b  size 4116343
ratio_proportion_change3/output/FS/train.txt  ADDED  oid sha256:367628f1b9aa5047a07d5eb6e574e166e12d533d18a1634045424736bff9cc42  size 1699339
ratio_proportion_change3/output/FS/train_label.txt  ADDED  oid sha256:be1eeaf1d96b6010aec2db568d20170e79d5e53bb790e250074f877931ab23d3  size 20636
ratio_proportion_change3/output/IS/train.txt  ADDED  oid sha256:bc804d5d3a54d0cbe69b295464378609916a2c5b2a8c0696757d20be185e1427  size 1361007
ratio_proportion_change3/output/IS/train_label.txt  ADDED  oid sha256:a394ba9e86a56b82422fd9b7a7212bde72eae95fbd8d899e0e9fb9c21132a605  size 20636
ratio_proportion_change3/output/correctness/bert_fine_tuned.model.ep48  ADDED  Binary file (974 kB)
ratio_proportion_change3/output/correctness/test.txt  ADDED  oid sha256:0faf7af6b63c26cb29b586e087c84881365a94b22d71f1a8587bfa979f2d5794  size 6253326
ratio_proportion_change3/output/correctness/test_label.txt  ADDED  oid sha256:708d181754296d2bbbe56ce509eb896ca69bd2d7a418839c0a09836bf1c31541  size 75023
ratio_proportion_change3/output/effectiveness/bert_fine_tuned.model.ep28  ADDED  Binary file (974 kB)