asaduzzaman607 committed
Commit 8dac844 · 1 Parent(s): 887060c

Add other files

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .DS_Store +0 -0
  2. CustomBERTModel.py +33 -0
  3. Untitled.ipynb +0 -0
  4. __pycache__/metrics.cpython-312.pyc +0 -0
  5. __pycache__/recalibration.cpython-312.pyc +0 -0
  6. __pycache__/visualization.cpython-312.pyc +0 -0
  7. data_preprocessor.py +170 -0
  8. hint_fine_tuning.py +382 -0
  9. main.py +322 -0
  10. metrics.py +149 -0
  11. new_fine_tuning/.DS_Store +0 -0
  12. new_fine_tuning/README.md +197 -0
  13. new_fine_tuning/__pycache__/metrics.cpython-312.pyc +0 -0
  14. new_fine_tuning/__pycache__/recalibration.cpython-312.pyc +0 -0
  15. new_fine_tuning/__pycache__/visualization.cpython-312.pyc +0 -0
  16. new_hint_fine_tuned.py +131 -0
  17. new_test_saved_finetuned_model.py +613 -0
  18. plot.png +0 -0
  19. prepare_pretraining_input_vocab_file.py +0 -0
  20. ratio_proportion_change3/finetuning/test.txt +3 -0
  21. ratio_proportion_change3/finetuning/test_in.txt +3 -0
  22. ratio_proportion_change3/finetuning/test_in_info.txt +3 -0
  23. ratio_proportion_change3/finetuning/test_in_label.txt +3 -0
  24. ratio_proportion_change3/finetuning/test_label.txt +3 -0
  25. ratio_proportion_change3/finetuning/testr_in_label.txt +3 -0
  26. ratio_proportion_change3/finetuning/testr_label.txt +3 -0
  27. ratio_proportion_change3/finetuning/train.txt +3 -0
  28. ratio_proportion_change3/finetuning/train_in.txt +3 -0
  29. ratio_proportion_change3/finetuning/train_in_info.txt +3 -0
  30. ratio_proportion_change3/finetuning/train_in_label.txt +3 -0
  31. ratio_proportion_change3/finetuning/train_info.txt +3 -0
  32. ratio_proportion_change3/finetuning/train_label.txt +3 -0
  33. ratio_proportion_change3/finetuning/trainr_in_label.txt +3 -0
  34. ratio_proportion_change3/finetuning/trainr_label.txt +3 -0
  35. ratio_proportion_change3/logs/masked/log_test_10per_finetuned.txt +3 -0
  36. ratio_proportion_change3/logs/masked/log_test_FS_finetuned.txt +3 -0
  37. ratio_proportion_change3/logs/masked/log_test_IS_finetuned.txt +3 -0
  38. ratio_proportion_change3/logs/masked/log_test_pretrained.txt +3 -0
  39. ratio_proportion_change3/logs/masked/log_train_10per_finetuned.txt +3 -0
  40. ratio_proportion_change3/logs/masked/log_train_FS_finetuned.txt +3 -0
  41. ratio_proportion_change3/logs/masked/log_train_IS_finetuned.txt +3 -0
  42. ratio_proportion_change3/logs/masked/log_train_pretrained.txt +3 -0
  43. ratio_proportion_change3/output/FS/train.txt +3 -0
  44. ratio_proportion_change3/output/FS/train_label.txt +3 -0
  45. ratio_proportion_change3/output/IS/train.txt +3 -0
  46. ratio_proportion_change3/output/IS/train_label.txt +3 -0
  47. ratio_proportion_change3/output/correctness/bert_fine_tuned.model.ep48 +0 -0
  48. ratio_proportion_change3/output/correctness/test.txt +3 -0
  49. ratio_proportion_change3/output/correctness/test_label.txt +3 -0
  50. ratio_proportion_change3/output/effectiveness/bert_fine_tuned.model.ep28 +0 -0
.DS_Store ADDED
Binary file (6.15 kB).
 
CustomBERTModel.py ADDED
@@ -0,0 +1,33 @@
+ import torch
+ import torch.nn as nn
+ from src.bert import BERT
+
+ class CustomBERTModel(nn.Module):
+     def __init__(self, vocab_size, output_dim, pre_trained_model_path):
+         super(CustomBERTModel, self).__init__()
+         hidden_size = 768
+         self.bert = BERT(vocab_size=vocab_size, hidden=hidden_size, n_layers=4, attn_heads=8, dropout=0.1)
+
+         # Load the pre-trained model's state_dict
+         checkpoint = torch.load(pre_trained_model_path, map_location=torch.device('cpu'))
+         if isinstance(checkpoint, dict):
+             self.bert.load_state_dict(checkpoint)
+         else:
+             raise TypeError(f"Expected state_dict, got {type(checkpoint)} instead.")
+
+         # Fully connected layer with input size 768 (matching BERT hidden size)
+         self.fc = nn.Linear(hidden_size, output_dim)
+
+     def forward(self, sequence, segment_info):
+         sequence = sequence.to(next(self.parameters()).device)
+         segment_info = segment_info.to(sequence.device)
+
+         x = self.bert(sequence, segment_info)
+         print(f"BERT output shape: {x.shape}")
+
+         cls_embeddings = x[:, 0]  # Extract CLS token embeddings
+         print(f"CLS Embeddings shape: {cls_embeddings.shape}")
+
+         logits = self.fc(cls_embeddings)  # Pass tensor of size (batch_size, 768) to the fully connected layer
+
+         return logits
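A minimal usage sketch for the class above, assuming the repository's src.bert.BERT module is importable and a compatible pre-trained state_dict sits at the placeholder path; the vocabulary size, sequence length, and batch size are illustrative values, not ones fixed by this file:

import torch
from CustomBERTModel import CustomBERTModel

# Placeholder checkpoint path and sizes; real values come from the vocab file
# and the pre-trained BERT checkpoint used elsewhere in this repository.
model = CustomBERTModel(vocab_size=1000, output_dim=2,
                        pre_trained_model_path="path/to/bert_checkpoint")
model.eval()

# Dummy batch of token ids (batch_size=4, seq_len=50) with all-zero segment labels.
sequence = torch.randint(0, 1000, (4, 50))
segment_info = torch.zeros_like(sequence)

with torch.no_grad():
    logits = model(sequence, segment_info)  # expected shape: (4, 2)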
Untitled.ipynb ADDED
The diff for this file is too large to render.
 
__pycache__/metrics.cpython-312.pyc ADDED
Binary file (9.14 kB).
 
__pycache__/recalibration.cpython-312.pyc ADDED
Binary file (5.49 kB).
 
__pycache__/visualization.cpython-312.pyc ADDED
Binary file (5.26 kB).
 
data_preprocessor.py ADDED
@@ -0,0 +1,170 @@
+ import time
+ import pandas as pd
+
+ import sys
+
+ class DataPreprocessor:
+     def __init__(self, input_file_path):
+         self.input_file_path = input_file_path
+         self.unique_students = None
+         self.unique_problems = None
+         self.unique_prob_hierarchy = None
+         self.unique_steps = None
+         self.unique_kcs = None
+
+     def analyze_dataset(self):
+         file_iterator = self.load_file_iterator()
+
+         start_time = time.time()
+         self.unique_students = {"st"}
+         self.unique_problems = {"pr"}
+         self.unique_prob_hierarchy = {"ph"}
+         self.unique_kcs = {"kc"}
+         for chunk_data in file_iterator:
+             for student_id, std_groups in chunk_data.groupby('Anon Student Id'):
+                 self.unique_students.update({student_id})
+                 prob_hierarchy = std_groups.groupby('Level (Workspace Id)')
+                 for hierarchy, hierarchy_groups in prob_hierarchy:
+                     self.unique_prob_hierarchy.update({hierarchy})
+                     prob_name = hierarchy_groups.groupby('Problem Name')
+                     for problem_name, prob_name_groups in prob_name:
+                         self.unique_problems.update({problem_name})
+                         sub_skills = prob_name_groups['KC Model(MATHia)']
+                         for a in sub_skills:
+                             if str(a) != "nan":
+                                 temp = a.split("~~")
+                                 for kc in temp:
+                                     self.unique_kcs.update({kc})
+         self.unique_students.remove("st")
+         self.unique_problems.remove("pr")
+         self.unique_prob_hierarchy.remove("ph")
+         self.unique_kcs.remove("kc")
+         end_time = time.time()
+         print("Time Taken to analyze dataset = ", end_time - start_time)
+         print("Length of unique students->", len(self.unique_students))
+         print("Length of unique problems->", len(self.unique_problems))
+         print("Length of unique problem hierarchy->", len(self.unique_prob_hierarchy))
+         print("Length of Unique Knowledge components ->", len(self.unique_kcs))
+
+     def analyze_dataset_by_section(self, workspace_name):
+         file_iterator = self.load_file_iterator()
+
+         start_time = time.time()
+         self.unique_students = {"st"}
+         self.unique_problems = {"pr"}
+         self.unique_prob_hierarchy = {"ph"}
+         self.unique_steps = {"s"}
+         self.unique_kcs = {"kc"}
+         # with open("workspace_info.txt", 'a') as f:
+         #     sys.stdout = f
+         for chunk_data in file_iterator:
+             for student_id, std_groups in chunk_data.groupby('Anon Student Id'):
+                 prob_hierarchy = std_groups.groupby('Level (Workspace Id)')
+                 for hierarchy, hierarchy_groups in prob_hierarchy:
+                     if workspace_name == hierarchy:
+                         # print("Workspace : ", hierarchy)
+                         self.unique_students.update({student_id})
+                         self.unique_prob_hierarchy.update({hierarchy})
+                         prob_name = hierarchy_groups.groupby('Problem Name')
+                         for problem_name, prob_name_groups in prob_name:
+                             self.unique_problems.update({problem_name})
+                             step_names = prob_name_groups['Step Name']
+                             sub_skills = prob_name_groups['KC Model(MATHia)']
+                             for step in step_names:
+                                 if str(step) != "nan":
+                                     self.unique_steps.update({step})
+                             for a in sub_skills:
+                                 if str(a) != "nan":
+                                     temp = a.split("~~")
+                                     for kc in temp:
+                                         self.unique_kcs.update({kc})
+         self.unique_problems.remove("pr")
+         self.unique_prob_hierarchy.remove("ph")
+         self.unique_steps.remove("s")
+         self.unique_kcs.remove("kc")
+         end_time = time.time()
+         print("Time Taken to analyze dataset = ", end_time - start_time)
+         print("Workspace-> ",workspace_name)
+         print("Length of unique students->", len(self.unique_students))
+         print("Length of unique problems->", len(self.unique_problems))
+         print("Length of unique problem hierarchy->", len(self.unique_prob_hierarchy))
+         print("Length of unique step names ->", len(self.unique_steps))
+         print("Length of unique knowledge components ->", len(self.unique_kcs))
+         # f.close()
+         # sys.stdout = sys.__stdout__
+
+     def analyze_dataset_by_school(self, workspace_name, school_id=None):
+         file_iterator = self.load_file_iterator(sep=",")
+
+         start_time = time.time()
+         self.unique_schools = set()
+         self.unique_class = set()
+         self.unique_students = set()
+         self.unique_problems = set()
+         self.unique_steps = set()
+         self.unique_kcs = set()
+         self.unique_actions = set()
+         self.unique_outcomes = set()
+         self.unique_new_steps_w_action_attempt = set()
+         self.unique_new_steps_w_kcs = set()
+         self.unique_new_steps_w_action_attempt_kcs = set()
+
+         for chunk_data in file_iterator:
+             for school, school_group in chunk_data.groupby('CF (Anon School Id)'):
+                 # if school and school == school_id:
+                 self.unique_schools.add(school)
+                 for class_id, class_group in school_group.groupby('CF (Anon Class Id)'):
+                     self.unique_class.add(class_id)
+                     for student_id, std_group in class_group.groupby('Anon Student Id'):
+                         self.unique_students.add(student_id)
+                         for prob, prob_group in std_group.groupby('Problem Name'):
+                             self.unique_problems.add(prob)
+
+                             step_names = set(prob_group['Step Name'])
+                             sub_skills = set(prob_group['KC Model(MATHia)'])
+                             actions = set(prob_group['Action'])
+                             outcomes = set(prob_group['Outcome'])
+
+                             self.unique_steps.update(step_names)
+                             self.unique_kcs.update(sub_skills)
+                             self.unique_actions.update(actions)
+                             self.unique_outcomes.update(outcomes)
+
+                             for step in step_names:
+                                 if pd.isna(step):
+                                     step_group = prob_group[pd.isna(prob_group['Step Name'])]
+                                 else:
+                                     step_group = prob_group[prob_group['Step Name']==step]
+
+                                 for kc in set(step_group['KC Model(MATHia)']):
+                                     new_step = f"{step}:{kc}"
+                                     self.unique_new_steps_w_kcs.add(new_step)
+
+                                 for action, action_group in step_group.groupby('Action'):
+                                     for attempt, attempt_group in action_group.groupby('Attempt At Step'):
+                                         new_step = f"{step}:{action}:{attempt}"
+                                         self.unique_new_steps_w_action_attempt.add(new_step)
+
+                                         for kc in set(attempt_group["KC Model(MATHia)"]):
+                                             new_step = f"{step}:{action}:{attempt}:{kc}"
+                                             self.unique_new_steps_w_action_attempt_kcs.add(new_step)
+
+
+         end_time = time.time()
+         print("Time Taken to analyze dataset = ", end_time - start_time)
+         print("Workspace-> ",workspace_name)
+         print("Length of unique students->", len(self.unique_students))
+         print("Length of unique problems->", len(self.unique_problems))
+         print("Length of unique classes->", len(self.unique_class))
+         print("Length of unique step names ->", len(self.unique_steps))
+         print("Length of unique knowledge components ->", len(self.unique_kcs))
+         print("Length of unique actions ->", len(self.unique_actions))
+         print("Length of unique outcomes ->", len(self.unique_outcomes))
+         print("Length of unique new step names with actions and attempts ->", len(self.unique_new_steps_w_action_attempt))
+         print("Length of unique new step names with actions, attempts and kcs ->", len(self.unique_new_steps_w_action_attempt_kcs))
+         print("Length of unique new step names with kcs ->", len(self.unique_new_steps_w_kcs))
+
+     def load_file_iterator(self, sep="\t"):
+         chunk_iterator = pd.read_csv(self.input_file_path, sep=sep, header=0, iterator=True, chunksize=1000000)
+         return chunk_iterator
+
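A short sketch of how this preprocessor might be driven, assuming a tab-separated MATHia-style step export with the column names the methods reference ('Anon Student Id', 'Level (Workspace Id)', 'Problem Name', 'KC Model(MATHia)'); the file path below is a placeholder:

from data_preprocessor import DataPreprocessor

# Placeholder path to a tab-separated student-step export with a header row.
preprocessor = DataPreprocessor(input_file_path="data/student_step_export.tsv")

# Whole-dataset summary: unique students, problems, problem hierarchies, and KCs.
preprocessor.analyze_dataset()

# Same summary restricted to one workspace/section.
preprocessor.analyze_dataset_by_section("ratio_proportion_change3")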
hint_fine_tuning.py ADDED
@@ -0,0 +1,382 @@
+ import argparse
+ import os
+ import sys
+ import torch
+ import torch.nn as nn
+ from torch.utils.data import DataLoader, random_split, TensorDataset
+ from src.dataset import TokenizerDataset
+ from src.bert import BERT
+ from src.pretrainer import BERTFineTuneTrainer1
+ from src.vocab import Vocab
+ import pandas as pd
+
+
+ # class CustomBERTModel(nn.Module):
+ #     def __init__(self, vocab_size, output_dim, pre_trained_model_path):
+ #         super(CustomBERTModel, self).__init__()
+ #         hidden_size = 768
+ #         self.bert = BERT(vocab_size=vocab_size, hidden=hidden_size, n_layers=12, attn_heads=12, dropout=0.1)
+ #         checkpoint = torch.load(pre_trained_model_path, map_location=torch.device('cpu'))
+ #         if isinstance(checkpoint, dict):
+ #             self.bert.load_state_dict(checkpoint)
+ #         elif isinstance(checkpoint, BERT):
+ #             self.bert = checkpoint
+ #         else:
+ #             raise TypeError(f"Expected state_dict or BERT instance, got {type(checkpoint)} instead.")
+ #         self.fc = nn.Linear(hidden_size, output_dim)
+
+ #     def forward(self, sequence, segment_info):
+ #         sequence = sequence.to(next(self.parameters()).device)
+ #         segment_info = segment_info.to(sequence.device)
+
+ #         if sequence.size(0) == 0 or sequence.size(1) == 0:
+ #             raise ValueError("Input sequence tensor has 0 elements. Check data preprocessing.")
+
+ #         x = self.bert(sequence, segment_info)
+ #         print(f"BERT output shape: {x.shape}")
+
+ #         if x.size(0) == 0 or x.size(1) == 0:
+ #             raise ValueError("BERT output tensor has 0 elements. Check input dimensions.")
+
+ #         cls_embeddings = x[:, 0]
+ #         logits = self.fc(cls_embeddings)
+ #         return logits
+
+ # class CustomBERTModel(nn.Module):
+ #     def __init__(self, vocab_size, output_dim, pre_trained_model_path):
+ #         super(CustomBERTModel, self).__init__()
+ #         hidden_size = 764 # Ensure this is 768
+ #         self.bert = BERT(vocab_size=vocab_size, hidden=hidden_size, n_layers=12, attn_heads=12, dropout=0.1)
+
+ #         # Load the pre-trained model's state_dict
+ #         checkpoint = torch.load(pre_trained_model_path, map_location=torch.device('cpu'))
+ #         if isinstance(checkpoint, dict):
+ #             self.bert.load_state_dict(checkpoint)
+ #         else:
+ #             raise TypeError(f"Expected state_dict, got {type(checkpoint)} instead.")
+
+ #         # Fully connected layer with input size 768
+ #         self.fc = nn.Linear(hidden_size, output_dim)
+
+ #     def forward(self, sequence, segment_info):
+ #         sequence = sequence.to(next(self.parameters()).device)
+ #         segment_info = segment_info.to(sequence.device)
+
+ #         x = self.bert(sequence, segment_info)
+ #         print(f"BERT output shape: {x.shape}") # Should output (batch_size, seq_len, 768)
+
+ #         cls_embeddings = x[:, 0] # Extract CLS token embeddings
+ #         print(f"CLS Embeddings shape: {cls_embeddings.shape}") # Should output (batch_size, 768)
+
+ #         logits = self.fc(cls_embeddings) # Should now pass a tensor of size (batch_size, 768) to `fc`
+
+ #         return logits
+
+
+ # for test
+ class CustomBERTModel(nn.Module):
+     def __init__(self, vocab_size, output_dim, pre_trained_model_path):
+         super(CustomBERTModel, self).__init__()
+         self.hidden = 764 # Ensure this is defined correctly
+         self.bert = BERT(vocab_size=vocab_size, hidden=self.hidden, n_layers=12, attn_heads=12, dropout=0.1)
+
+         # Load the pre-trained model's state_dict
+         checkpoint = torch.load(pre_trained_model_path, map_location=torch.device('cpu'))
+         if isinstance(checkpoint, dict):
+             self.bert.load_state_dict(checkpoint)
+         else:
+             raise TypeError(f"Expected state_dict, got {type(checkpoint)} instead.")
+
+         self.fc = nn.Linear(self.hidden, output_dim)
+
+     def forward(self, sequence, segment_info):
+         x = self.bert(sequence, segment_info)
+         cls_embeddings = x[:, 0]  # Extract CLS token embeddings
+         logits = self.fc(cls_embeddings)  # Pass to fully connected layer
+         return logits
+
+ def preprocess_labels(label_csv_path):
+     try:
+         labels_df = pd.read_csv(label_csv_path)
+         labels = labels_df['last_hint_class'].values.astype(int)
+         return torch.tensor(labels, dtype=torch.long)
+     except Exception as e:
+         print(f"Error reading dataset file: {e}")
+         return None
+
+
+ def preprocess_data(data_path, vocab, max_length=128):
+     try:
+         with open(data_path, 'r') as f:
+             sequences = f.readlines()
+     except Exception as e:
+         print(f"Error reading data file: {e}")
+         return None, None
+
+     if len(sequences) == 0:
+         raise ValueError(f"No sequences found in data file {data_path}. Check the file content.")
+
+     tokenized_sequences = []
+
+     for sequence in sequences:
+         sequence = sequence.strip()
+         if sequence:
+             encoded = vocab.to_seq(sequence, seq_len=max_length)
+             encoded = encoded[:max_length] + [vocab.vocab.get('[PAD]', 0)] * (max_length - len(encoded))
+             segment_label = [0] * max_length
+
+             tokenized_sequences.append({
+                 'input_ids': torch.tensor(encoded),
+                 'segment_label': torch.tensor(segment_label)
+             })
+
+     if not tokenized_sequences:
+         raise ValueError("Tokenization resulted in an empty list. Check the sequences and tokenization logic.")
+
+     tokenized_sequences = [t for t in tokenized_sequences if len(t['input_ids']) == max_length]
+
+     if not tokenized_sequences:
+         raise ValueError("All tokenized sequences are of unexpected length. This suggests an issue with the tokenization logic.")
+
+     input_ids = torch.cat([t['input_ids'].unsqueeze(0) for t in tokenized_sequences], dim=0)
+     segment_labels = torch.cat([t['segment_label'].unsqueeze(0) for t in tokenized_sequences], dim=0)
+
+     print(f"Input IDs shape: {input_ids.shape}")
+     print(f"Segment labels shape: {segment_labels.shape}")
+
+     return input_ids, segment_labels
+
+
+ def collate_fn(batch):
+     inputs = []
+     labels = []
+     segment_labels = []
+
+     for item in batch:
+         if item is None:
+             continue
+
+         if isinstance(item, dict):
+             inputs.append(item['input_ids'].unsqueeze(0))
+             labels.append(item['label'].unsqueeze(0))
+             segment_labels.append(item['segment_label'].unsqueeze(0))
+
+     if len(inputs) == 0 or len(segment_labels) == 0:
+         print("Empty batch encountered. Returning None to skip this batch.")
+         return None
+
+     try:
+         inputs = torch.cat(inputs, dim=0)
+         labels = torch.cat(labels, dim=0)
+         segment_labels = torch.cat(segment_labels, dim=0)
+     except Exception as e:
+         print(f"Error concatenating tensors: {e}")
+         return None
+
+     return {
+         'input': inputs,
+         'label': labels,
+         'segment_label': segment_labels
+     }
+
+ def custom_collate_fn(batch):
+     processed_batch = collate_fn(batch)
+
+     if processed_batch is None or len(processed_batch['input']) == 0:
+         # Return a valid batch with at least one element instead of an empty one
+         return {
+             'input': torch.zeros((1, 128), dtype=torch.long),
+             'label': torch.zeros((1,), dtype=torch.long),
+             'segment_label': torch.zeros((1, 128), dtype=torch.long)
+         }
+
+     return processed_batch
+
+
+ def train_without_progress_status(trainer, epoch, shuffle):
+     for epoch_idx in range(epoch):
+         print(f"EP_train:{epoch_idx}:")
+         for batch in trainer.train_data:
+             if batch is None:
+                 continue
+
+             # Check if batch is a string (indicating an issue)
+             if isinstance(batch, str):
+                 print(f"Error: Received a string instead of a dictionary in batch: {batch}")
+                 raise ValueError(f"Unexpected string in batch: {batch}")
+
+             # Validate the batch structure before passing to iteration
+             if isinstance(batch, dict):
+                 # Verify that all expected keys are present and that the values are tensors
+                 if all(key in batch for key in ['input_ids', 'segment_label', 'labels']):
+                     if all(isinstance(batch[key], torch.Tensor) for key in batch):
+                         try:
+                             print(f"Batch Structure: {batch}")  # Debugging batch before iteration
+                             trainer.iteration(epoch_idx, batch)
+                         except Exception as e:
+                             print(f"Error during batch processing: {e}")
+                             sys.stdout.flush()
+                             raise e  # Propagate the exception for better debugging
+                     else:
+                         print(f"Error: Expected all values in batch to be tensors, but got: {batch}")
+                         raise ValueError("Batch contains non-tensor values.")
+                 else:
+                     print(f"Error: Batch missing expected keys. Batch keys: {batch.keys()}")
+                     raise ValueError("Batch does not contain expected keys.")
+             else:
+                 print(f"Error: Expected batch to be a dictionary but got {type(batch)} instead.")
+                 raise ValueError(f"Invalid batch structure: {batch}")
+
+ # def main(opt):
+ #     # device = torch.device("cpu")
+ #     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+ #     vocab = Vocab(opt.vocab_file)
+ #     vocab.load_vocab()
+
+ #     input_ids, segment_labels = preprocess_data(opt.data_path, vocab, max_length=128)
+ #     labels = preprocess_labels(opt.dataset)
+
+ #     if input_ids is None or segment_labels is None or labels is None:
+ #         print("Error in preprocessing data. Exiting.")
+ #         return
+
+ #     dataset = TensorDataset(input_ids, segment_labels, torch.tensor(labels, dtype=torch.long))
+ #     val_size = len(dataset) - int(0.8 * len(dataset))
+ #     val_dataset, train_dataset = random_split(dataset, [val_size, len(dataset) - val_size])
+
+ #     train_dataloader = DataLoader(
+ #         train_dataset,
+ #         batch_size=32,
+ #         shuffle=True,
+ #         collate_fn=custom_collate_fn
+ #     )
+ #     val_dataloader = DataLoader(
+ #         val_dataset,
+ #         batch_size=32,
+ #         shuffle=False,
+ #         collate_fn=custom_collate_fn
+ #     )
+
+ #     custom_model = CustomBERTModel(
+ #         vocab_size=len(vocab.vocab),
+ #         output_dim=2,
+ #         pre_trained_model_path=opt.pre_trained_model_path
+ #     ).to(device)
+
+ #     trainer = BERTFineTuneTrainer1(
+ #         bert=custom_model.bert,
+ #         vocab_size=len(vocab.vocab),
+ #         train_dataloader=train_dataloader,
+ #         test_dataloader=val_dataloader,
+ #         lr=5e-5,
+ #         num_labels=2,
+ #         with_cuda=torch.cuda.is_available(),
+ #         log_freq=10,
+ #         workspace_name=opt.output_dir,
+ #         log_folder_path=opt.log_folder_path
+ #     )
+
+ #     trainer.train(epoch=20)
+
+ #     # os.makedirs(opt.output_dir, exist_ok=True)
+ #     # output_model_file = os.path.join(opt.output_dir, 'fine_tuned_model.pth')
+ #     # torch.save(custom_model.state_dict(), output_model_file)
+ #     # print(f'Model saved to {output_model_file}')
+
+ #     os.makedirs(opt.output_dir, exist_ok=True)
+ #     output_model_file = os.path.join(opt.output_dir, 'fine_tuned_model_2.pth')
+ #     torch.save(custom_model, output_model_file)
+ #     print(f'Model saved to {output_model_file}')
+
+
+ def main(opt):
+     # Set device to GPU if available, otherwise use CPU
+     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+     print(torch.cuda.is_available())  # Should return True if GPU is available
+     print(torch.cuda.device_count())
+
+     # Load vocabulary
+     vocab = Vocab(opt.vocab_file)
+     vocab.load_vocab()
+
+     # Preprocess data and labels
+     input_ids, segment_labels = preprocess_data(opt.data_path, vocab, max_length=128)
+     labels = preprocess_labels(opt.dataset)
+
+     if input_ids is None or segment_labels is None or labels is None:
+         print("Error in preprocessing data. Exiting.")
+         return
+
+     # Transfer tensors to the correct device (GPU/CPU)
+     input_ids = input_ids.to(device)
+     segment_labels = segment_labels.to(device)
+     labels = torch.tensor(labels, dtype=torch.long).to(device)
+
+     # Create TensorDataset and split into train and validation sets
+     dataset = TensorDataset(input_ids, segment_labels, labels)
+     val_size = len(dataset) - int(0.8 * len(dataset))
+     val_dataset, train_dataset = random_split(dataset, [val_size, len(dataset) - val_size])
+
+     # Create DataLoaders for training and validation
+     train_dataloader = DataLoader(
+         train_dataset,
+         batch_size=32,
+         shuffle=True,
+         collate_fn=custom_collate_fn
+     )
+     val_dataloader = DataLoader(
+         val_dataset,
+         batch_size=32,
+         shuffle=False,
+         collate_fn=custom_collate_fn
+     )
+
+     # Initialize custom BERT model and move it to the device
+     custom_model = CustomBERTModel(
+         vocab_size=len(vocab.vocab),
+         output_dim=2,
+         pre_trained_model_path=opt.pre_trained_model_path
+     ).to(device)
+
+     # Initialize the fine-tuning trainer
+     trainer = BERTFineTuneTrainer1(
+         bert=custom_model.bert,
+         vocab_size=len(vocab.vocab),
+         train_dataloader=train_dataloader,
+         test_dataloader=val_dataloader,
+         lr=5e-5,
+         num_labels=2,
+         with_cuda=torch.cuda.is_available(),
+         log_freq=10,
+         workspace_name=opt.output_dir,
+         log_folder_path=opt.log_folder_path
+     )
+
+     # Train the model
+     trainer.train(epoch=20)
+
+     # Save the model to the specified output directory
+     # os.makedirs(opt.output_dir, exist_ok=True)
+     # output_model_file = os.path.join(opt.output_dir, 'fine_tuned_model_2.pth')
+     # torch.save(custom_model.state_dict(), output_model_file)
+     # print(f'Model saved to {output_model_file}')
+     os.makedirs(opt.output_dir, exist_ok=True)
+     output_model_file = os.path.join(opt.output_dir, 'fine_tuned_model_2.pth')
+     torch.save(custom_model, output_model_file)
+     print(f'Model saved to {output_model_file}')
+
+
+ if __name__ == '__main__':
+     parser = argparse.ArgumentParser(description='Fine-tune BERT model.')
+     parser.add_argument('--dataset', type=str, default='/home/jupyter/bert/dataset/hint_based/ratio_proportion_change_3/er/er_train.csv', help='Path to the dataset file.')
+     parser.add_argument('--data_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/gt/er.txt', help='Path to the input sequence file.')
+     parser.add_argument('--output_dir', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/output/hint_classification', help='Directory to save the fine-tuned model.')
+     parser.add_argument('--pre_trained_model_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/output/pretrain:1800ms:64hs:4l:8a:50s:64b:1000e:-5lr/bert_trained.seq_encoder.model.ep68', help='Path to the pre-trained BERT model.')
+     parser.add_argument('--vocab_file', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/pretraining/vocab.txt', help='Path to the vocabulary file.')
+     parser.add_argument('--log_folder_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/logs/oct_logs', help='Path to the folder for saving logs.')
+
+
+     opt = parser.parse_args()
+     main(opt)
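To illustrate the batch layout the two collate functions above expect, a small self-contained sketch with toy tensors; it assumes the repository's src package and its dependencies are importable so that hint_fine_tuning can be imported, and the 128-token length matches the max_length default in preprocess_data:

import torch
from hint_fine_tuning import collate_fn, custom_collate_fn

# Each item mirrors what preprocess_data produces, with a 'label' field added.
toy_batch = [
    {
        "input_ids": torch.randint(0, 50, (128,)),
        "segment_label": torch.zeros(128, dtype=torch.long),
        "label": torch.tensor(1),
    }
    for _ in range(4)
]

out = collate_fn(toy_batch)
print(out["input"].shape, out["label"].shape, out["segment_label"].shape)
# torch.Size([4, 128]) torch.Size([4]) torch.Size([4, 128])

# custom_collate_fn falls back to a single zero-filled example when a batch is empty.
print(custom_collate_fn([])["input"].shape)  # torch.Size([1, 128])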
main.py ADDED
@@ -0,0 +1,322 @@
+ import argparse
+
+ from torch.utils.data import DataLoader
+ import torch
+ import torch.nn as nn
+
+ from src.bert import BERT
+ from src.pretrainer import BERTTrainer, BERTFineTuneTrainer, BERTAttention
+ from src.dataset import PretrainerDataset, TokenizerDataset
+ from src.vocab import Vocab
+
+ import time
+ import os
+ import tqdm
+ import pickle
+
+ def train():
+     parser = argparse.ArgumentParser()
+
+     parser.add_argument('-workspace_name', type=str, default=None)
+     parser.add_argument('-code', type=str, default=None, help="folder for pretraining outputs and logs")
+     parser.add_argument('-finetune_task', type=str, default=None, help="folder inside finetuning")
+     parser.add_argument("-attention", type=bool, default=False, help="analyse attention scores")
+     parser.add_argument("-diff_test_folder", type=bool, default=False, help="use for different test folder")
+     parser.add_argument("-embeddings", type=bool, default=False, help="get and analyse embeddings")
+     parser.add_argument('-embeddings_file_name', type=str, default=None, help="file name of embeddings")
+     parser.add_argument("-pretrain", type=bool, default=False, help="pretraining: true, or false")
+     # parser.add_argument('-opts', nargs='+', type=str, default=None, help='List of optional steps')
+     parser.add_argument("-max_mask", type=int, default=0.15, help="% of input tokens selected for masking")
+     # parser.add_argument("-p", "--pretrain_dataset", type=str, default="pretraining/pretrain.txt", help="pretraining dataset for bert")
+     # parser.add_argument("-pv", "--pretrain_val_dataset", type=str, default="pretraining/test.txt", help="pretraining validation dataset for bert")
+     # default="finetuning/test.txt",
+     parser.add_argument("-vocab_path", type=str, default="pretraining/vocab.txt", help="built vocab model path with bert-vocab")
+
+     parser.add_argument("-train_dataset_path", type=str, default="train.txt", help="fine tune train dataset for progress classifier")
+     parser.add_argument("-val_dataset_path", type=str, default="val.txt", help="test set for evaluate fine tune train set")
+     parser.add_argument("-test_dataset_path", type=str, default="test.txt", help="test set for evaluate fine tune train set")
+     parser.add_argument("-num_labels", type=int, default=2, help="Number of labels")
+     parser.add_argument("-train_label_path", type=str, default="train_label.txt", help="fine tune train dataset for progress classifier")
+     parser.add_argument("-val_label_path", type=str, default="val_label.txt", help="test set for evaluate fine tune train set")
+     parser.add_argument("-test_label_path", type=str, default="test_label.txt", help="test set for evaluate fine tune train set")
+     ##### change Checkpoint for finetuning
+     parser.add_argument("-pretrained_bert_checkpoint", type=str, default=None, help="checkpoint of saved pretrained bert model")  # ."output_feb09/bert_trained.model.ep40"
+     parser.add_argument('-check_epoch', type=int, default=None)
+
+     parser.add_argument("-hs", "--hidden", type=int, default=64, help="hidden size of transformer model")  # 64
+     parser.add_argument("-l", "--layers", type=int, default=4, help="number of layers")  # 4
+     parser.add_argument("-a", "--attn_heads", type=int, default=4, help="number of attention heads")  # 8
+     parser.add_argument("-s", "--seq_len", type=int, default=50, help="maximum sequence length")
+
+     parser.add_argument("-b", "--batch_size", type=int, default=500, help="number of batch_size")  # 64
+     parser.add_argument("-e", "--epochs", type=int, default=50)  # 1501, help="number of epochs") #501
+     # Use 50 for pretrain, and 10 for fine tune
+     parser.add_argument("-w", "--num_workers", type=int, default=4, help="dataloader worker size")
+
+     # Later run with cuda
+     parser.add_argument("--with_cuda", type=bool, default=True, help="training with CUDA: true, or false")
+     parser.add_argument("--log_freq", type=int, default=10, help="printing loss every n iter: setting n")
+     # parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus")
+     parser.add_argument("--cuda_devices", type=int, nargs='+', default=None, help="CUDA device ids")
+     # parser.add_argument("--on_memory", type=bool, default=False, help="Loading on memory: true or false")
+
+     parser.add_argument("--dropout", type=float, default=0.1, help="dropout of network")
+     parser.add_argument("--lr", type=float, default=1e-05, help="learning rate of adam")  # 1e-3
+     parser.add_argument("--adam_weight_decay", type=float, default=0.01, help="weight_decay of adam")
+     parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value")
+     parser.add_argument("--adam_beta2", type=float, default=0.98, help="adam first beta value")  # 0.999
+
+     parser.add_argument("-o", "--output_path", type=str, default="bert_trained.seq_encoder.model", help="ex)output/bert.model")
+     # parser.add_argument("-o", "--output_path", type=str, default="output/bert_fine_tuned.model", help="ex)output/bert.model")
+
+     args = parser.parse_args()
+     for k,v in vars(args).items():
+         if 'path' in k:
+             if v:
+                 if k == "output_path":
+                     if args.code:
+                         setattr(args, f"{k}", args.workspace_name+f"/output/{args.code}/"+v)
+                     elif args.finetune_task:
+                         setattr(args, f"{k}", args.workspace_name+f"/output/{args.finetune_task}/"+v)
+                     else:
+                         setattr(args, f"{k}", args.workspace_name+"/output/"+v)
+                 elif k != "vocab_path":
+                     if args.pretrain:
+                         setattr(args, f"{k}", args.workspace_name+"/pretraining/"+v)
+                     else:
+                         if args.code:
+                             setattr(args, f"{k}", args.workspace_name+f"/{args.code}/"+v)
+                         elif args.finetune_task:
+                             if args.diff_test_folder and "test" in k:
+                                 setattr(args, f"{k}", args.workspace_name+f"/finetuning/"+v)
+                             else:
+                                 setattr(args, f"{k}", args.workspace_name+f"/finetuning/{args.finetune_task}/"+v)
+                         else:
+                             setattr(args, f"{k}", args.workspace_name+"/finetuning/"+v)
+                 else:
+                     setattr(args, f"{k}", args.workspace_name+"/"+v)
+
+             print(f"args.{k} : {getattr(args, f'{k}')}")
+
+     print("Loading Vocab", args.vocab_path)
+     vocab_obj = Vocab(args.vocab_path)
+     vocab_obj.load_vocab()
+     print("Vocab Size: ", len(vocab_obj.vocab))
+
+     if args.attention:
+         print(f"Attention aggregate...... code: {args.code}, dataset: {args.finetune_task}")
+         if args.code:
+             new_folder = f"{args.workspace_name}/plots/{args.code}/"
+             if not os.path.exists(new_folder):
+                 os.makedirs(new_folder)
+
+         train_dataset = TokenizerDataset(args.train_dataset_path, None, vocab_obj, seq_len=args.seq_len)
+         train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
+         print("Load Pre-trained BERT model")
+         cuda_condition = torch.cuda.is_available() and args.with_cuda
+         device = torch.device("cuda:0" if cuda_condition else "cpu")
+         bert = torch.load(args.pretrained_bert_checkpoint, map_location=device)
+         trainer = BERTAttention(bert, vocab_obj, train_dataloader = train_data_loader, workspace_name = args.workspace_name, code=args.code, finetune_task = args.finetune_task)
+         trainer.getAttention()
+
+     elif args.embeddings:
+         print("Get embeddings... and cluster... ")
+         train_dataset = TokenizerDataset(args.test_dataset_path, None, vocab_obj, seq_len=args.seq_len)
+         train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
+         print("Load Pre-trained BERT model")
+         cuda_condition = torch.cuda.is_available() and args.with_cuda
+         device = torch.device("cuda:0" if cuda_condition else "cpu")
+         bert = torch.load(args.pretrained_bert_checkpoint).to(device)
+         available_gpus = list(range(torch.cuda.device_count()))
+         if torch.cuda.device_count() > 1:
+             print("Using %d GPUS for BERT" % torch.cuda.device_count())
+             bert = nn.DataParallel(bert, device_ids=available_gpus)
+
+         data_iter = tqdm.tqdm(enumerate(train_data_loader),
+                               desc="Model: %s" % (args.pretrained_bert_checkpoint.split("/")[-1]),
+                               total=len(train_data_loader), bar_format="{l_bar}{r_bar}")
+         all_embeddings = []
+         for i, data in data_iter:
+             data = {key: value.to(device) for key, value in data.items()}
+             embedding = bert(data["input"], data["segment_label"])
+             # print(embedding.shape, embedding[:, 0].shape)
+             embeddings = [h for h in embedding[:,0].cpu().detach().numpy()]
+             all_embeddings.extend(embeddings)
+
+         new_emb_folder = f"{args.workspace_name}/embeddings"
+         if not os.path.exists(new_emb_folder):
+             os.makedirs(new_emb_folder)
+         pickle.dump(all_embeddings, open(f"{new_emb_folder}/{args.embeddings_file_name}.pkl", "wb"))
+     else:
+         if args.pretrain:
+             print("Pre-training......")
+             print("Loading Pretraining Train Dataset", args.train_dataset_path)
+             print(f"Workspace: {args.workspace_name}")
+             pretrain_dataset = PretrainerDataset(args.train_dataset_path, vocab_obj, seq_len=args.seq_len, max_mask = args.max_mask)
+
+             print("Loading Pretraining Validation Dataset", args.val_dataset_path)
+             pretrain_valid_dataset = PretrainerDataset(args.val_dataset_path, vocab_obj, seq_len=args.seq_len, max_mask = args.max_mask) \
+                 if args.val_dataset_path is not None else None
+
+             print("Loading Pretraining Test Dataset", args.test_dataset_path)
+             pretrain_test_dataset = PretrainerDataset(args.test_dataset_path, vocab_obj, seq_len=args.seq_len, max_mask = args.max_mask) \
+                 if args.test_dataset_path is not None else None
+
+             print("Creating Dataloader")
+             pretrain_data_loader = DataLoader(pretrain_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
+             pretrain_val_data_loader = DataLoader(pretrain_valid_dataset, batch_size=args.batch_size, num_workers=args.num_workers)\
+                 if pretrain_valid_dataset is not None else None
+             pretrain_test_data_loader = DataLoader(pretrain_test_dataset, batch_size=args.batch_size, num_workers=args.num_workers)\
+                 if pretrain_test_dataset is not None else None
+
+             print("Building BERT model")
+             bert = BERT(len(vocab_obj.vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads, dropout=args.dropout)
+
+             if args.pretrained_bert_checkpoint:
+                 print(f"BERT model : {args.pretrained_bert_checkpoint}")
+                 bert = torch.load(args.pretrained_bert_checkpoint)
+
+             new_log_folder = f"{args.workspace_name}/logs"
+             new_output_folder = f"{args.workspace_name}/output"
+             if args.code:  # is sent almost all the time
+                 new_log_folder = f"{args.workspace_name}/logs/{args.code}"
+                 new_output_folder = f"{args.workspace_name}/output/{args.code}"
+
+             if not os.path.exists(new_log_folder):
+                 os.makedirs(new_log_folder)
+             if not os.path.exists(new_output_folder):
+                 os.makedirs(new_output_folder)
+
+             print(f"Creating BERT Trainer .... masking: True, max_mask: {args.max_mask}")
+             trainer = BERTTrainer(bert, len(vocab_obj.vocab), train_dataloader=pretrain_data_loader,
+                                   val_dataloader=pretrain_val_data_loader, test_dataloader=pretrain_test_data_loader,
+                                   lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
+                                   with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq,
+                                   log_folder_path=new_log_folder)
+
+             start_time = time.time()
+             print(f'Pretraining Starts, Time: {time.strftime("%D %T", time.localtime(start_time))}')
+             # if need to pretrain from a check-point, need :check_epoch
+             repoch = range(args.check_epoch, args.epochs) if args.check_epoch else range(args.epochs)
+             counter = 0
+             patience = 20
+             for epoch in repoch:
+                 print(f'Training Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}')
+                 trainer.train(epoch)
+                 print(f'Training Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n')
+
+                 if pretrain_val_data_loader is not None:
+                     print(f'Validation Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}')
+                     trainer.val(epoch)
+                     print(f'Validation Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n')
+
+                 if trainer.save_model:  # or epoch%10 == 0 and epoch > 4
+                     trainer.save(epoch, args.output_path)
+                     counter = 0
+                     if pretrain_test_data_loader is not None:
+                         print(f'Test Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}')
+                         trainer.test(epoch)
+                         print(f'Test Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n')
+                 else:
+                     counter +=1
+                     if counter >= patience:
+                         print(f"Early stopping at epoch {epoch}")
+                         break
+
+             end_time = time.time()
+             print("Time Taken to pretrain model = ", end_time - start_time)
+             print(f'Pretraining Ends, Time: {time.strftime("%D %T", time.localtime(end_time))}')
+         else:
+             print("Fine Tuning......")
+             print("Loading Train Dataset", args.train_dataset_path)
+             train_dataset = TokenizerDataset(args.train_dataset_path, args.train_label_path, vocab_obj, seq_len=args.seq_len)
+
+             # print("Loading Validation Dataset", args.val_dataset_path)
+             # val_dataset = TokenizerDataset(args.val_dataset_path, args.val_label_path, vocab_obj, seq_len=args.seq_len) \
+             #     if args.val_dataset_path is not None else None
+
+             print("Loading Test Dataset", args.test_dataset_path)
+             test_dataset = TokenizerDataset(args.test_dataset_path, args.test_label_path, vocab_obj, seq_len=args.seq_len) \
+                 if args.test_dataset_path is not None else None
+
+             print("Creating Dataloader...")
+             train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
+             # val_data_loader = DataLoader(val_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
+             #     if val_dataset is not None else None
+             test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
+                 if test_dataset is not None else None
+
+             print("Load Pre-trained BERT model")
+             # bert = BERT(len(vocab_obj.vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads)
+             cuda_condition = torch.cuda.is_available() and args.with_cuda
+             device = torch.device("cuda:0" if cuda_condition else "cpu")
+             bert = torch.load(args.pretrained_bert_checkpoint, map_location=device)
+
+             # if args.finetune_task == "SL":
+             #     if args.workspace_name == "ratio_proportion_change4":
+             #         num_labels = 9
+             #     elif args.workspace_name == "ratio_proportion_change3":
+             #         num_labels = 9
+             #     elif args.workspace_name == "scale_drawings_3":
+             #         num_labels = 9
+             #     elif args.workspace_name == "sales_tax_discounts_two_rates":
+             #         num_labels = 3
+             # else:
+             #     num_labels = 2
+             # # num_labels = 1
+             # print(f"Number of Labels : {args.num_labels}")
+             new_log_folder = f"{args.workspace_name}/logs"
+             new_output_folder = f"{args.workspace_name}/output"
+             if args.finetune_task:  # is sent almost all the time
+                 new_log_folder = f"{args.workspace_name}/logs/{args.finetune_task}"
+                 new_output_folder = f"{args.workspace_name}/output/{args.finetune_task}"
+
+             if not os.path.exists(new_log_folder):
+                 os.makedirs(new_log_folder)
+             if not os.path.exists(new_output_folder):
+                 os.makedirs(new_output_folder)
+
+             print("Creating BERT Fine Tune Trainer")
+             trainer = BERTFineTuneTrainer(bert, len(vocab_obj.vocab),
+                                           train_dataloader=train_data_loader, test_dataloader=test_data_loader,
+                                           lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
+                                           with_cuda=args.with_cuda, cuda_devices = args.cuda_devices, log_freq=args.log_freq,
+                                           workspace_name = args.workspace_name, num_labels=args.num_labels, log_folder_path=new_log_folder)
+
+             print("Fine-tune training Start....")
+             start_time = time.time()
+             repoch = range(args.check_epoch, args.epochs) if args.check_epoch else range(args.epochs)
+             counter = 0
+             patience = 10
+             for epoch in repoch:
+                 print(f'Training Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}')
+                 trainer.train(epoch)
+                 print(f'Training Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n')
+
+                 if test_data_loader is not None:
+                     print(f'Test Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}')
+                     trainer.test(epoch)
+                     # pickle.dump(trainer.probability_list, open(f"{args.workspace_name}/output/aaai/change4_mid_prob_{epoch}.pkl","wb"))
+                     print(f'Test Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n')
+
+                 # if val_data_loader is not None:
+                 #     print(f'Validation Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}')
+                 #     trainer.val(epoch)
+                 #     print(f'Validation Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n')
+
+                 if trainer.save_model:  # or epoch%10 == 0
+                     trainer.save(epoch, args.output_path)
+                     counter = 0
+                 else:
+                     counter +=1
+                     if counter >= patience:
+                         print(f"Early stopping at epoch {epoch}")
+                         break
+
+             end_time = time.time()
+             print("Time Taken to fine-tune model = ", end_time - start_time)
+             print(f'Pretraining Ends, Time: {time.strftime("%D %T", time.localtime(end_time))}')
+
+
+ if __name__ == "__main__":
+     train()
metrics.py ADDED
@@ -0,0 +1,149 @@
+ import numpy as np
+ from scipy.special import softmax
+
+
+ class CELoss(object):
+
+     def compute_bin_boundaries(self, probabilities = np.array([])):
+
+         #uniform bin spacing
+         if probabilities.size == 0:
+             bin_boundaries = np.linspace(0, 1, self.n_bins + 1)
+             self.bin_lowers = bin_boundaries[:-1]
+             self.bin_uppers = bin_boundaries[1:]
+         else:
+             #size of bins
+             bin_n = int(self.n_data/self.n_bins)
+
+             bin_boundaries = np.array([])
+
+             probabilities_sort = np.sort(probabilities)
+
+             for i in range(0,self.n_bins):
+                 bin_boundaries = np.append(bin_boundaries,probabilities_sort[i*bin_n])
+             bin_boundaries = np.append(bin_boundaries,1.0)
+
+             self.bin_lowers = bin_boundaries[:-1]
+             self.bin_uppers = bin_boundaries[1:]
+
+
+     def get_probabilities(self, output, labels, logits):
+         #If not probabilities apply softmax!
+         if logits:
+             self.probabilities = softmax(output, axis=1)
+         else:
+             self.probabilities = output
+
+         self.labels = labels
+         self.confidences = np.max(self.probabilities, axis=1)
+         self.predictions = np.argmax(self.probabilities, axis=1)
+         self.accuracies = np.equal(self.predictions,labels)
+
+     def binary_matrices(self):
+         idx = np.arange(self.n_data)
+         #make matrices of zeros
+         pred_matrix = np.zeros([self.n_data,self.n_class])
+         label_matrix = np.zeros([self.n_data,self.n_class])
+         #self.acc_matrix = np.zeros([self.n_data,self.n_class])
+         pred_matrix[idx,self.predictions] = 1
+         label_matrix[idx,self.labels] = 1
+
+         self.acc_matrix = np.equal(pred_matrix, label_matrix)
+
+
+     def compute_bins(self, index = None):
+         self.bin_prop = np.zeros(self.n_bins)
+         self.bin_acc = np.zeros(self.n_bins)
+         self.bin_conf = np.zeros(self.n_bins)
+         self.bin_score = np.zeros(self.n_bins)
+
+         if index == None:
+             confidences = self.confidences
+             accuracies = self.accuracies
+         else:
+             confidences = self.probabilities[:,index]
+             accuracies = self.acc_matrix[:,index]
+
+
+         for i, (bin_lower, bin_upper) in enumerate(zip(self.bin_lowers, self.bin_uppers)):
+             # Calculated |confidence - accuracy| in each bin
+             in_bin = np.greater(confidences,bin_lower.item()) * np.less_equal(confidences,bin_upper.item())
+             self.bin_prop[i] = np.mean(in_bin)
+
+             if self.bin_prop[i].item() > 0:
+                 self.bin_acc[i] = np.mean(accuracies[in_bin])
+                 self.bin_conf[i] = np.mean(confidences[in_bin])
+                 self.bin_score[i] = np.abs(self.bin_conf[i] - self.bin_acc[i])
+
+ class MaxProbCELoss(CELoss):
+     def loss(self, output, labels, n_bins = 15, logits = True):
+         self.n_bins = n_bins
+         super().compute_bin_boundaries()
+         super().get_probabilities(output, labels, logits)
+         super().compute_bins()
+
+ #http://people.cs.pitt.edu/~milos/research/AAAI_Calibration.pdf
+ class ECELoss(MaxProbCELoss):
+
+     def loss(self, output, labels, n_bins = 15, logits = True):
+         super().loss(output, labels, n_bins, logits)
+         return np.dot(self.bin_prop,self.bin_score)
+
+ class MCELoss(MaxProbCELoss):
+
+     def loss(self, output, labels, n_bins = 15, logits = True):
+         super().loss(output, labels, n_bins, logits)
+         return np.max(self.bin_score)
+
+ #https://arxiv.org/abs/1905.11001
+ #Overconfidence Loss (Good in high risk applications where confident but wrong predictions can be especially harmful)
+ class OELoss(MaxProbCELoss):
+
+     def loss(self, output, labels, n_bins = 15, logits = True):
+         super().loss(output, labels, n_bins, logits)
+         return np.dot(self.bin_prop,self.bin_conf * np.maximum(self.bin_conf-self.bin_acc,np.zeros(self.n_bins)))
+
+
+ #https://arxiv.org/abs/1904.01685
+ class SCELoss(CELoss):
+
+     def loss(self, output, labels, n_bins = 15, logits = True):
+         sce = 0.0
+         self.n_bins = n_bins
+         self.n_data = len(output)
+         self.n_class = len(output[0])
+
+         super().compute_bin_boundaries()
+         super().get_probabilities(output, labels, logits)
+         super().binary_matrices()
+
+         for i in range(self.n_class):
+             super().compute_bins(i)
+             sce += np.dot(self.bin_prop,self.bin_score)
+
+         return sce/self.n_class
+
+ class TACELoss(CELoss):
+
+     def loss(self, output, labels, threshold = 0.01, n_bins = 15, logits = True):
+         tace = 0.0
+         self.n_bins = n_bins
+         self.n_data = len(output)
+         self.n_class = len(output[0])
+
+         super().get_probabilities(output, labels, logits)
+         self.probabilities[self.probabilities < threshold] = 0
+         super().binary_matrices()
+
+         for i in range(self.n_class):
+             super().compute_bin_boundaries(self.probabilities[:,i])
+             super().compute_bins(i)
+             tace += np.dot(self.bin_prop,self.bin_score)
+
+         return tace/self.n_class
+
+ #create TACELoss with threshold fixed at 0
+ class ACELoss(TACELoss):
+
+     def loss(self, output, labels, n_bins = 15, logits = True):
+         return super().loss(output, labels, 0.0 , n_bins, logits)
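A small sketch of how these calibration metrics might be called, assuming output is an (n_samples, n_classes) array of logits and labels holds integer class ids; the numbers below are toy values:

import numpy as np
from metrics import ECELoss, SCELoss

# Toy logits for 4 samples over 2 classes, with their true labels.
outputs = np.array([[2.0, 0.1],
                    [0.2, 1.5],
                    [1.0, 0.9],
                    [0.1, 2.2]])
labels = np.array([0, 1, 1, 1])

ece = ECELoss().loss(outputs, labels, n_bins=5, logits=True)
sce = SCELoss().loss(outputs, labels, n_bins=5, logits=True)
print(f"ECE: {ece:.4f}, SCE: {sce:.4f}")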
new_fine_tuning/.DS_Store ADDED
Binary file (6.15 kB).
 
new_fine_tuning/README.md ADDED
@@ -0,0 +1,197 @@
1
+ ## Pre-training Data
2
+
3
+ ### ratio_proportion_change3 : Calculating Percent Change and Final Amounts
4
+ > clear;python3 prepare_pretraining_input_vocab_file.py -analyze_dataset_by_section True -workspace_name ratio_proportion_change3 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -pretrain True -train_file_path pretraining/pretrain1000.txt -train_info_path pretraining/pretrain1000_info.txt -test_file_path pretraining/test1000.txt -test_info_path pretraining/test1000_info.txt
5
+
6
+ > clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -pretrain True -train_file_path pretraining/pretrain2000.txt -train_info_path pretraining/pretrain2000_info.txt -test_file_path pretraining/test2000.txt -test_info_path pretraining/test2000_info.txt
7
+
8
+ #### Test simple
9
+ > clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -code full -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path full.txt -train_info_path full_info.txt
10
+
11
+ > clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -code gt -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path er.txt -train_info_path er_info.txt -test_file_path me.txt -test_info_path me_info.txt
12
+
13
+ > clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -code correct -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path correct.txt -train_info_path correct_info.txt -test_file_path incorrect.txt -test_info_path incorrect_info.txt -final_step FinalAnswer
14
+
15
+ > clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -code progress -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path graduated.txt -train_info_path graduated_info.txt -test_file_path promoted.txt -test_info_path promoted_info.txt
16
+
17
+ ### ratio_proportion_change4 : Using Percents and Percent Change
18
+ > clear;python3 prepare_pretraining_input_vocab_file.py -analyze_dataset_by_section True -workspace_name ratio_proportion_change4 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor NumeratorLabel1 DenominatorLabel1 -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -pretrain True -train_file_path pretraining/pretrain1000.txt -train_info_path pretraining/pretrain1000_info.txt -test_file_path pretraining/test1000.txt -test_info_path pretraining/test1000_info.txt
19
+
20
+ > clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor NumeratorLabel1 DenominatorLabel1 -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -pretrain True -train_file_path pretraining/pretrain2000.txt -train_info_path pretraining/pretrain2000_info.txt -test_file_path pretraining/test2000.txt -test_info_path pretraining/test2000_info.txt
21
+
22
+ #### Test simple
23
+ > clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -code full -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path full.txt -train_info_path full_info.txt
24
+
25
+ > clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -code gt -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path er.txt -train_info_path er_info.txt -test_file_path me.txt -test_info_path me_info.txt
26
+
27
+ > clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -code correct -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path correct.txt -train_info_path correct_info.txt -test_file_path incorrect.txt -test_info_path incorrect_info.txt -final_step FinalAnswer
28
+
29
+ > clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -code progress -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -train_file_path graduated.txt -train_info_path graduated_info.txt -test_file_path promoted.txt -test_info_path promoted_info.txt
30
+
31
+ ## Pretraining
32
+
33
+ ### ratio_proportion_change3 : Calculating Percent Change and Final Amounts
34
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3_1920 -code pretrain1000 --pretrain_dataset pretraining/pretrain1000.txt --pretrain_val_dataset pretraining/test1000.txt
35
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000 --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt
36
+
37
+ #### Test simple models
38
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 1 --attn_heads 1
39
+
40
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 1 --attn_heads 2
41
+
42
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 2 --attn_heads 2
43
+
44
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 2 --attn_heads 4
45
+
46
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 4 --attn_heads 4
47
+
48
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 4 --attn_heads 8
49
+
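+ The six runs above differ only in --layers, --attn_heads, and the -code tag, so the sweep can also be driven by a small loop (a sketch, assuming a bash-compatible shell):
+ > for cfg in "1 1" "1 2" "2 2" "2 4" "4 4" "4 8"; do set -- $cfg; python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_${1}l${2}h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers $1 --attn_heads $2; done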
50
+
51
+
52
+ ### ratio_proportion_change4 : Using Percents and Percent Change
53
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain1000 --pretrain_dataset pretraining/pretrain1000.txt --pretrain_val_dataset pretraining/test1000.txt
54
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000 --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt
55
+
56
+ #### Test simple models
57
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_1l1h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 1 --attn_heads 1
58
+
59
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_1l2h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 1 --attn_heads 2
60
+
61
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_2l2h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 2 --attn_heads 2
62
+
63
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_2l4h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 2 --attn_heads 4
64
+
65
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_4l4h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 4 --attn_heads 4
66
+
67
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change4 -code pretrain2000_4l8h-5lr --pretrain_dataset pretraining/pretrain2000.txt --pretrain_val_dataset pretraining/test2000.txt --layers 4 --attn_heads 8
68
+
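+ The same loop shown for ratio_proportion_change3 applies here unchanged, apart from -workspace_name ratio_proportion_change4.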
69
+
70
+ ## Preparing Fine Tuning Data
71
+
72
+ ### ratio_proportion_change3 : Calculating Percent Change and Final Amounts
73
+ > clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change3 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -final_step FinalAnswer
74
+
75
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task check2 --train_dataset finetuning/check2/train.txt --test_dataset finetuning/check2/test.txt --train_label finetuning/check2/train_label.txt --test_label finetuning/check2/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279 --epochs 51
76
+
77
+ #### Attention Head Check
78
+ <!-- > PercentChange NumeratorQuantity2 NumeratorQuantity1 DenominatorQuantity1 OptionalTask_1 EquationAnswer NumeratorFactor EquationAnswer NumeratorFactor EquationAnswer NumeratorFactor DenominatorFactor NumeratorFactor DenominatorFactor NumeratorFactor DenominatorFactor FirstRow1:2 FirstRow1:1 FirstRow2:1 FirstRow2:2 FirstRow2:1 SecondRow ThirdRow FinalAnswerDirection ThirdRow FinalAnswer -->
79
+
80
+
81
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task er ;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task correct ;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep598 --attention True -finetune_task promoted
82
+
83
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task er;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task correct;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep823 --attention True -finetune_task promoted
84
+
85
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task er;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task correct;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l2h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l2h-5lr/bert_trained.seq_encoder.model.ep1045 --attention True -finetune_task promoted
86
+
87
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task er;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task correct;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_2l4h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_2l4h-5lr/bert_trained.seq_encoder.model.ep1336 --attention True -finetune_task promoted
88
+
89
+ <!-- > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep923 --attention True -->
90
+
91
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task er;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task correct;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l4h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l4h-5lr/bert_trained.seq_encoder.model.ep871 --attention True -finetune_task promoted
92
+
93
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset full/full_attn.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task full
94
+
95
+
96
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset full/full.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task full;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset gt/er.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task er;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset gt/me.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task me;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset correct/correct.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task correct;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset correct/incorrect.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task incorrect;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset progress/graduated.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task graduated;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset progress/promoted.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349 --attention True -finetune_task promoted
97
+
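+ Each chained command above runs one pretrained checkpoint over the same seven datasets (full, er, me, correct, incorrect, graduated, promoted). A loop makes that pattern explicit (a sketch, assuming a bash-compatible shell; shown for the 4l8h checkpoint):
+ > ckpt=ratio_proportion_change3/output/pretrain2000_4l8h-5lr/bert_trained.seq_encoder.model.ep1349; for pair in "full/full.txt full" "gt/er.txt er" "gt/me.txt me" "correct/correct.txt correct" "correct/incorrect.txt incorrect" "progress/graduated.txt graduated" "progress/promoted.txt promoted"; do set -- $pair; python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_4l8h-5lr --train_dataset $1 --pretrained_bert_checkpoint $ckpt --attention True -finetune_task $2; done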
98
+
99
+ <!-- me: PercentChange NumeratorQuantity2 NumeratorQuantity1 DenominatorQuantity1 OptionalTask_2 FirstRow2:1 FirstRow2:2 FirstRow1:1 SecondRow ThirdRow FinalAnswer FinalAnswerDirection -->
100
+
101
+ <!-- er: PercentChange NumeratorQuantity2 NumeratorQuantity1 DenominatorQuantity1 OptionalTask_1 DenominatorFactor NumeratorFactor OptionalTask_2 EquationAnswer FirstRow1:1 FirstRow1:2 FirstRow2:2 FirstRow2:1 FirstRow1:2 SecondRow ThirdRow FinalAnswer -->
102
+
103
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l1h-5lr --train_dataset pretraining/attention_train.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l1h-5lr/bert_trained.seq_encoder.model.ep273 --attention True
104
+
105
+ <!-- PercentChange NumeratorQuantity2 NumeratorQuantity1 DenominatorQuantity1 OptionalTask_1 DenominatorFactor NumeratorFactor OptionalTask_2 EquationAnswer FirstRow1:1 FirstRow1:2 FirstRow2:2 FirstRow2:1 FirstRow1:2 SecondRow ThirdRow FinalAnswer -->
106
+
107
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -code pretrain2000_1l2h-5lr --train_dataset pretraining/attention_train.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000_1l2h-5lr/bert_trained.seq_encoder.model.ep1021 --attention True
108
+
109
+
110
+
111
+ ### ratio_proportion_change4 : Using Percents and Percent Change
112
+ > clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name ratio_proportion_change4 -opt_step1 OptionalTask_1 EquationAnswer NumeratorFactor DenominatorFactor NumeratorLabel1 DenominatorLabel1 -opt_step2 OptionalTask_2 FirstRow1:1 FirstRow1:2 FirstRow2:1 FirstRow2:2 SecondRow ThirdRow -final_step FinalAnswer
113
+
114
+ ### scale_drawings_3 : Calculating Measurements Using a Scale
115
+ > clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name scale_drawings_3 -opt_step1 opt1-check opt1-ratio-L-n opt1-ratio-L-d opt1-ratio-R-n opt1-ratio-R-d opt1-me2-top-3 opt1-me2-top-4 opt1-me2-top-2 opt1-me2-top-1 opt1-me2-middle-1 opt1-me2-bottom-1 -opt_step2 opt2-check opt2-ratio-L-n opt2-ratio-L-d opt2-ratio-R-n opt2-ratio-R-d opt2-me2-top-3 opt2-me2-top-4 opt2-me2-top-1 opt2-me2-top-2 opt2-me2-middle-1 opt2-me2-bottom-1 -final_step unk-value1 unk-value2
116
+
117
+ ### sales_tax_discounts_two_rates : Solving Problems with Both Sales Tax and Discounts
118
+ > clear;python3 prepare_pretraining_input_vocab_file.py -workspace_name sales_tax_discounts_two_rates -opt_step1 optionalTaskGn salestaxFactor2 discountFactor2 multiplyOrderStatementGn -final_step totalCost1
119
+
120
+
121
+ # Fine Tuning Pre-trained model
122
+
123
+ ## ratio_proportion_change3 : Calculating Percent Change and Final Amounts
124
+ > Selected Pretrained model: **ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279**
125
+ > New pretrained model: **bert/ratio_proportion_change3/output/pretrain2000/bert_trained.seq_encoder.model.ep731**
126
+
127
+ ### 10per
128
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task 10per --train_dataset finetuning/10per/train.txt --test_dataset finetuning/10per/test.txt --train_label finetuning/10per/train_label.txt --test_label finetuning/10per/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000/bert_trained.seq_encoder.model.ep731 --epochs 51
129
+
130
+ ### IS
131
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task IS --train_dataset finetuning/IS/train.txt --test_dataset finetuning/FS/train.txt --train_label finetuning/IS/train_label.txt --test_label finetuning/FS/train_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000/bert_trained.seq_encoder.model.ep731 --epochs 51
132
+
133
+ ### FS
134
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task FS --train_dataset finetuning/FS/train.txt --test_dataset finetuning/IS/train.txt --train_label finetuning/FS/train_label.txt --test_label finetuning/IS/train_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/pretrain2000/bert_trained.seq_encoder.model.ep731 --epochs 51
135
+
136
+ ### correctness
137
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task correctness --train_dataset finetuning/correctness/train.txt --test_dataset finetuning/correctness/test.txt --train_label finetuning/correctness/train_label.txt --test_label finetuning/correctness/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279 --epochs 51
138
+
139
+ ### SL
140
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task SL --train_dataset finetuning/SL/train.txt --test_dataset finetuning/SL/test.txt --train_label finetuning/SL/train_label.txt --test_label finetuning/SL/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279 --epochs 51
141
+
142
+ ### effectiveness
143
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task effectiveness --train_dataset finetuning/effectiveness/train.txt --test_dataset finetuning/effectiveness/test.txt --train_label finetuning/effectiveness/train_label.txt --test_label finetuning/effectiveness/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279 --epochs 51
144
+
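+ The correctness, SL, and effectiveness runs above share the same checkpoint and finetuning/<task>/ layout, so they can be scripted as below (a sketch, assuming a bash-compatible shell); 10per follows the same layout but uses the pretrain2000 checkpoint, while IS and FS are the exception because each one evaluates on the other's training split.
+ > ckpt=ratio_proportion_change3/output/bert_trained.seq_encoder.model.ep279; for task in correctness SL effectiveness; do python3 src/main.py -workspace_name ratio_proportion_change3 -finetune_task $task --train_dataset finetuning/$task/train.txt --test_dataset finetuning/$task/test.txt --train_label finetuning/$task/train_label.txt --test_label finetuning/$task/test_label.txt --pretrained_bert_checkpoint $ckpt --epochs 51; done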
145
+
146
+ ## ratio_proportion_change4 : Using Percents and Percent Change
147
+ > Selected Pretrained model: **ratio_proportion_change4/output/bert_trained.seq_encoder.model.ep287**
148
+ ### 10per
149
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change4 -finetune_task 10per --train_dataset finetuning/10per/train.txt --test_dataset finetuning/10per/test.txt --train_label finetuning/10per/train_label.txt --test_label finetuning/10per/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change4/output/bert_trained.seq_encoder.model.ep287 --epochs 51
150
+
151
+ ### IS
152
+
153
+ ### FS
154
+
155
+ ### correctness
156
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change4 -finetune_task correctness --train_dataset finetuning/correctness/train.txt --test_dataset finetuning/correctness/test.txt --train_label finetuning/correctness/train_label.txt --test_label finetuning/correctness/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change4/output/bert_trained.seq_encoder.model.ep287 --epochs 51
157
+
158
+ ### SL
159
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change4 -finetune_task SL --train_dataset finetuning/SL/train.txt --test_dataset finetuning/SL/test.txt --train_label finetuning/SL/train_label.txt --test_label finetuning/SL/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change4/output/bert_trained.seq_encoder.model.ep287 --epochs 51
160
+
161
+ ### effectiveness
162
+ > clear;python3 src/main.py -workspace_name ratio_proportion_change4 -finetune_task effectiveness --train_dataset finetuning/effectiveness/train.txt --test_dataset finetuning/effectiveness/test.txt --train_label finetuning/effectiveness/train_label.txt --test_label finetuning/effectiveness/test_label.txt --pretrained_bert_checkpoint ratio_proportion_change4/output/bert_trained.seq_encoder.model.ep287 --epochs 51
163
+
164
+
165
+ ## scale_drawings_3 : Calculating Measurements Using a Scale
166
+ > Selected Pretrained model: **scale_drawings_3/output/bert_trained.seq_encoder.model.ep252**
167
+ ### 10per
168
+ > clear;python3 src/main.py -workspace_name scale_drawings_3 -finetune_task 10per --train_dataset finetuning/10per/train.txt --test_dataset finetuning/10per/test.txt --train_label finetuning/10per/train_label.txt --test_label finetuning/10per/test_label.txt --pretrained_bert_checkpoint scale_drawings_3/output/bert_trained.seq_encoder.model.ep252 --epochs 51
169
+
170
+ ### IS
171
+
172
+ ### FS
173
+
174
+ ### correctness
175
+ > clear;python3 src/main.py -workspace_name scale_drawings_3 -finetune_task correctness --train_dataset finetuning/correctness/train.txt --test_dataset finetuning/correctness/test.txt --train_label finetuning/correctness/train_label.txt --test_label finetuning/correctness/test_label.txt --pretrained_bert_checkpoint scale_drawings_3/output/bert_trained.seq_encoder.model.ep252 --epochs 51
176
+
177
+ ### SL
178
+ > clear;python3 src/main.py -workspace_name scale_drawings_3 -finetune_task SL --train_dataset finetuning/SL/train.txt --test_dataset finetuning/SL/test.txt --train_label finetuning/SL/train_label.txt --test_label finetuning/SL/test_label.txt --pretrained_bert_checkpoint scale_drawings_3/output/bert_trained.seq_encoder.model.ep252 --epochs 51
179
+
180
+ ### effectiveness
181
+
182
+ ## sales_tax_discounts_two_rates : Solving Problems with Both Sales Tax and Discounts
183
+ > Selected Pretrained model: **sales_tax_discounts_two_rates/output/bert_trained.seq_encoder.model.ep255**
184
+
185
+ ### 10per
186
+ > clear;python3 src/main.py -workspace_name sales_tax_discounts_two_rates -finetune_task 10per --train_dataset finetuning/10per/train.txt --test_dataset finetuning/10per/test.txt --train_label finetuning/10per/train_label.txt --test_label finetuning/10per/test_label.txt --pretrained_bert_checkpoint sales_tax_discounts_two_rates/output/bert_trained.seq_encoder.model.ep255 --epochs 51
187
+
188
+ ### IS
189
+
190
+ ### FS
191
+
192
+ ### correctness
193
+ > clear;python3 src/main.py -workspace_name sales_tax_discounts_two_rates -finetune_task correctness --train_dataset finetuning/correctness/train.txt --test_dataset finetuning/correctness/test.txt --train_label finetuning/correctness/train_label.txt --test_label finetuning/correctness/test_label.txt --pretrained_bert_checkpoint sales_tax_discounts_two_rates/output/bert_trained.seq_encoder.model.ep255 --epochs 51
194
+
195
+ ### SL
196
+
197
+ ### effectiveness
new_fine_tuning/__pycache__/metrics.cpython-312.pyc ADDED
Binary file (9.16 kB). View file
 
new_fine_tuning/__pycache__/recalibration.cpython-312.pyc ADDED
Binary file (5.51 kB). View file
 
new_fine_tuning/__pycache__/visualization.cpython-312.pyc ADDED
Binary file (5.28 kB). View file
 
new_hint_fine_tuned.py ADDED
@@ -0,0 +1,131 @@
1
+ import argparse
2
+ import os
3
+ import torch
4
+ import torch.nn as nn
5
+ from torch.utils.data import DataLoader, random_split, TensorDataset
6
+ from src.dataset import TokenizerDataset
7
+ from src.bert import BERT
8
+ from src.pretrainer import BERTFineTuneTrainer1
9
+ from src.vocab import Vocab
10
+ import pandas as pd
+ from CustomBERTModel import CustomBERTModel  # used in main() below; defined in CustomBERTModel.py at the repository root
11
+
12
+ def preprocess_labels(label_csv_path):
13
+ try:
14
+ labels_df = pd.read_csv(label_csv_path)
15
+ labels = labels_df['last_hint_class'].values.astype(int)
16
+ return torch.tensor(labels, dtype=torch.long)
17
+ except Exception as e:
18
+ print(f"Error reading dataset file: {e}")
19
+ return None
20
+
21
+ def preprocess_data(data_path, vocab, max_length=128):
22
+ try:
23
+ with open(data_path, 'r') as f:
24
+ sequences = f.readlines()
25
+ except Exception as e:
26
+ print(f"Error reading data file: {e}")
27
+ return None, None
28
+
29
+ tokenized_sequences = []
30
+ for sequence in sequences:
31
+ sequence = sequence.strip()
32
+ if sequence:
33
+ encoded = vocab.to_seq(sequence, seq_len=max_length)
34
+ encoded = encoded[:max_length] + [vocab.vocab.get('[PAD]', 0)] * (max_length - len(encoded))
35
+ segment_label = [0] * max_length
36
+
37
+ tokenized_sequences.append({
38
+ 'input_ids': torch.tensor(encoded),
39
+ 'segment_label': torch.tensor(segment_label)
40
+ })
41
+
42
+ input_ids = torch.cat([t['input_ids'].unsqueeze(0) for t in tokenized_sequences], dim=0)
43
+ segment_labels = torch.cat([t['segment_label'].unsqueeze(0) for t in tokenized_sequences], dim=0)
44
+
45
+ print(f"Input IDs shape: {input_ids.shape}")
46
+ print(f"Segment labels shape: {segment_labels.shape}")
47
+
48
+ return input_ids, segment_labels
49
+
50
+ def custom_collate_fn(batch):
51
+ inputs = [item[0].unsqueeze(0) for item in batch]  # main() builds a TensorDataset, so each item is an (input_ids, segment_label, label) tuple
52
+ segment_labels = [item[1].unsqueeze(0) for item in batch]
53
+ labels = [item[2].unsqueeze(0) for item in batch]
54
+
55
+ inputs = torch.cat(inputs, dim=0)
56
+ labels = torch.cat(labels, dim=0)
57
+ segment_labels = torch.cat(segment_labels, dim=0)
58
+
59
+ return {
60
+ 'input': inputs,
61
+ 'label': labels,
62
+ 'segment_label': segment_labels
63
+ }
64
+
65
+ def main(opt):
66
+ # Set device to GPU if available, otherwise use CPU
67
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
68
+
69
+ # Load vocabulary
70
+ vocab = Vocab(opt.vocab_file)
71
+ vocab.load_vocab()
72
+
73
+ # Preprocess data and labels
74
+ input_ids, segment_labels = preprocess_data(opt.data_path, vocab, max_length=50) # Using sequence length 50
75
+ labels = preprocess_labels(opt.dataset)
76
+
77
+ if input_ids is None or segment_labels is None or labels is None:
78
+ print("Error in preprocessing data. Exiting.")
79
+ return
80
+
81
+ # Create TensorDataset and split into train and validation sets
82
+ dataset = TensorDataset(input_ids, segment_labels, labels)
83
+ val_size = len(dataset) - int(0.8 * len(dataset))
84
+ val_dataset, train_dataset = random_split(dataset, [val_size, len(dataset) - val_size])  # note: the 20% validation split is listed first, then the 80% train split
85
+
86
+ # Create DataLoaders for training and validation
87
+ train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn)
88
+ val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=custom_collate_fn)
89
+
90
+ # Initialize custom BERT model and move it to the device
91
+ custom_model = CustomBERTModel(
92
+ vocab_size=len(vocab.vocab),
93
+ output_dim=2,
94
+ pre_trained_model_path=opt.pre_trained_model_path
95
+ ).to(device)
96
+
97
+ # Initialize the fine-tuning trainer
98
+ trainer = BERTFineTuneTrainer1(
99
+ bert=custom_model,
100
+ vocab_size=len(vocab.vocab),
101
+ train_dataloader=train_dataloader,
102
+ test_dataloader=val_dataloader,
103
+ lr=1e-5, # Using learning rate 10^-5 as specified
104
+ num_labels=2,
105
+ with_cuda=torch.cuda.is_available(),
106
+ log_freq=10,
107
+ workspace_name=opt.output_dir,
108
+ log_folder_path=opt.log_folder_path
109
+ )
110
+
111
+ # Train the model
112
+ trainer.train(epoch=20)
113
+
114
+ # Save the model
115
+ os.makedirs(opt.output_dir, exist_ok=True)
116
+ output_model_file = os.path.join(opt.output_dir, 'fine_tuned_model_3.pth')
117
+ torch.save(custom_model, output_model_file)
118
+ print(f'Model saved to {output_model_file}')
119
+
120
+ if __name__ == '__main__':
121
+ parser = argparse.ArgumentParser(description='Fine-tune BERT model.')
122
+ parser.add_argument('--dataset', type=str, default='/home/jupyter/bert/dataset/hint_based/ratio_proportion_change_3/er/er_train.csv', help='Path to the dataset file.')
123
+ parser.add_argument('--data_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/gt/er.txt', help='Path to the input sequence file.')
124
+ parser.add_argument('--output_dir', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/output/hint_classification', help='Directory to save the fine-tuned model.')
125
+ parser.add_argument('--pre_trained_model_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/output/pretrain:1800ms:64hs:4l:8a:50s:64b:1000e:-5lr/bert_trained.seq_encoder.model.ep68', help='Path to the pre-trained BERT model.')
126
+ parser.add_argument('--vocab_file', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/_Aug23/pretraining/vocab.txt', help='Path to the vocabulary file.')
127
+ parser.add_argument('--log_folder_path', type=str, default='/home/jupyter/bert/ratio_proportion_change3_1920/logs/oct', help='Path to the folder for saving logs.')
128
+
129
+
130
+ opt = parser.parse_args()
131
+ main(opt)
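+
+ # Example invocation (a sketch; all flags are defined in the parser above and the
+ # bracketed values are placeholders for the real paths):
+ # python3 new_hint_fine_tuned.py --dataset <hint_labels.csv> --data_path <sequences.txt> \
+ # --vocab_file <vocab.txt> --pre_trained_model_path <pretrained_checkpoint> \
+ # --output_dir <output_dir> --log_folder_path <log_dir>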
new_test_saved_finetuned_model.py ADDED
@@ -0,0 +1,613 @@
1
+ import argparse
2
+ import os
3
+ import torch
4
+ import torch.nn as nn
5
+ from torch.optim import Adam
6
+ from torch.utils.data import DataLoader
7
+ import pickle
8
+ print("here1",os.getcwd())
9
+ from src.dataset import TokenizerDataset, TokenizerDatasetForCalibration
10
+ from src.vocab import Vocab
11
+ print("here3",os.getcwd())
12
+ from src.bert import BERT
13
+ from src.seq_model import BERTSM
14
+ from src.classifier_model import BERTForClassification, BERTForClassificationWithFeats
15
+ # from src.new_finetuning.optim_schedule import ScheduledOptim
16
+ import metrics, recalibration, visualization
17
+ from recalibration import ModelWithTemperature
18
+ import tqdm
19
+ import sys
20
+ import time
21
+ import numpy as np
22
+
23
+ from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, roc_curve, roc_auc_score
24
+ import matplotlib.pyplot as plt
25
+ import seaborn as sns
26
+ import pandas as pd
27
+ from collections import defaultdict
28
+ print("here3",os.getcwd())
29
+ class BERTFineTuneTrainer:
30
+
31
+ def __init__(self, bertFinetunedClassifierwithFeats: BERT, #BERTForClassificationWithFeats
32
+ vocab_size: int, test_dataloader: DataLoader = None,
33
+ lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=10000,
34
+ with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, workspace_name=None,
35
+ num_labels=2, log_folder_path: str = None):
36
+ """
37
+ :param bert: BERT model which you want to train
38
+ :param vocab_size: total word vocab size
39
+ :param test_dataloader: test dataset data loader [can be None]
40
+ :param lr: learning rate of optimizer
41
+ :param betas: Adam optimizer betas
42
+ :param weight_decay: Adam optimizer weight decay param
43
+ :param with_cuda: training with CUDA
44
+ :param log_freq: logging frequency of the batch iteration
45
+ """
46
+
47
+ # Setup cuda device for BERT training, argument -c, --cuda should be true
48
+ # cuda_condition = torch.cuda.is_available() and with_cuda
49
+ # self.device = torch.device("cuda:0" if cuda_condition else "cpu")
50
+ self.device = torch.device("cpu") #torch.device("cuda:0" if cuda_condition else "cpu")
51
+ # print(cuda_condition, " Device used = ", self.device)
52
+ print(" Device used = ", self.device)
53
+
54
+ # available_gpus = list(range(torch.cuda.device_count()))
55
+
56
+ # This BERT model will be saved every epoch
57
+ self.model = bertFinetunedClassifierwithFeats.to("cpu")
58
+ print(self.model.parameters())
59
+ for param in self.model.parameters():
60
+ param.requires_grad = False
61
+ # Initialize the BERT Language Model, with BERT model
62
+ # self.model = BERTForClassification(self.bert, vocab_size, num_labels).to(self.device)
63
+ # self.model = BERTForClassificationWithFeats(self.bert, num_labels, 8).to(self.device)
64
+ # self.model = bertFinetunedClassifierwithFeats
65
+ # print(self.model.bert.parameters())
66
+ # for param in self.model.bert.parameters():
67
+ # param.requires_grad = False
68
+ # BERTForClassificationWithFeats(self.bert, num_labels, 18).to(self.device)
69
+
70
+ # self.model = BERTForClassificationWithFeats(self.bert, num_labels, 1).to(self.device)
71
+ # Distributed GPU training if CUDA can detect more than 1 GPU
72
+ # if with_cuda and torch.cuda.device_count() > 1:
73
+ # print("Using %d GPUS for BERT" % torch.cuda.device_count())
74
+ # self.model = nn.DataParallel(self.model, device_ids=available_gpus)
75
+
76
+ # Setting the train, validation and test data loader
77
+ # self.train_data = train_dataloader
78
+ # self.val_data = val_dataloader
79
+ self.test_data = test_dataloader
80
+
81
+ # self.optim = Adam(self.model.parameters(), lr=lr, weight_decay=weight_decay) #, eps=1e-9
82
+ self.optim = Adam(self.model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay)
83
+ # self.optim_schedule = ScheduledOptim(self.optim, self.model.bert.hidden, n_warmup_steps=warmup_steps)
84
+ # self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1)
85
+ self.criterion = nn.CrossEntropyLoss()
86
+
87
+ # if num_labels == 1:
88
+ # self.criterion = nn.MSELoss()
89
+ # elif num_labels == 2:
90
+ # self.criterion = nn.BCEWithLogitsLoss()
91
+ # # self.criterion = nn.CrossEntropyLoss()
92
+ # elif num_labels > 2:
93
+ # self.criterion = nn.CrossEntropyLoss()
94
+ # self.criterion = nn.BCEWithLogitsLoss()
95
+
96
+
97
+ self.log_freq = log_freq
98
+ self.log_folder_path = log_folder_path
99
+ # self.workspace_name = workspace_name
100
+ # self.finetune_task = finetune_task
101
+ # self.save_model = False
102
+ # self.avg_loss = 10000
103
+ self.start_time = time.time()
104
+ # self.probability_list = []
105
+ for fi in ['test']: #'val',
106
+ f = open(self.log_folder_path+f"/log_{fi}_finetuned.txt", 'w')
107
+ f.close()
108
+ print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()]))
109
+
110
+ # def train(self, epoch):
111
+ # self.iteration(epoch, self.train_data)
112
+
113
+ # def val(self, epoch):
114
+ # self.iteration(epoch, self.val_data, phase="val")
115
+
116
+ def test(self, epoch):
117
+ # if epoch == 0:
118
+ # self.avg_loss = 10000
119
+ self.iteration(epoch, self.test_data, phase="test")
120
+
121
+ def iteration(self, epoch, data_loader, phase="train"):
122
+ """
123
+ loop over the data_loader for training or testing
124
+ if on train status, backward operation is activated
125
+ and also auto save the model every epoch
126
+
127
+ :param epoch: current epoch index
128
+ :param data_loader: torch.utils.data.DataLoader for iteration
129
+ :param train: boolean value of is train or test
130
+ :return: None
131
+ """
132
+
133
+ # Setting the tqdm progress bar
134
+ data_iter = tqdm.tqdm(enumerate(data_loader),
135
+ desc="EP_%s:%d" % (phase, epoch),
136
+ total=len(data_loader),
137
+ bar_format="{l_bar}{r_bar}")
138
+
139
+ avg_loss = 0.0
140
+ total_correct = 0
141
+ total_element = 0
142
+ plabels = []
143
+ tlabels = []
144
+ probabs = []
145
+ positive_class_probs=[]
146
+ if phase == "train":
147
+ self.model.train()
148
+ else:
149
+ self.model.eval()
150
+ # self.probability_list = []
151
+
152
+ with open(self.log_folder_path+f"/log_{phase}_finetuned.txt", 'a') as f:
153
+ sys.stdout = f
154
+ for i, data in data_iter:
155
+ # 0. batch_data will be sent into the device(GPU or cpu)
156
+ data = {key: value.to(self.device) for key, value in data.items()}
157
+ if phase == "train":
158
+ logits = self.model.forward(data["input"], data["segment_label"], data["feat"])
159
+ else:
160
+ with torch.no_grad():
161
+ logits = self.model.forward(data["input"].cpu(), data["segment_label"].cpu(), data["feat"].cpu())
162
+
163
+ logits = logits.cpu()
164
+ loss = self.criterion(logits, data["label"])
165
+ # if torch.cuda.device_count() > 1:
166
+ # loss = loss.mean()
167
+
168
+ # 3. backward and optimization only in train
169
+ # if phase == "train":
170
+ # self.optim_schedule.zero_grad()
171
+ # loss.backward()
172
+ # self.optim_schedule.step_and_update_lr()
173
+
174
+ # prediction accuracy
175
+ probs = nn.Softmax(dim=-1)(logits) # Probabilities
176
+ probabs.extend(probs.detach().cpu().numpy().tolist())
177
+ predicted_labels = torch.argmax(probs, dim=-1) #correct
178
+ # self.probability_list.append(probs)
179
+ # true_labels = torch.argmax(data["label"], dim=-1)
180
+ plabels.extend(predicted_labels.cpu().numpy())
181
+ tlabels.extend(data['label'].cpu().numpy())
182
+ positive_class_probs = [prob[1] for prob in probabs]
183
+ # Compare predicted labels to true labels and calculate accuracy
184
+ correct = (data['label'] == predicted_labels).sum().item()
185
+
186
+ avg_loss += loss.item()
187
+ total_correct += correct
188
+ # total_element += true_labels.nelement()
189
+ total_element += data["label"].nelement()
190
+ # print(">>>>>>>>>>>>>>", predicted_labels, true_labels, correct, total_correct, total_element)
191
+
192
+ post_fix = {
193
+ "epoch": epoch,
194
+ "iter": i,
195
+ "avg_loss": avg_loss / (i + 1),
196
+ "avg_acc": total_correct / total_element * 100 if total_element != 0 else 0,
197
+ "loss": loss.item()
198
+ }
199
+ if i % self.log_freq == 0:
200
+ data_iter.write(str(post_fix))
201
+
202
+ precisions = precision_score(tlabels, plabels, average="weighted", zero_division=0)
203
+ recalls = recall_score(tlabels, plabels, average="weighted")
204
+ f1_scores = f1_score(tlabels, plabels, average="weighted")
205
+ cmatrix = confusion_matrix(tlabels, plabels)
206
+ end_time = time.time()
207
+ auc_score = roc_auc_score(tlabels, positive_class_probs)
208
+ final_msg = {
209
+ "avg_loss": avg_loss / len(data_iter),
210
+ "total_acc": total_correct * 100.0 / total_element,
211
+ "precisions": precisions,
212
+ "recalls": recalls,
213
+ "f1_scores": f1_scores,
214
+ # "confusion_matrix": f"{cmatrix}",
215
+ # "true_labels": f"{tlabels}",
216
+ # "predicted_labels": f"{plabels}",
217
+ "time_taken_from_start": end_time - self.start_time,
218
+ "auc_score":auc_score
219
+ }
220
+ with open("result.txt", 'w') as file:
221
+ for key, value in final_msg.items():
222
+ file.write(f"{key}: {value}\n")
223
+ print(final_msg)
224
+ fpr, tpr, thresholds = roc_curve(tlabels, positive_class_probs)
225
+ with open("roc_data.pkl", "wb") as f:
226
+ pickle.dump((fpr, tpr, thresholds), f)
227
+ print(final_msg)
228
+ f.close()
229
+ with open(self.log_folder_path+f"/log_{phase}_finetuned_info.txt", 'a') as f1:
230
+ sys.stdout = f1
231
+ final_msg = {
232
+ "epoch": f"EP{epoch}_{phase}",
233
+ "confusion_matrix": f"{cmatrix}",
234
+ "true_labels": f"{tlabels if epoch == 0 else ''}",
235
+ "predicted_labels": f"{plabels}",
236
+ "probabilities": f"{probabs}",
237
+ "time_taken_from_start": end_time - self.start_time
238
+ }
239
+ print(final_msg)
240
+ f1.close()
241
+ sys.stdout = sys.__stdout__
242
+ sys.stdout = sys.__stdout__
243
+
244
+
245
+
246
+ class BERTFineTuneCalibratedTrainer:
247
+
248
+ def __init__(self, bertFinetunedClassifierwithFeats: BERT, #BERTForClassificationWithFeats
249
+ vocab_size: int, test_dataloader: DataLoader = None,
250
+ lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=10000,
251
+ with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, workspace_name=None,
252
+ num_labels=2, log_folder_path: str = None):
253
+ """
254
+ :param bert: BERT model which you want to train
255
+ :param vocab_size: total word vocab size
256
+ :param test_dataloader: test dataset data loader [can be None]
257
+ :param lr: learning rate of optimizer
258
+ :param betas: Adam optimizer betas
259
+ :param weight_decay: Adam optimizer weight decay param
260
+ :param with_cuda: training with CUDA
261
+ :param log_freq: logging frequency of the batch iteration
262
+ """
263
+
264
+ # Setup cuda device for BERT training, argument -c, --cuda should be true
265
+ cuda_condition = torch.cuda.is_available() and with_cuda
266
+ self.device = torch.device("cuda:0" if cuda_condition else "cpu")
267
+ print(cuda_condition, " Device used = ", self.device)
268
+
269
+ # available_gpus = list(range(torch.cuda.device_count()))
270
+
271
+ # This BERT model will be saved every epoch
272
+ self.model = bertFinetunedClassifierwithFeats
273
+ print(self.model.parameters())
274
+ for param in self.model.parameters():
275
+ param.requires_grad = False
276
+ # Initialize the BERT Language Model, with BERT model
277
+ # self.model = BERTForClassification(self.bert, vocab_size, num_labels).to(self.device)
278
+ # self.model = BERTForClassificationWithFeats(self.bert, num_labels, 8).to(self.device)
279
+ # self.model = bertFinetunedClassifierwithFeats
280
+ # print(self.model.bert.parameters())
281
+ # for param in self.model.bert.parameters():
282
+ # param.requires_grad = False
283
+ # BERTForClassificationWithFeats(self.bert, num_labels, 18).to(self.device)
284
+
285
+ # self.model = BERTForClassificationWithFeats(self.bert, num_labels, 1).to(self.device)
286
+ # Distributed GPU training if CUDA can detect more than 1 GPU
287
+ # if with_cuda and torch.cuda.device_count() > 1:
288
+ # print("Using %d GPUS for BERT" % torch.cuda.device_count())
289
+ # self.model = nn.DataParallel(self.model, device_ids=available_gpus)
290
+
291
+ # Setting the train, validation and test data loader
292
+ # self.train_data = train_dataloader
293
+ # self.val_data = val_dataloader
294
+ self.test_data = test_dataloader
295
+
296
+ # self.optim = Adam(self.model.parameters(), lr=lr, weight_decay=weight_decay) #, eps=1e-9
297
+ self.optim = Adam(self.model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay)
298
+ # self.optim_schedule = ScheduledOptim(self.optim, self.model.bert.hidden, n_warmup_steps=warmup_steps)
299
+ # self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1)
300
+ self.criterion = nn.CrossEntropyLoss()
301
+
302
+ # if num_labels == 1:
303
+ # self.criterion = nn.MSELoss()
304
+ # elif num_labels == 2:
305
+ # self.criterion = nn.BCEWithLogitsLoss()
306
+ # # self.criterion = nn.CrossEntropyLoss()
307
+ # elif num_labels > 2:
308
+ # self.criterion = nn.CrossEntropyLoss()
309
+ # self.criterion = nn.BCEWithLogitsLoss()
310
+
311
+
312
+ self.log_freq = log_freq
313
+ self.log_folder_path = log_folder_path
314
+ # self.workspace_name = workspace_name
315
+ # self.finetune_task = finetune_task
316
+ # self.save_model = False
317
+ # self.avg_loss = 10000
318
+ self.start_time = time.time()
319
+ # self.probability_list = []
320
+ for fi in ['test']: #'val',
321
+ f = open(self.log_folder_path+f"/log_{fi}_finetuned.txt", 'w')
322
+ f.close()
323
+ print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()]))
324
+
325
+ # def train(self, epoch):
326
+ # self.iteration(epoch, self.train_data)
327
+
328
+ # def val(self, epoch):
329
+ # self.iteration(epoch, self.val_data, phase="val")
330
+
331
+ def test(self, epoch):
332
+ # if epoch == 0:
333
+ # self.avg_loss = 10000
334
+ self.iteration(epoch, self.test_data, phase="test")
335
+
336
+ def iteration(self, epoch, data_loader, phase="train"):
337
+ """
338
+ loop over the data_loader for training or testing
339
+ if on train status, backward operation is activated
340
+ and also auto save the model every epoch
341
+
342
+ :param epoch: current epoch index
343
+ :param data_loader: torch.utils.data.DataLoader for iteration
344
+ :param train: boolean value of is train or test
345
+ :return: None
346
+ """
347
+
348
+ # Setting the tqdm progress bar
349
+ data_iter = tqdm.tqdm(enumerate(data_loader),
350
+ desc="EP_%s:%d" % (phase, epoch),
351
+ total=len(data_loader),
352
+ bar_format="{l_bar}{r_bar}")
353
+
354
+ avg_loss = 0.0
355
+ total_correct = 0
356
+ total_element = 0
357
+ plabels = []
358
+ tlabels = []
359
+ probabs = []
360
+
361
+ if phase == "train":
362
+ self.model.train()
363
+ else:
364
+ self.model.eval()
365
+ # self.probability_list = []
366
+
367
+ with open(self.log_folder_path+f"/log_{phase}_finetuned.txt", 'a') as f:
368
+ sys.stdout = f
369
+ for i, data in data_iter:
370
+ # 0. batch_data will be sent into the device(GPU or cpu)
371
+ # print(data_pair[0])
372
+ data = {key: value.to(self.device) for key, value in data[0].items()}
373
+ # print(f"data : {data}")
374
+ # data = {key: value.to(self.device) for key, value in data.items()}
375
+
376
+ # if phase == "train":
377
+ # logits = self.model.forward(data["input"], data["segment_label"], data["feat"])
378
+ # else:
379
+ with torch.no_grad():
380
+ # logits = self.model.forward(data["input"], data["segment_label"], data["feat"])
381
+ logits = self.model.forward(data)
382
+
383
+ loss = self.criterion(logits, data["label"])
384
+ if torch.cuda.device_count() > 1:
385
+ loss = loss.mean()
386
+
387
+ # 3. backward and optimization only in train
388
+ # if phase == "train":
389
+ # self.optim_schedule.zero_grad()
390
+ # loss.backward()
391
+ # self.optim_schedule.step_and_update_lr()
392
+
393
+ # prediction accuracy
394
+ probs = nn.Softmax(dim=-1)(logits) # Probabilities
395
+ probabs.extend(probs.detach().cpu().numpy().tolist())
396
+ predicted_labels = torch.argmax(probs, dim=-1) #correct
397
+ # self.probability_list.append(probs)
398
+ # true_labels = torch.argmax(data["label"], dim=-1)
399
+ plabels.extend(predicted_labels.cpu().numpy())
400
+ tlabels.extend(data['label'].cpu().numpy())
401
+ positive_class_probs = [prob[1] for prob in probabs]
402
+
403
+ # Compare predicted labels to true labels and calculate accuracy
404
+ correct = (data['label'] == predicted_labels).sum().item()
405
+
406
+ avg_loss += loss.item()
407
+ total_correct += correct
408
+ # total_element += true_labels.nelement()
409
+ total_element += data["label"].nelement()
410
+ # print(">>>>>>>>>>>>>>", predicted_labels, true_labels, correct, total_correct, total_element)
411
+
412
+ post_fix = {
413
+ "epoch": epoch,
414
+ "iter": i,
415
+ "avg_loss": avg_loss / (i + 1),
416
+ "avg_acc": total_correct / total_element * 100 if total_element != 0 else 0,
417
+ "loss": loss.item()
418
+ }
419
+ if i % self.log_freq == 0:
420
+ data_iter.write(str(post_fix))
421
+
422
+ precisions = precision_score(tlabels, plabels, average="weighted", zero_division=0)
423
+ recalls = recall_score(tlabels, plabels, average="weighted")
424
+ f1_scores = f1_score(tlabels, plabels, average="weighted")
425
+ cmatrix = confusion_matrix(tlabels, plabels)
426
+ auc_score = roc_auc_score(tlabels, positive_class_probs)
427
+ end_time = time.time()
428
+ final_msg = {
429
+ "avg_loss": avg_loss / len(data_iter),
430
+ "total_acc": total_correct * 100.0 / total_element,
431
+ "precisions": precisions,
432
+ "recalls": recalls,
433
+ "f1_scores": f1_scores,
434
+ "auc_score":auc_score,
435
+ # "confusion_matrix": f"{cmatrix}",
436
+ # "true_labels": f"{tlabels}",
437
+ # "predicted_labels": f"{plabels}",
438
+ "time_taken_from_start": end_time - self.start_time
439
+ }
440
+ with open("result.txt", 'w') as file:
441
+ for key, value in final_msg.items():
442
+ file.write(f"{key}: {value}\n")
443
+ with open("plabels.txt","w") as file:
444
+ file.write(str(plabels))  # plabels is a list of ints; write() needs a string
445
+
446
+ print(final_msg)
447
+ fpr, tpr, thresholds = roc_curve(tlabels, positive_class_probs)
448
+ f.close()
449
+ with open(self.log_folder_path+f"/log_{phase}_finetuned_info.txt", 'a') as f1:
450
+ sys.stdout = f1
451
+ final_msg = {
452
+
453
+ "confusion_matrix": f"{cmatrix}",
454
+ "true_labels": f"{tlabels if epoch == 0 else ''}",
455
+ "predicted_labels": f"{plabels}",
456
+ "probabilities": f"{probabs}",
457
+ "time_taken_from_start": end_time - self.start_time
458
+ }
459
+ print(final_msg)
460
+ f1.close()
461
+ sys.stdout = sys.__stdout__
462
+ sys.stdout = sys.__stdout__
463
+
464
+
465
+
466
+ def train():
467
+ parser = argparse.ArgumentParser()
468
+
469
+ parser.add_argument('-workspace_name', type=str, default=None)
470
+ parser.add_argument('-code', type=str, default=None, help="folder for pretraining outputs and logs")
471
+ parser.add_argument('-finetune_task', type=str, default=None, help="folder inside finetuning")
472
+ parser.add_argument("-attention", type=bool, default=False, help="analyse attention scores")
473
+ parser.add_argument("-diff_test_folder", type=bool, default=False, help="use for different test folder")
474
+ parser.add_argument("-embeddings", type=bool, default=False, help="get and analyse embeddings")
475
+ parser.add_argument('-embeddings_file_name', type=str, default=None, help="file name of embeddings")
476
+ parser.add_argument("-pretrain", type=bool, default=False, help="pretraining: true, or false")
477
+ # parser.add_argument('-opts', nargs='+', type=str, default=None, help='List of optional steps')
478
+ parser.add_argument("-max_mask", type=int, default=0.15, help="% of input tokens selected for masking")
479
+ # parser.add_argument("-p", "--pretrain_dataset", type=str, default="pretraining/pretrain.txt", help="pretraining dataset for bert")
480
+ # parser.add_argument("-pv", "--pretrain_val_dataset", type=str, default="pretraining/test.txt", help="pretraining validation dataset for bert")
481
+ # default="finetuning/test.txt",
482
+ parser.add_argument("-vocab_path", type=str, default="pretraining/vocab.txt", help="built vocab model path with bert-vocab")
483
+
484
+ parser.add_argument("-train_dataset_path", type=str, default="train.txt", help="fine tune train dataset for progress classifier")
485
+ parser.add_argument("-val_dataset_path", type=str, default="val.txt", help="test set for evaluate fine tune train set")
486
+ parser.add_argument("-test_dataset_path", type=str, default="test.txt", help="test set for evaluate fine tune train set")
487
+ parser.add_argument("-num_labels", type=int, default=2, help="Number of labels")
488
+ parser.add_argument("-train_label_path", type=str, default="train_label.txt", help="fine tune train dataset for progress classifier")
489
+ parser.add_argument("-val_label_path", type=str, default="val_label.txt", help="test set for evaluate fine tune train set")
490
+ parser.add_argument("-test_label_path", type=str, default="test_label.txt", help="test set for evaluate fine tune train set")
491
+ ##### change Checkpoint for finetuning
492
+ parser.add_argument("-pretrained_bert_checkpoint", type=str, default=None, help="checkpoint of saved pretrained bert model")
493
+ parser.add_argument("-finetuned_bert_classifier_checkpoint", type=str, default=None, help="checkpoint of saved finetuned bert model") #."output_feb09/bert_trained.model.ep40"
494
+ #."output_feb09/bert_trained.model.ep40"
495
+ parser.add_argument('-check_epoch', type=int, default=None)
496
+
497
+ parser.add_argument("-hs", "--hidden", type=int, default=64, help="hidden size of transformer model") #64
498
+ parser.add_argument("-l", "--layers", type=int, default=4, help="number of layers") #4
499
+ parser.add_argument("-a", "--attn_heads", type=int, default=4, help="number of attention heads") #8
500
+ parser.add_argument("-s", "--seq_len", type=int, default=128, help="maximum sequence length")
501
+
502
+ parser.add_argument("-b", "--batch_size", type=int, default=500, help="number of batch_size") #64
503
+ parser.add_argument("-e", "--epochs", type=int, default=1)#1501, help="number of epochs") #501
504
+ # Use 50 for pretrain, and 10 for fine tune
505
+ parser.add_argument("-w", "--num_workers", type=int, default=0, help="dataloader worker size")
506
+
507
+ # Later run with cuda
508
+ parser.add_argument("--with_cuda", type=bool, default=False, help="training with CUDA: true, or false")
509
+ parser.add_argument("--log_freq", type=int, default=10, help="printing loss every n iter: setting n")
510
+ # parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus")
511
+ parser.add_argument("--cuda_devices", type=int, nargs='+', default=None, help="CUDA device ids")
512
+ # parser.add_argument("--on_memory", type=bool, default=False, help="Loading on memory: true or false")
513
+
514
+ parser.add_argument("--dropout", type=float, default=0.1, help="dropout of network")
515
+ parser.add_argument("--lr", type=float, default=1e-05, help="learning rate of adam") #1e-3
516
+ parser.add_argument("--adam_weight_decay", type=float, default=0.01, help="weight_decay of adam")
517
+ parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value")
518
+ parser.add_argument("--adam_beta2", type=float, default=0.98, help="adam first beta value") #0.999
519
+
520
+ parser.add_argument("-o", "--output_path", type=str, default="bert_trained.seq_encoder.model", help="ex)output/bert.model")
521
+ # parser.add_argument("-o", "--output_path", type=str, default="output/bert_fine_tuned.model", help="ex)output/bert.model")
522
+
523
+ args = parser.parse_args()
524
+ for k,v in vars(args).items():
525
+ if 'path' in k:
526
+ if v:
527
+ if k == "output_path":
528
+ if args.code:
529
+ setattr(args, f"{k}", args.workspace_name+f"/output/{args.code}/"+v)
530
+ elif args.finetune_task:
531
+ setattr(args, f"{k}", args.workspace_name+f"/output/{args.finetune_task}/"+v)
532
+ else:
533
+ setattr(args, f"{k}", args.workspace_name+"/output/"+v)
534
+ elif k != "vocab_path":
535
+ if args.pretrain:
536
+ setattr(args, f"{k}", args.workspace_name+"/pretraining/"+v)
537
+ else:
538
+ if args.code:
539
+ setattr(args, f"{k}", args.workspace_name+f"/{args.code}/"+v)
540
+ elif args.finetune_task:
541
+ if args.diff_test_folder and "test" in k:
542
+ setattr(args, f"{k}", args.workspace_name+f"/finetuning/"+v)
543
+ else:
544
+ setattr(args, f"{k}", args.workspace_name+f"/finetuning/{args.finetune_task}/"+v)
545
+ else:
546
+ setattr(args, f"{k}", args.workspace_name+"/finetuning/"+v)
547
+ else:
548
+ setattr(args, f"{k}", args.workspace_name+"/"+v)
549
+
550
+ print(f"args.{k} : {getattr(args, f'{k}')}")
551
+
552
+ print("Loading Vocab", args.vocab_path)
553
+ vocab_obj = Vocab(args.vocab_path)
554
+ vocab_obj.load_vocab()
555
+ print("Vocab Size: ", len(vocab_obj.vocab))
556
+
557
+
558
+ print("Testing using finetuned model......")
559
+ print("Loading Test Dataset", args.test_dataset_path)
560
+ test_dataset = TokenizerDataset(args.test_dataset_path, args.test_label_path, vocab_obj, seq_len=args.seq_len)
561
+ # test_dataset = TokenizerDatasetForCalibration(args.test_dataset_path, args.test_label_path, vocab_obj, seq_len=args.seq_len)
562
+
563
+ print("Creating Dataloader...")
564
+ test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
565
+
566
+ print("Load fine-tuned BERT classifier model with feats")
567
+ # cuda_condition = torch.cuda.is_available() and args.with_cuda
568
+ device = torch.device("cpu") #torch.device("cuda:0" if cuda_condition else "cpu")
569
+ finetunedBERTclassifier = torch.load(args.finetuned_bert_classifier_checkpoint, map_location=device)
570
+ if isinstance(finetunedBERTclassifier, torch.nn.DataParallel):
571
+ finetunedBERTclassifier = finetunedBERTclassifier.module
572
+
573
+ new_log_folder = f"{args.workspace_name}/logs"
574
+ new_output_folder = f"{args.workspace_name}/output"
575
+ if args.finetune_task: # set for almost every run
576
+ new_log_folder = f"{args.workspace_name}/logs/{args.finetune_task}"
577
+ new_output_folder = f"{args.workspace_name}/output/{args.finetune_task}"
578
+
579
+ if not os.path.exists(new_log_folder):
580
+ os.makedirs(new_log_folder)
581
+ if not os.path.exists(new_output_folder):
582
+ os.makedirs(new_output_folder)
583
+
584
+ print("Creating BERT Fine Tuned Test Trainer")
585
+ trainer = BERTFineTuneTrainer(finetunedBERTclassifier,
586
+ len(vocab_obj.vocab), test_dataloader=test_data_loader,
587
+ lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
588
+ with_cuda=args.with_cuda, cuda_devices = args.cuda_devices, log_freq=args.log_freq,
589
+ workspace_name = args.workspace_name, num_labels=args.num_labels, log_folder_path=new_log_folder)
590
+
591
+ # trainer = BERTFineTuneCalibratedTrainer(finetunedBERTclassifier,
592
+ # len(vocab_obj.vocab), test_dataloader=test_data_loader,
593
+ # lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
594
+ # with_cuda=args.with_cuda, cuda_devices = args.cuda_devices, log_freq=args.log_freq,
595
+ # workspace_name = args.workspace_name, num_labels=args.num_labels, log_folder_path=new_log_folder)
596
+ print("Testing fine-tuned model Start....")
597
+ start_time = time.time()
598
+ repoch = range(args.check_epoch, args.epochs) if args.check_epoch else range(args.epochs)
599
+ counter = 0
600
+ # patience = 10
601
+ for epoch in repoch:
602
+ print(f'Test Epoch {epoch} Starts, Time: {time.strftime("%D %T", time.localtime(time.time()))}')
603
+ trainer.test(epoch)
604
+ # pickle.dump(trainer.probability_list, open(f"{args.workspace_name}/output/aaai/change4_mid_prob_{epoch}.pkl","wb"))
605
+ print(f'Test Epoch {epoch} Ends, Time: {time.strftime("%D %T", time.localtime(time.time()))} \n')
606
+ end_time = time.time()
607
+ print("Time Taken to fine-tune model = ", end_time - start_time)
608
+ print(f'Pretraining Ends, Time: {time.strftime("%D %T", time.localtime(end_time))}')
609
+
610
+
611
+
612
+ if __name__ == "__main__":
613
+ train()
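The path-handling loop in train() rewrites every argument whose name contains "path" relative to the workspace before the vocab and datasets are loaded. Below is a minimal sketch of that rewriting, assuming a hypothetical invocation with -workspace_name ratio_proportion_change3 and -finetune_task correctness; it is an illustration only, not one of the committed files.

# Hypothetical values; only the rewriting rules are taken from train() above.
workspace_name = "ratio_proportion_change3"
finetune_task = "correctness"
test_dataset_path = "test.txt"
output_path = "bert_trained.seq_encoder.model"

# With -pretrain left False, -code unset and -finetune_task given, a dataset or
# label path argument is rewritten as:
resolved_test = f"{workspace_name}/finetuning/{finetune_task}/{test_dataset_path}"
# -> ratio_proportion_change3/finetuning/correctness/test.txt

# while the output checkpoint path is rewritten as:
resolved_output = f"{workspace_name}/output/{finetune_task}/{output_path}"
# -> ratio_proportion_change3/output/correctness/bert_trained.seq_encoder.model

Whether those exact files exist depends on the workspace layout; with -diff_test_folder set, test paths instead resolve to ratio_proportion_change3/finetuning/test.txt, which matches the fine-tuning files added in this commit.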
plot.png ADDED
prepare_pretraining_input_vocab_file.py ADDED
The diff for this file is too large to render. See raw diff
 
ratio_proportion_change3/finetuning/test.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da39d07824b2cfc3a41445694ff65018b1ffdf3e9b844d464cdba3c0ad6a8b87
3
+ size 6876678
ratio_proportion_change3/finetuning/test_in.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5029b0f965c4f6f1d5dd981778daf0b8f0f778dd71ecad7eb984e8461fa85b9
3
+ size 1318665
ratio_proportion_change3/finetuning/test_in_info.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:044b445c06dbdecb8663e5db8d6f270799240f1b433a169c335c15e566dbba20
3
+ size 1660506
ratio_proportion_change3/finetuning/test_in_label.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c035490a97515200f23348bf01bd3c16def88046a7c2215d9ef169ffc089d0d
3
+ size 17202
ratio_proportion_change3/finetuning/test_label.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6fee24daa1523d1a8d7615c415fac559d0bf85ace5ab18d9db1a8dff533ff68
3
+ size 79424
ratio_proportion_change3/finetuning/testr_in_label.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b95faf33529a8cdbcedfca3853be88f917e730c79261731c4860f0d57909f13f
3
+ size 97701
ratio_proportion_change3/finetuning/testr_label.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29519e69e1ec480ae0440e23dcb57f97bbb33cdd9b91d18e5e999d3e7e58288c
3
+ size 549160
ratio_proportion_change3/finetuning/train.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b397618386eb7cd21cf59251b4d48c1880330477c3186375a039047f181beae
3
+ size 775465
ratio_proportion_change3/finetuning/train_in.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b935dcf7dbbe3ad66c2616ae3e6c342d9d1b162c4931c7a291386c5ce609ce0
3
+ size 1656785
ratio_proportion_change3/finetuning/train_in_info.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5663b5706417ba65ec10abecf405f5644dfa637683fe1198ea937b8838cba6a
3
+ size 2411977
ratio_proportion_change3/finetuning/train_in_label.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e38fd99af6313174626b81cad3f5a6b6e88711f9f66f57cb5c3b0e6bc2e8b4c
3
+ size 17202
ratio_proportion_change3/finetuning/train_info.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9968e038b75a633b4957602e37d57b7c0cb561f9ae3c2b17ad0f9eb48b554c21
3
+ size 1080190
ratio_proportion_change3/finetuning/train_label.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de505197183cefe6a1c5ff4f5cd8e07dc14ed1b601951d7c3e02947d603e58c6
3
+ size 8932
ratio_proportion_change3/finetuning/trainr_in_label.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95e450636dcb476a258439c94249f1078e9186bfe00d8e70da7b9c339f4f728c
3
+ size 129011
ratio_proportion_change3/finetuning/trainr_label.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0294122c85237764e51d69d2efc5233d2c3a0d1027b31b4f510ca68bd6e46bc1
3
+ size 61542
ratio_proportion_change3/logs/masked/log_test_10per_finetuned.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d42d75c9a38be298f8ee1f022a544fe49804b72979a734b42aea08f7b31fb52
3
+ size 671476
ratio_proportion_change3/logs/masked/log_test_FS_finetuned.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:858fc5350a9bf0c75d46b8af1dc3b0f310bab1a0afa92ca8bca1e829b57d0b73
3
+ size 149839
ratio_proportion_change3/logs/masked/log_test_IS_finetuned.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c66fcb29fc9f3d92bed511d4a91530ad79a13860b93e418f0b8c6c1be0e54169
3
+ size 149828
ratio_proportion_change3/logs/masked/log_test_pretrained.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df1193ce0490717b442303f51da68869c6419f461ce5044b5a275b40e7bfb368
3
+ size 1055582
ratio_proportion_change3/logs/masked/log_train_10per_finetuned.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf58b6b3ba0d0a9562cfd510ce1a7bff20a4bb0ee1faa907397314333d26dcd2
3
+ size 88900
ratio_proportion_change3/logs/masked/log_train_FS_finetuned.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e92c0a8722c7b21b36f5028493692ccf32b473c20d3f6027d54e5fd822960432
3
+ size 167286
ratio_proportion_change3/logs/masked/log_train_IS_finetuned.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:644994580015b35979dce25d0e2b3be7b6ef6d02193a1b0ea6d10411412c5495
3
+ size 167148
ratio_proportion_change3/logs/masked/log_train_pretrained.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6095cbd3be17925bc64b05902281c01c2c3255df63ea2e5cd48b5d402c06033b
3
+ size 4116343
ratio_proportion_change3/output/FS/train.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:367628f1b9aa5047a07d5eb6e574e166e12d533d18a1634045424736bff9cc42
3
+ size 1699339
ratio_proportion_change3/output/FS/train_label.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be1eeaf1d96b6010aec2db568d20170e79d5e53bb790e250074f877931ab23d3
3
+ size 20636
ratio_proportion_change3/output/IS/train.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc804d5d3a54d0cbe69b295464378609916a2c5b2a8c0696757d20be185e1427
3
+ size 1361007
ratio_proportion_change3/output/IS/train_label.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a394ba9e86a56b82422fd9b7a7212bde72eae95fbd8d899e0e9fb9c21132a605
3
+ size 20636
ratio_proportion_change3/output/correctness/bert_fine_tuned.model.ep48 ADDED
Binary file (974 kB). View file
 
ratio_proportion_change3/output/correctness/test.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0faf7af6b63c26cb29b586e087c84881365a94b22d71f1a8587bfa979f2d5794
3
+ size 6253326
ratio_proportion_change3/output/correctness/test_label.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:708d181754296d2bbbe56ce509eb896ca69bd2d7a418839c0a09836bf1c31541
3
+ size 75023
ratio_proportion_change3/output/effectiveness/bert_fine_tuned.model.ep28 ADDED
Binary file (974 kB). View file