botcon committed
Commit bd4e4a3 · 1 Parent(s): a8b5bd2

Upload meta.py

Files changed (1): meta.py (+30 -13)
meta.py CHANGED
@@ -1,12 +1,13 @@
 import torch.nn as nn
 import torch
-from transformers import AutoTokenizer, BertForSequenceClassification, PreTrainedModel, PretrainedConfig
+from transformers import AutoTokenizer, BertForSequenceClassification, PreTrainedModel, PretrainedConfig, AutoModelForQuestionAnswering, get_scheduler
 from transformers.modeling_outputs import SequenceClassifierOutput
 from torch.nn import CrossEntropyLoss
 from torch.optim import AdamW
 from LUKE_pipe import generate
 from datasets import load_dataset
 from accelerate import Accelerator
+from tqdm import tqdm

 MAX_BEAM = 10

@@ -16,7 +17,6 @@ class ClassifierAdapter(nn.Module):
     def __init__(self, l1=3):
         super().__init__()
         self.linear1 = nn.Linear(l1, 1)
-
         self.tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
         self.bert = BertForSequenceClassification.from_pretrained("botcon/right_span_bert")
         self.relu = nn.ReLU()
@@ -52,27 +52,36 @@ class HuggingWrapper(PreTrainedModel):
         loss_fn = CrossEntropyLoss(ignore_index=MAX_BEAM)
         loss = loss_fn(output, labels)
         return SequenceClassifierOutput(logits=output, loss=loss)
-
+
+accelerator = Accelerator(mixed_precision="fp16")
 model = HuggingWrapper.from_pretrained("botcon/special_bert").to(device)
-
-accelerator = Accelerator()
 optimizer = AdamW(model.parameters())
-
-num_epoch = 2
-
+model, optimizer = accelerator.prepare(model, optimizer)
+batch_size = 2
 raw_datasets = load_dataset("squad")
 raw_train = raw_datasets["train"]
-batch_size = 2
+num_updates = len(raw_train) // batch_size
+num_epoch = 2
+num_training_steps = num_updates * num_epoch
+lr_scheduler = get_scheduler(
+    "linear",
+    optimizer=optimizer,
+    num_warmup_steps=0,
+    num_training_steps=num_training_steps,
+)
+
+progress_bar = tqdm(range(num_training_steps))

 for epoch in range(num_epoch):
     start = 0
     end = batch_size
-
+    steps = 0
+    cumu_loss = 0
     training_data = raw_train
     model.train()
     while start < len(training_data):
         optimizer.zero_grad()
-        batch_data = raw_train.select(range(start, min(end, len(training_data))))
+        batch_data = raw_train.select(range(start, min(end, len(raw_train))))
         with torch.no_grad():
             res = generate(batch_data)
         prediction = []
@@ -95,10 +104,18 @@ for epoch in range(num_epoch):
         labels = torch.LongTensor(labels).to(device)
         classifier_out = model(questions=batch_data["question"] , answers=prediction, logits=predicted_logit, labels=labels)
         loss = classifier_out.loss
-        print(loss.item())
-        loss.backward()
+        if not torch.isnan(loss).item():
+            cumu_loss += loss.item()
+        steps += 1
+        accelerator.backward(loss)
         optimizer.step()
+        lr_scheduler.step()
+        progress_bar.update(1)
         start += batch_size
         end += batch_size
+        # every 100 steps
+        if steps % 100 == 0:
+            print("Cumu loss: {}".format(cumu_loss / 100))
+            cumu_loss = 0

 model.push_to_hub("some_fake_bert")
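
For context, the change essentially moves the hand-rolled loop onto the standard Accelerate recipe: the model and optimizer are wrapped with accelerator.prepare, loss.backward() becomes accelerator.backward(loss), and a linear get_scheduler plus a tqdm progress bar are added. The sketch below is a minimal, self-contained version of that pattern on a placeholder model and random data (the model, data, and learning rate here are illustrative assumptions, not from this repo; fp16 mixed precision assumes a CUDA GPU):

import torch
from torch.optim import AdamW
from torch.utils.data import DataLoader, TensorDataset
from accelerate import Accelerator
from transformers import get_scheduler
from tqdm import tqdm

accelerator = Accelerator(mixed_precision="fp16")   # fp16 autocast + gradient scaling
model = torch.nn.Linear(8, 2)                       # placeholder model
optimizer = AdamW(model.parameters(), lr=5e-5)

# placeholder data standing in for the SQuAD batches used in meta.py
dataset = TensorDataset(torch.randn(64, 8), torch.randint(0, 2, (64,)))
loader = DataLoader(dataset, batch_size=2)

# prepare() moves everything to the right device and wraps it for mixed precision
model, optimizer, loader = accelerator.prepare(model, optimizer, loader)

num_epochs = 2
num_training_steps = num_epochs * len(loader)
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

progress_bar = tqdm(range(num_training_steps))
loss_fn = torch.nn.CrossEntropyLoss()

model.train()
for epoch in range(num_epochs):
    for inputs, labels in loader:
        optimizer.zero_grad()
        loss = loss_fn(model(inputs), labels)
        accelerator.backward(loss)   # replaces loss.backward(); handles loss scaling
        optimizer.step()
        lr_scheduler.step()
        progress_bar.update(1)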