nroggendorff committed on
Commit
3cd0c96
·
verified ·
1 Parent(s): fcbe4cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -9,7 +9,7 @@ import requests as rq
9
  import gc
10
  from tokenizers import ByteLevelBPETokenizer
11
 
12
- dataset = load_dataset("nroggendorff/openhermes", split="train").select(range(int(1e+5)))
13
 
14
  def get_training_corpus():
15
  for i in range(0, len(dataset), 1000):
@@ -97,8 +97,8 @@ print(dataset['text'][2])
97
 
98
  args = TrainingArguments(
99
  output_dir="mayo",
100
- num_train_epochs=4,
101
- gradient_accumulation_steps=4,
102
  per_device_train_batch_size=32,
103
  learning_rate=1e-5,
104
  save_steps=100000,
 
9
  import gc
10
  from tokenizers import ByteLevelBPETokenizer
11
 
12
+ dataset = load_dataset("nroggendorff/openhermes", split="train").select(range(int(2e+5)))
13
 
14
  def get_training_corpus():
15
  for i in range(0, len(dataset), 1000):
 
97
 
98
  args = TrainingArguments(
99
  output_dir="mayo",
100
+ num_train_epochs=2,
101
+ gradient_accumulation_steps=1,
102
  per_device_train_batch_size=32,
103
  learning_rate=1e-5,
104
  save_steps=100000,