nroggendorff committed on
Commit
5ed2bdf
·
verified ·
1 Parent(s): 4f9862c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -14,10 +14,11 @@ LEARNING_RATE = 1e-4
14
  FP16 = True
15
  FACTOR = 8
16
  VOCAB_SIZE = 3200
17
- DATASET = "nroggendorff/elephant"
 
18
 
19
  def load_data():
20
- dataset = load_dataset("nroggendorff/elephant", split="train")
21
  return dataset
22
 
23
  def create_tokenizer(training_corpus):
@@ -92,7 +93,7 @@ def configure_tokenizer(tokenizer):
92
 
93
  def train_model(model, tokenizer, dataset):
94
  args = TrainingArguments(
95
- output_dir="mayo",
96
  num_train_epochs=EPOCHS,
97
  per_device_train_batch_size=BATCH_SIZE,
98
  learning_rate=LEARNING_RATE,
@@ -113,7 +114,7 @@ def train_model(model, tokenizer, dataset):
113
  trained_model = trainer.model
114
  trained_tokenizer = trainer.tokenizer
115
 
116
- repo_id = "makeshift-mayo"
117
  trained_model.push_to_hub(repo_id)
118
  trained_tokenizer.push_to_hub(repo_id)
119
 
 
14
  FP16 = True
15
  FACTOR = 8
16
  VOCAB_SIZE = 3200
17
+ INPUT_DATASET = "nroggendorff/elephant"
18
+ OUTPUT_REPO = "smallama"
19
 
20
  def load_data():
21
+ dataset = load_dataset(INPUT_DATASET, split="train")
22
  return dataset
23
 
24
  def create_tokenizer(training_corpus):
 
93
 
94
  def train_model(model, tokenizer, dataset):
95
  args = TrainingArguments(
96
+ output_dir="model",
97
  num_train_epochs=EPOCHS,
98
  per_device_train_batch_size=BATCH_SIZE,
99
  learning_rate=LEARNING_RATE,
 
114
  trained_model = trainer.model
115
  trained_tokenizer = trainer.tokenizer
116
 
117
+ repo_id = OUTPUT_REPO
118
  trained_model.push_to_hub(repo_id)
119
  trained_tokenizer.push_to_hub(repo_id)
120