Update app.py
app.py CHANGED
@@ -14,10 +14,11 @@ LEARNING_RATE = 1e-4
 FP16 = True
 FACTOR = 8
 VOCAB_SIZE = 3200
-
+INPUT_DATASET = "nroggendorff/elephant"
+OUTPUT_REPO = "smallama"
 
 def load_data():
-    dataset = load_dataset(
+    dataset = load_dataset(INPUT_DATASET, split="train")
     return dataset
 
 def create_tokenizer(training_corpus):
@@ -92,7 +93,7 @@ def configure_tokenizer(tokenizer):
 
 def train_model(model, tokenizer, dataset):
     args = TrainingArguments(
-        output_dir="
+        output_dir="model",
         num_train_epochs=EPOCHS,
         per_device_train_batch_size=BATCH_SIZE,
         learning_rate=LEARNING_RATE,
@@ -113,7 +114,7 @@ def train_model(model, tokenizer, dataset):
     trained_model = trainer.model
     trained_tokenizer = trainer.tokenizer
 
-    repo_id =
+    repo_id = OUTPUT_REPO
     trained_model.push_to_hub(repo_id)
     trained_tokenizer.push_to_hub(repo_id)
 
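For reference, the updated load_data() simply pulls the training split of the dataset named by the new INPUT_DATASET constant. A minimal sketch of that function, assuming the standard datasets library API (only the constant value and split="train" come from the diff):

# Sketch of the updated load_data(); assumes `pip install datasets`.
from datasets import load_dataset

INPUT_DATASET = "nroggendorff/elephant"  # value introduced in this commit

def load_data():
    # split="train" mirrors the added line; returns a single datasets.Dataset
    dataset = load_dataset(INPUT_DATASET, split="train")
    return dataset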
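The remaining hunks fill in the two values the training step needs: a local output_dir for checkpoints and the Hub repo the finished model and tokenizer are pushed to. A hedged sketch of how those pieces could fit together in a train_model() like the one above; output_dir, the argument names, LEARNING_RATE, FP16, and OUTPUT_REPO mirror the diff, while the Trainer wiring and placeholder hyperparameters are illustrative assumptions:

# Sketch only -- output_dir, the argument names, and OUTPUT_REPO come from the diff;
# everything else here is assumed boilerplate.
from transformers import Trainer, TrainingArguments

EPOCHS = 3                # placeholders; the real values are defined earlier in app.py
BATCH_SIZE = 8
LEARNING_RATE = 1e-4      # from the hunk header
FP16 = True
OUTPUT_REPO = "smallama"  # value introduced in this commit

def train_model(model, tokenizer, dataset):
    args = TrainingArguments(
        output_dir="model",                      # added in this commit
        num_train_epochs=EPOCHS,
        per_device_train_batch_size=BATCH_SIZE,
        learning_rate=LEARNING_RATE,
        fp16=FP16,
    )
    # Assumes `dataset` has already been tokenized into model inputs.
    trainer = Trainer(model=model, args=args, train_dataset=dataset, tokenizer=tokenizer)
    trainer.train()

    trained_model = trainer.model
    trained_tokenizer = trainer.tokenizer

    # Both the weights and the tokenizer are pushed to the same Hub repo.
    repo_id = OUTPUT_REPO
    trained_model.push_to_hub(repo_id)
    trained_tokenizer.push_to_hub(repo_id)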