Update train.py
train.py CHANGED

@@ -10,8 +10,8 @@ from tokenizers import ByteLevelBPETokenizer
 MAX_SEQ_LENGTH = 128
 BATCH_SIZE = 16
 EPOCHS = 1
-LEARNING_RATE = 1e-
-FACTOR =
+LEARNING_RATE = 1e-5
+FACTOR = 600
 VOCAB_SIZE = 3200
 INPUT_DATASET = "nroggendorff/elephant"
 OUTPUT_REPO = "smallama"
@@ -54,9 +54,9 @@ def create_model(tokenizer):
     config = LlamaConfig(
         vocab_size=tokenizer.vocab_size,
         hidden_size=FACTOR,
-        intermediate_size=FACTOR *
-        num_hidden_layers=
-        num_attention_heads=
+        intermediate_size=FACTOR * 4,
+        num_hidden_layers=FACTOR // 32,
+        num_attention_heads=FACTOR // 64,
         max_position_embeddings=MAX_SEQ_LENGTH,
         rms_norm_eps=1e-6,
         initializer_range=0.02,
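For context, with the new FACTOR = 600 the derived dimensions evaluate to intermediate_size = 2400, num_hidden_layers = 18, and num_attention_heads = 9. Below is a minimal, self-contained sketch of the resulting config, assuming the transformers LlamaConfig API; it substitutes the VOCAB_SIZE constant for the tokenizer.vocab_size the script passes at runtime. Note that 600 is not divisible by 9, so transformers builds that require hidden_size to split evenly across attention heads will raise an error when the model itself is instantiated.

# Sketch only: what the updated constants evaluate to.
# Values are taken from the diff above; VOCAB_SIZE stands in for
# tokenizer.vocab_size, which the script uses at runtime.
from transformers import LlamaConfig

MAX_SEQ_LENGTH = 128
FACTOR = 600
VOCAB_SIZE = 3200

config = LlamaConfig(
    vocab_size=VOCAB_SIZE,             # script passes tokenizer.vocab_size here
    hidden_size=FACTOR,                # 600
    intermediate_size=FACTOR * 4,      # 2400
    num_hidden_layers=FACTOR // 32,    # 18
    num_attention_heads=FACTOR // 64,  # 9 -> head_dim = 600 / 9, not an integer
    max_position_embeddings=MAX_SEQ_LENGTH,
    rms_norm_eps=1e-6,
    initializer_range=0.02,
)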