Spaces:
Starting
on
L40S
Starting
on
L40S
Update app.py
Browse files
app.py
CHANGED
@@ -30,7 +30,7 @@ def create_tokenizer(training_corpus):
|
|
30 |
)
|
31 |
return tokenizer
|
32 |
|
33 |
-
def get_training_corpus():
|
34 |
for i in range(0, len(dataset), 1000):
|
35 |
yield dataset[i : i + 1000]["text"]
|
36 |
|
@@ -118,7 +118,7 @@ def train_model(model, tokenizer, dataset):
|
|
118 |
|
119 |
def main():
|
120 |
dataset = load_data()
|
121 |
-
training_corpus = get_training_corpus()
|
122 |
tokenizer = create_tokenizer(training_corpus)
|
123 |
configure_tokenizer(tokenizer)
|
124 |
model = create_model(tokenizer, FACTOR)
|
|
|
30 |
)
|
31 |
return tokenizer
|
32 |
|
33 |
+
def get_training_corpus(dataset):
|
34 |
for i in range(0, len(dataset), 1000):
|
35 |
yield dataset[i : i + 1000]["text"]
|
36 |
|
|
|
118 |
|
119 |
def main():
|
120 |
dataset = load_data()
|
121 |
+
training_corpus = get_training_corpus(dataset)
|
122 |
tokenizer = create_tokenizer(training_corpus)
|
123 |
configure_tokenizer(tokenizer)
|
124 |
model = create_model(tokenizer, FACTOR)
|