Kevin Fink
committed on
Commit
·
da5b30a
1
Parent(s):
30deac6
dev
Browse files
app.py
CHANGED
@@ -146,7 +146,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
146 |
del dataset['train']
|
147 |
del dataset['validation']
|
148 |
test_set = tokenize_function(dataset['test'])
|
149 |
-
|
|
|
150 |
return 'TRAINING DONE'
|
151 |
|
152 |
elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
|
@@ -161,7 +162,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
161 |
saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset2')
|
162 |
third_third = dataset['train'].select(range(third_size*2, train_size))
|
163 |
dataset['train'] = third_third
|
164 |
-
train_set_3 = tokenize_function(dataset['train'])
|
165 |
dataset['train'] = concatenate_datasets([saved_dataset, train_set_3])
|
166 |
dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset3')
|
167 |
return 'THIRD THIRD LOADED'
|
@@ -180,9 +181,10 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
180 |
del dataset['test']
|
181 |
train_set_2 = tokenize_function(dataset['train'])
|
182 |
validation_set = tokenize_function(dataset['validation'])
|
|
|
183 |
dataset['train'] = concatenate_datasets([saved_dataset['train'], train_set_2])
|
184 |
dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset2')
|
185 |
-
|
186 |
|
187 |
return 'SECOND THIRD LOADED'
|
188 |
|
@@ -199,7 +201,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
199 |
del dataset['test']
|
200 |
del dataset['validation']
|
201 |
train_set = tokenize_function(dataset['train'])
|
202 |
-
|
|
|
203 |
print('DONE')
|
204 |
return 'RUN AGAIN TO LOAD REST OF DATA'
|
205 |
|
|
|
146 |
del dataset['train']
|
147 |
del dataset['validation']
|
148 |
test_set = tokenize_function(dataset['test'])
|
149 |
+
dataset['test'] =test_set
|
150 |
+
dataset['test'].save_to_disk(f'/data/{hub_id.strip()}_test_dataset')
|
151 |
return 'TRAINING DONE'
|
152 |
|
153 |
elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
|
|
|
162 |
saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset2')
|
163 |
third_third = dataset['train'].select(range(third_size*2, train_size))
|
164 |
dataset['train'] = third_third
|
165 |
+
train_set_3 = tokenize_function(dataset['train'])
|
166 |
dataset['train'] = concatenate_datasets([saved_dataset, train_set_3])
|
167 |
dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset3')
|
168 |
return 'THIRD THIRD LOADED'
|
|
|
181 |
del dataset['test']
|
182 |
train_set_2 = tokenize_function(dataset['train'])
|
183 |
validation_set = tokenize_function(dataset['validation'])
|
184 |
+
dataset['validation'] = validation_set
|
185 |
dataset['train'] = concatenate_datasets([saved_dataset['train'], train_set_2])
|
186 |
dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset2')
|
187 |
+
dataset['validation'].save_to_disk(f'/data/{hub_id.strip()}_validation_dataset')
|
188 |
|
189 |
return 'SECOND THIRD LOADED'
|
190 |
|
|
|
201 |
del dataset['test']
|
202 |
del dataset['validation']
|
203 |
train_set = tokenize_function(dataset['train'])
|
204 |
+
dataset['train'] = train_set
|
205 |
+
dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
|
206 |
print('DONE')
|
207 |
return 'RUN AGAIN TO LOAD REST OF DATA'
|
208 |
|