Kevin Fink
committed on
Commit
·
30deac6
1
Parent(s):
82bf175
dev
Browse files
app.py
CHANGED
@@ -145,8 +145,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
145 |
dataset['test'] = dataset['test'].select(range(50))
|
146 |
del dataset['train']
|
147 |
del dataset['validation']
|
148 |
-
test_set = dataset
|
149 |
-
test_set
|
150 |
return 'TRAINING DONE'
|
151 |
|
152 |
elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
|
@@ -161,9 +161,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
161 |
saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset2')
|
162 |
third_third = dataset['train'].select(range(third_size*2, train_size))
|
163 |
dataset['train'] = third_third
|
164 |
-
|
165 |
-
|
166 |
-
dataset['train'] = concatenate_datasets([saved_dataset, tokenized_second_half['train']])
|
167 |
dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset3')
|
168 |
return 'THIRD THIRD LOADED'
|
169 |
|
@@ -179,10 +178,12 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
179 |
second_third = dataset['train'].select(range(third_size, third_size*2))
|
180 |
dataset['train'] = second_third
|
181 |
del dataset['test']
|
182 |
-
|
183 |
-
|
|
|
184 |
dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset2')
|
185 |
-
|
|
|
186 |
return 'SECOND THIRD LOADED'
|
187 |
|
188 |
except Exception as e:
|
@@ -197,9 +198,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
197 |
dataset['train'] = first_third
|
198 |
del dataset['test']
|
199 |
del dataset['validation']
|
200 |
-
|
201 |
-
|
202 |
-
tokenized_first_third.save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
|
203 |
print('DONE')
|
204 |
return 'RUN AGAIN TO LOAD REST OF DATA'
|
205 |
|
|
|
145 |
dataset['test'] = dataset['test'].select(range(50))
|
146 |
del dataset['train']
|
147 |
del dataset['validation']
|
148 |
+
test_set = tokenize_function(dataset['test'])
|
149 |
+
test_set.save_to_disk(f'/data/{hub_id.strip()}_test_dataset')
|
150 |
return 'TRAINING DONE'
|
151 |
|
152 |
elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
|
|
|
161 |
saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset2')
|
162 |
third_third = dataset['train'].select(range(third_size*2, train_size))
|
163 |
dataset['train'] = third_third
|
164 |
+
train_set_3 = tokenize_function(dataset['train'])
|
165 |
+
dataset['train'] = concatenate_datasets([saved_dataset, train_set_3])
|
|
|
166 |
dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset3')
|
167 |
return 'THIRD THIRD LOADED'
|
168 |
|
|
|
178 |
second_third = dataset['train'].select(range(third_size, third_size*2))
|
179 |
dataset['train'] = second_third
|
180 |
del dataset['test']
|
181 |
+
train_set_2 = tokenize_function(dataset['train'])
|
182 |
+
validation_set = tokenize_function(dataset['validation'])
|
183 |
+
dataset['train'] = concatenate_datasets([saved_dataset['train'], train_set_2])
|
184 |
dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset2')
|
185 |
+
validation_set.save_to_disk(f'/data/{hub_id.strip()}_validation_dataset')
|
186 |
+
|
187 |
return 'SECOND THIRD LOADED'
|
188 |
|
189 |
except Exception as e:
|
|
|
198 |
dataset['train'] = first_third
|
199 |
del dataset['test']
|
200 |
del dataset['validation']
|
201 |
+
train_set = tokenize_function(dataset['train'])
|
202 |
+
train_set.save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
|
|
|
203 |
print('DONE')
|
204 |
return 'RUN AGAIN TO LOAD REST OF DATA'
|
205 |
|