Spaces:

Vishwas1
/

LLMTrainingPro

Sleeping

Vishwas1 committed on Sep 18, 2024

Commit

958029a

verified ·

1 Parent(s): 27f2ab5

Update train_model.py

Files changed (1) hide show

train_model.py CHANGED Viewed

@@ -65,12 +65,14 @@ def load_and_prepare_dataset(task, dataset_name, tokenizer, sequence_length):
     logging.info(f"Loading dataset '{dataset_name}' for task '{task}'...")
     try:
         if task == "generation":
-            dataset = load_dataset(dataset_name, split='train',use_auth_token=True)
             logging.info("Dataset loaded successfully for generation task.")
             def tokenize_function(examples):
                 return tokenizer(examples['text'], truncation=True, max_length=sequence_length)
         elif task == "classification":
-            dataset = load_dataset(dataset_name, split='train')
             logging.info("Dataset loaded successfully for classification task.")
             # Assuming the dataset has 'text' and 'label' columns
             def tokenize_function(examples):

     logging.info(f"Loading dataset '{dataset_name}' for task '{task}'...")
     try:
         if task == "generation":
+            train_dataset = load_dataset(dataset_name,use_auth_token=True)
+            dataset = train_dataset['train'].shuffle(seed=42).select(range(500))
             logging.info("Dataset loaded successfully for generation task.")
             def tokenize_function(examples):
                 return tokenizer(examples['text'], truncation=True, max_length=sequence_length)
         elif task == "classification":
+            train_dataset = load_dataset(dataset_name,use_auth_token=True)
+            dataset = train_dataset['train'].shuffle(seed=42).select(range(500))
             logging.info("Dataset loaded successfully for classification task.")
             # Assuming the dataset has 'text' and 'label' columns
             def tokenize_function(examples):