dad1909 committed on
Commit
3b49ded
·
verified ·
1 Parent(s): 39f56bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -17,15 +17,15 @@ current_num = os.getenv("NUM")
17
  print(f"stage ${current_num}")
18
 
19
  api = HfApi(token=hf_token)
20
- models = f"dad1909/cybersentinal-2.0-{current_num}"
21
 
22
- # model_base = "dad1909/cybersentinal-2.0"
23
 
24
  print("Starting model and tokenizer loading...")
25
 
26
  # Load the model and tokenizer
27
  model, tokenizer = FastLanguageModel.from_pretrained(
28
- model_name=models,
29
  max_seq_length=max_seq_length,
30
  dtype=dtype,
31
  load_in_4bit=load_in_4bit,
@@ -119,14 +119,13 @@ trainer = SFTTrainer(
119
  dataset_num_proc=2,
120
  packing=False,
121
  args=TrainingArguments(
122
- per_device_train_batch_size=5,
123
- gradient_accumulation_steps=5,
124
  learning_rate=2e-4,
125
  fp16=not is_bfloat16_supported(),
126
  bf16=is_bfloat16_supported(),
127
  warmup_steps=5,
128
  logging_steps=10,
129
- max_steps=200,
130
  optim="adamw_8bit",
131
  weight_decay=0.01,
132
  lr_scheduler_type="linear",
@@ -143,7 +142,7 @@ print("Training completed.")
143
  num = int(current_num)
144
  num += 1
145
 
146
- uploads_models = f"cybersentinal-2.0-{str(num)}"
147
 
148
  print("Saving the trained model...")
149
  model.save_pretrained_merged("model", tokenizer, save_method="merged_16bit")
 
17
  print(f"stage ${current_num}")
18
 
19
  api = HfApi(token=hf_token)
20
+ # models = f"dad1909/cybersentinal-2.0-{current_num}"
21
 
22
+ model_base = "unsloth/llama-3-8b-Instruct-bnb-4bit"
23
 
24
  print("Starting model and tokenizer loading...")
25
 
26
  # Load the model and tokenizer
27
  model, tokenizer = FastLanguageModel.from_pretrained(
28
+ model_name=model_base,
29
  max_seq_length=max_seq_length,
30
  dtype=dtype,
31
  load_in_4bit=load_in_4bit,
 
119
  dataset_num_proc=2,
120
  packing=False,
121
  args=TrainingArguments(
122
+ per_device_train_batch_size=20,
123
+ gradient_accumulation_steps=20,
124
  learning_rate=2e-4,
125
  fp16=not is_bfloat16_supported(),
126
  bf16=is_bfloat16_supported(),
127
  warmup_steps=5,
128
  logging_steps=10,
 
129
  optim="adamw_8bit",
130
  weight_decay=0.01,
131
  lr_scheduler_type="linear",
 
142
  num = int(current_num)
143
  num += 1
144
 
145
+ uploads_models = f"cybersentinal-3.0"
146
 
147
  print("Saving the trained model...")
148
  model.save_pretrained_merged("model", tokenizer, save_method="merged_16bit")