Twelve2five committed
Commit 0591b3c · verified · 1 Parent(s): e302645

Update app.py

Files changed (1)
  1. app.py +135 -114
app.py CHANGED
@@ -207,90 +207,95 @@ def load_model():
         log.append(f"Alternative loading also failed: {e2}")
         return "\n".join(log)

-    # --- Load Tokenizer ---
     progress(0.3, desc="Loading tokenizer...")
     try:
-        log.append("Loading a compatible tokenizer...")
-        # Use the tokenizer from Meta's official Llama models - should be compatible with Llama 3.2
-        tokenizer_id = "meta-llama/Llama-3-1B"  # This is a reliable source for a Llama tokenizer

-        # Try with specified tokenizer first
         try:
             tokenizer = AutoTokenizer.from_pretrained(
-                tokenizer_id,
-                use_fast=True,
                 padding_side="right",
-                trust_remote_code=True
             )
-            log.append(f"Successfully loaded tokenizer from {tokenizer_id}")
-        except Exception as e:
-            log.append(f"Could not load from {tokenizer_id}: {e}")
-            # Fallback to Llama-2 tokenizer
             try:
                 tokenizer = AutoTokenizer.from_pretrained(
-                    "meta-llama/Llama-2-7b-hf",
-                    use_fast=True,
-                    padding_side="right"
-                )
-                log.append("Loaded Llama-2 tokenizer as fallback")
-            except Exception as e2:
-                # If that fails too, try the most basic option
-                from transformers import LlamaTokenizer
-                tokenizer = LlamaTokenizer.from_pretrained(
-                    "hf-internal-testing/llama-tokenizer",
-                    use_fast=False,
-                    padding_side="right"
                 )
-                log.append("Loaded basic Llama tokenizer from testing repo")
-
-        # Set pad token if not already set
-        if tokenizer.pad_token is None:
-            tokenizer.pad_token = tokenizer.eos_token
-            log.append("Set pad_token to eos_token")
-
-        # Make sure we have necessary special tokens
-        if tokenizer.bos_token is None:
-            tokenizer.bos_token = "<s>"
-            log.append("Set bos_token to <s>")
-
-        if tokenizer.eos_token is None:
-            tokenizer.eos_token = "</s>"
-            log.append("Set eos_token to </s>")
-
-        log.append(f"Loaded model vocab size: {len(tokenizer)}")
-
-    except Exception as e:
-        error_msg = f"All attempts to load a tokenizer failed: {e}"
-        log.append(error_msg)
-        return "\n".join(log)
-
-    # Print information about input embeddings
-    print(f"Input embedding shape: {model.get_input_embeddings().weight.shape}")

     # Prepare model for k-bit training
     model = prepare_model_for_kbit_training(model)

-    # Define LoRA configuration
     lora_config = LoraConfig(
-        r=16,
-        lora_alpha=32,
-        target_modules=[
-            "q_proj",
-            "k_proj",
-            "v_proj",
-            "o_proj",
-            "gate_proj",
-            "up_proj",
-            "down_proj",
-        ],
         lora_dropout=0.05,
         bias="none",
-        task_type=TaskType.CAUSAL_LM
     )

     # Apply LoRA to model
-    model = get_peft_model(model, lora_config)
-    model.print_trainable_parameters()

     return model, tokenizer  # Return both model and tokenizer

@@ -373,9 +378,10 @@ def train_model(
     model_repo_name,
     dataset_repo_name,
     epochs=1,
-    batch_size=4,
-    grad_accum_steps=2,
     learning_rate=2e-4,
     progress=gr.Progress()
 ):
     progress(0, desc="Setting up environment...")
@@ -522,58 +528,66 @@ def train_model(
         torch_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
     )

-    # --- Load Tokenizer (using public models) ---
     progress(0.3, desc="Loading tokenizer...")

     # Try to load a compatible tokenizer
     try:
-        # First try TinyLlama which is open and uses Llama tokenizer
         tokenizer = AutoTokenizer.from_pretrained(
-            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # Public model with Llama tokenizer
             padding_side="right",
             use_fast=True,
         )
-        log.append("Loaded tokenizer from TinyLlama/TinyLlama-1.1B-Chat-v1.0")
     except Exception as e1:
-        log.append(f"Couldn't load TinyLlama tokenizer: {e1}")
         try:
-            # Try Phi-2 which is also public
             tokenizer = AutoTokenizer.from_pretrained(
-                "microsoft/phi-2",
                 padding_side="right",
-                use_fast=True
             )
-            log.append("Loaded Phi-2 tokenizer as fallback")
         except Exception as e2:
-            log.append(f"Couldn't load Phi-2 tokenizer: {e2}")
             try:
-                # Try CodeLlama which is popular and public
                 tokenizer = AutoTokenizer.from_pretrained(
-                    "codellama/CodeLlama-7b-hf",
-                    padding_side="right"
                 )
-                log.append("Loaded CodeLlama tokenizer as fallback")
             except Exception as e3:
-                log.append(f"Couldn't load any standard tokenizers. Using a basic tokenizer instead.")
-
-                # Create a minimal tokenizer that works with our format
-                # Assuming the vocab size is 2048 (from the RVQ token count)
-                from transformers import PreTrainedTokenizerFast
-
-                # Create a very basic tokenizer
-                tokenizer = PreTrainedTokenizerFast(
-                    tokenizer_file=None,  # No file needed
-                    bos_token="<s>",
-                    eos_token="</s>",
-                    unk_token="<unk>",
-                    pad_token="<pad>",
-                    model_max_length=2048  # Safe default value
-                )
-
-                # Add vocabulary - creating a minimal vocab for the RVQ tokens
-                vocab = {f"<token_{i}>": i for i in range(model.config.vocab_size)}
-                tokenizer.add_tokens(list(vocab.keys()))
-                log.append(f"Created basic tokenizer with {len(tokenizer)} tokens")

     # Set pad token if not already set
     if tokenizer.pad_token is None:
@@ -589,14 +603,13 @@ def train_model(
     model = prepare_model_for_kbit_training(model)
     log.append("Model prepared for k-bit training")

-    # Define LoRA configuration
-    # Based on your reference code
     lora_config = LoraConfig(
         task_type=TaskType.CAUSAL_LM,
-        r=16,  # Rank
-        lora_alpha=32,  # Alpha parameter
-        lora_dropout=0.05,  # Dropout probability
-        bias="none",  # Bias type
         target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
     )

@@ -604,6 +617,8 @@ def train_model(
     progress(0.4, desc="Applying LoRA to model...")
     model_to_train = get_peft_model(model, lora_config)
     log.append("LoRA applied to model")

     # Cleanup to free up memory
     gc.collect()
@@ -709,17 +724,17 @@ def train_model(
     output_dir = f"./results_{model_repo_name}"
     os.makedirs(output_dir, exist_ok=True)

-    # Optimize settings for A100
     training_args = TrainingArguments(
         output_dir=output_dir,
         num_train_epochs=float(epochs),
-        per_device_train_batch_size=batch_size,
-        gradient_accumulation_steps=grad_accum_steps,
         learning_rate=learning_rate,
         weight_decay=0.01,
         logging_dir=f"{output_dir}/logs",
         logging_steps=10,
-        save_steps=100,
         save_total_limit=3,
         remove_unused_columns=False,
         push_to_hub=False,
@@ -727,7 +742,8 @@ def train_model(
         warmup_ratio=0.03,
         lr_scheduler_type="cosine",
         report_to="tensorboard",
-        bf16=True if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else False,
         gradient_checkpointing=True,  # Still useful for efficiency
         gradient_checkpointing_kwargs={'use_reentrant': False},
         ddp_find_unused_parameters=False,
@@ -791,11 +807,16 @@ def create_interface():
             hf_username = gr.Textbox(label="HuggingFace Username", value="Twelve2five")
             model_repo = gr.Textbox(label="Model Repository Name", value="llama-3.2-1b-rvq")
             dataset_repo = gr.Textbox(label="Dataset Repository Name", value="podcast-dialogue-rvq-pairs-3items")

         with gr.Column():
             epochs = gr.Number(label="Number of Epochs", value=3, minimum=1, maximum=10)
-            batch_size = gr.Number(label="Batch Size per Device", value=4, minimum=1, maximum=16)
-            grad_accum = gr.Number(label="Gradient Accumulation Steps", value=2, minimum=1, maximum=16)
             lr = gr.Number(label="Learning Rate", value=2e-4)

     start_btn = gr.Button("Start Training")
@@ -803,7 +824,7 @@ def create_interface():

     start_btn.click(
         fn=train_model,
-        inputs=[hf_username, model_repo, dataset_repo, epochs, batch_size, grad_accum, lr],
         outputs=output
     )

 
         log.append(f"Alternative loading also failed: {e2}")
         return "\n".join(log)

+    # --- Load Tokenizer (prioritizing Llama 3.2 1B) ---
     progress(0.3, desc="Loading tokenizer...")
+
+    # Set up token for authentication
+    token_param = {"token": hf_token} if hf_token and hf_token.strip() else {}
+    if token_param:
+        log.append("Using provided Hugging Face token for authentication")
+    else:
+        log.append("No token provided, using Space's default authentication")
+
+    # Try to load a compatible tokenizer
     try:
+        # First try the actual Llama 3.2 1B tokenizer
+        tokenizer_repo = "meta-llama/Llama-3.2-1B"  # The official 1B model
+        log.append(f"Attempting to load tokenizer from {tokenizer_repo}...")
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            tokenizer_repo,
+            padding_side="right",
+            use_fast=True,
+            **token_param  # Pass token if provided
+        )
+        log.append(f"Successfully loaded tokenizer from {tokenizer_repo}")
+    except Exception as e1:
+        log.append(f"Couldn't load {tokenizer_repo} tokenizer: {e1}")

+        # Try the model repo directly (in case it has a tokenizer)
         try:
             tokenizer = AutoTokenizer.from_pretrained(
+                hf_model_repo_id,  # The RVQ model repo
                 padding_side="right",
+                use_fast=True,
+                **token_param  # Pass token if provided
             )
+            log.append(f"Loaded tokenizer from the model repo: {hf_model_repo_id}")
+        except Exception as e2:
+            log.append(f"Couldn't load model repo tokenizer: {e2}")
+
+            # Continue with our fallbacks (public models don't need token)
             try:
+                # Try TinyLlama (public)
                 tokenizer = AutoTokenizer.from_pretrained(
+                    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+                    padding_side="right",
+                    use_fast=True
                 )
+                log.append("Loaded TinyLlama tokenizer as fallback")
+            except Exception as e3:
+                log.append(f"Couldn't load TinyLlama tokenizer: {e3}")
+                # Last resort - other public models
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(
+                        "microsoft/phi-2",  # Public model
+                        padding_side="right"
+                    )
+                    log.append("Loaded Phi-2 tokenizer as last resort")
+                except Exception as e4:
+                    error_msg = f"Failed to load any compatible tokenizer after multiple attempts: {e4}"
+                    log.append(error_msg)
+                    return "\n".join(log)
+
+    # Set pad token if not already set
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token if tokenizer.eos_token is not None else "<pad>"
+        log.append("Set pad_token to eos_token or <pad>")
+
+    log.append(f"Tokenizer loaded with vocab size: {len(tokenizer)}")
+    log.append(f"Model vocab size: {model.config.vocab_size}")
+    log.append(f"Input embedding shape: {model.get_input_embeddings().weight.shape}")

     # Prepare model for k-bit training
     model = prepare_model_for_kbit_training(model)

+    # Define LoRA configuration - adjusted for 1B model
     lora_config = LoraConfig(
+        task_type=TaskType.CAUSAL_LM,
+        r=8,  # Smaller rank for 1B model (vs 16 for larger models)
+        lora_alpha=16,  # Adjusted alpha (vs 32 for larger models)
         lora_dropout=0.05,
         bias="none",
+        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
     )

     # Apply LoRA to model
+    progress(0.4, desc="Applying LoRA to model...")
+    model_to_train = get_peft_model(model, lora_config)
+    log.append("LoRA applied to model")
+    log.append(f"LoRA rank: 8, alpha: 16 (optimized for 1B model)")
+    model_to_train.print_trainable_parameters()

     return model, tokenizer  # Return both model and tokenizer
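The added loading logic boils down to one pattern: build a kwargs dict from an optional token and try a list of repos in order until one loads. A minimal standalone sketch of that pattern (the helper name is illustrative and not part of app.py; it assumes a transformers release that accepts the "token" keyword, and reuses the repo IDs tried in this commit):

from transformers import AutoTokenizer

def load_first_available_tokenizer(repo_ids, hf_token=None):
    """Return the first tokenizer that loads from the candidate repos."""
    # Only pass the token when one was actually supplied
    token_param = {"token": hf_token} if hf_token and hf_token.strip() else {}
    errors = []
    for repo_id in repo_ids:
        try:
            return AutoTokenizer.from_pretrained(
                repo_id,
                padding_side="right",
                use_fast=True,
                **token_param,
            )
        except Exception as e:  # gated repo, missing files, network error, ...
            errors.append(f"{repo_id}: {e}")
    raise RuntimeError("No tokenizer could be loaded:\n" + "\n".join(errors))

# Same candidate order as above: gated Llama 3.2 first, then public fallbacks
tokenizer = load_first_available_tokenizer(
    ["meta-llama/Llama-3.2-1B", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "microsoft/phi-2"],
    hf_token=None,  # or a user-supplied token for the gated repo
)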
 
 
     model_repo_name,
     dataset_repo_name,
     epochs=1,
+    batch_size=8,
+    grad_accum_steps=1,
     learning_rate=2e-4,
+    hf_token=None,  # New parameter for token
     progress=gr.Progress()
 ):
     progress(0, desc="Setting up environment...")

         torch_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
     )

+    # --- Load Tokenizer (prioritizing Llama 3.2 1B) ---
     progress(0.3, desc="Loading tokenizer...")

+    # Set up token for authentication
+    token_param = {"token": hf_token} if hf_token and hf_token.strip() else {}
+    if token_param:
+        log.append("Using provided Hugging Face token for authentication")
+    else:
+        log.append("No token provided, using Space's default authentication")
+
     # Try to load a compatible tokenizer
     try:
+        # First try the actual Llama 3.2 1B tokenizer
+        tokenizer_repo = "meta-llama/Llama-3.2-1B"  # The official 1B model
+        log.append(f"Attempting to load tokenizer from {tokenizer_repo}...")
+
         tokenizer = AutoTokenizer.from_pretrained(
+            tokenizer_repo,
             padding_side="right",
             use_fast=True,
+            **token_param  # Pass token if provided
         )
+        log.append(f"Successfully loaded tokenizer from {tokenizer_repo}")
     except Exception as e1:
+        log.append(f"Couldn't load {tokenizer_repo} tokenizer: {e1}")
+
+        # Try the model repo directly (in case it has a tokenizer)
         try:
             tokenizer = AutoTokenizer.from_pretrained(
+                hf_model_repo_id,  # The RVQ model repo
                 padding_side="right",
+                use_fast=True,
+                **token_param  # Pass token if provided
             )
+            log.append(f"Loaded tokenizer from the model repo: {hf_model_repo_id}")
         except Exception as e2:
+            log.append(f"Couldn't load model repo tokenizer: {e2}")
+
+            # Continue with our fallbacks (public models don't need token)
             try:
+                # Try TinyLlama (public)
                 tokenizer = AutoTokenizer.from_pretrained(
+                    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+                    padding_side="right",
+                    use_fast=True
                 )
+                log.append("Loaded TinyLlama tokenizer as fallback")
             except Exception as e3:
+                log.append(f"Couldn't load TinyLlama tokenizer: {e3}")
+                # Last resort - other public models
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(
+                        "microsoft/phi-2",  # Public model
+                        padding_side="right"
+                    )
+                    log.append("Loaded Phi-2 tokenizer as last resort")
+                except Exception as e4:
+                    error_msg = f"Failed to load any compatible tokenizer after multiple attempts: {e4}"
+                    log.append(error_msg)
+                    return "\n".join(log)

     # Set pad token if not already set
     if tokenizer.pad_token is None:
     model = prepare_model_for_kbit_training(model)
     log.append("Model prepared for k-bit training")

+    # Define LoRA configuration - adjusted for 1B model
     lora_config = LoraConfig(
         task_type=TaskType.CAUSAL_LM,
+        r=8,  # Smaller rank for 1B model (vs 16 for larger models)
+        lora_alpha=16,  # Adjusted alpha (vs 32 for larger models)
+        lora_dropout=0.05,
+        bias="none",
         target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
     )

     progress(0.4, desc="Applying LoRA to model...")
     model_to_train = get_peft_model(model, lora_config)
     log.append("LoRA applied to model")
+    log.append(f"LoRA rank: 8, alpha: 16 (optimized for 1B model)")
+    model_to_train.print_trainable_parameters()

     # Cleanup to free up memory
     gc.collect()
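For a quick sanity check outside the Space, the r=8 / alpha=16 configuration can be applied to any Llama-style checkpoint and inspected. A rough sketch, assuming TinyLlama/TinyLlama-1.1B-Chat-v1.0 as a stand-in base (the app itself applies the config to the RVQ model loaded earlier):

from transformers import AutoModelForCausalLM
from peft import LoraConfig, TaskType, get_peft_model

# Stand-in Llama-style base model, used here only for illustration
base = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
)

peft_model = get_peft_model(base, lora_config)
peft_model.print_trainable_parameters()

# The same numbers, computed by hand: only the injected LoRA matrices require grad
trainable = sum(p.numel() for p in peft_model.parameters() if p.requires_grad)
total = sum(p.numel() for p in peft_model.parameters())
print(f"trainable: {trainable:,} | total: {total:,} | {100 * trainable / total:.4f}%")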
 
     output_dir = f"./results_{model_repo_name}"
     os.makedirs(output_dir, exist_ok=True)

+    # For 1B model on A100, we can increase batch size and reduce gradient accumulation
     training_args = TrainingArguments(
         output_dir=output_dir,
         num_train_epochs=float(epochs),
+        per_device_train_batch_size=8,  # Larger batch size for 1B model
+        gradient_accumulation_steps=1,  # Reduced for 1B model
         learning_rate=learning_rate,
         weight_decay=0.01,
         logging_dir=f"{output_dir}/logs",
         logging_steps=10,
+        save_steps=50,
         save_total_limit=3,
         remove_unused_columns=False,
         push_to_hub=False,
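Note that the defaults move from a per-device batch of 4 with 2 accumulation steps to 8 with 1, so the effective batch size the optimizer sees on a single GPU is unchanged:

# effective batch = per_device_train_batch_size * gradient_accumulation_steps (single device)
old_effective = 4 * 2  # previous defaults
new_effective = 8 * 1  # new defaults for the 1B model
assert old_effective == new_effective == 8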
 
         warmup_ratio=0.03,
         lr_scheduler_type="cosine",
         report_to="tensorboard",
+        bf16=True if torch.cuda.is_bf16_supported() else False,
+        fp16=False,  # Using BF16 instead
         gradient_checkpointing=True,  # Still useful for efficiency
         gradient_checkpointing_kwargs={'use_reentrant': False},
         ddp_find_unused_parameters=False,

             hf_username = gr.Textbox(label="HuggingFace Username", value="Twelve2five")
             model_repo = gr.Textbox(label="Model Repository Name", value="llama-3.2-1b-rvq")
             dataset_repo = gr.Textbox(label="Dataset Repository Name", value="podcast-dialogue-rvq-pairs-3items")
+            hf_token = gr.Textbox(
+                label="Hugging Face Token (Optional)",
+                placeholder="Enter your HF token to access gated models",
+                type="password"
+            )

         with gr.Column():
             epochs = gr.Number(label="Number of Epochs", value=3, minimum=1, maximum=10)
+            batch_size = gr.Number(label="Batch Size per Device", value=8, minimum=1, maximum=16)
+            grad_accum = gr.Number(label="Gradient Accumulation Steps", value=1, minimum=1, maximum=16)
             lr = gr.Number(label="Learning Rate", value=2e-4)

     start_btn = gr.Button("Start Training")

     start_btn.click(
         fn=train_model,
+        inputs=[hf_username, model_repo, dataset_repo, epochs, batch_size, grad_accum, lr, hf_token],
         outputs=output
     )