Twelve2five committed
Commit 154b3c1 · verified · 1 Parent(s): af0160e

Update app.py

Files changed (1):
  1. app.py +61 -22
app.py CHANGED
@@ -212,7 +212,7 @@ def load_model():
     try:
         log.append("Loading a compatible tokenizer...")
         # Use the tokenizer from Meta's official Llama models - should be compatible with Llama 3.2
-        tokenizer_id = "meta-llama/Llama-3-8B" # This is a reliable source for a Llama tokenizer
+        tokenizer_id = "meta-llama/Llama-3-1B" # This is a reliable source for a Llama tokenizer
 
         # Try with specified tokenizer first
         try:

@@ -504,38 +504,77 @@ def train_model(
             quantization_config=bnb_config,
             device_map="auto",
             use_cache=False, # Needed for gradient checkpointing
+            trust_remote_code=True, # Following reference code
             torch_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
         )
 
-        # Load the tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(
-            local_model_path,
-            padding_side="right",
-            use_fast=True,
-        )
-        tokenizer.pad_token = tokenizer.eos_token
+        # --- Load Tokenizer (from a compatible model) ---
+        # Following the pattern from reference code
+        progress(0.3, desc="Loading tokenizer...")
 
-        # Find model's architecture type
-        model_type = model.config.model_type
-        log.append(f"Model architecture type: {model_type}")
+        # Try to load a compatible tokenizer
+        try:
+            # First try loading from standard Llama 3 model
+            tokenizer = AutoTokenizer.from_pretrained(
+                "meta-llama/Llama-3-8B", # Using standard Llama 3 tokenizer
+                padding_side="right",
+                use_fast=True,
+                trust_remote_code=True
+            )
+            log.append("Loaded tokenizer from meta-llama/Llama-3-8B")
+        except Exception as e1:
+            log.append(f"Couldn't load Llama-3 tokenizer: {e1}")
+            try:
+                # Fallback to Llama 2
+                tokenizer = AutoTokenizer.from_pretrained(
+                    "meta-llama/Llama-2-7b-hf",
+                    padding_side="right",
+                    use_fast=True
+                )
+                log.append("Loaded Llama-2 tokenizer as fallback")
+            except Exception as e2:
+                log.append(f"Couldn't load Llama-2 tokenizer: {e2}")
+                # Final fallback
+                from transformers import LlamaTokenizer
+                tokenizer = LlamaTokenizer.from_pretrained(
+                    "hf-internal-testing/llama-tokenizer",
+                    padding_side="right"
+                )
+                log.append("Loaded testing Llama tokenizer as final fallback")
+
+        # Set pad token and ensure it's usable
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
 
-        # PEFT Configuration (Smaller LoRA for faster iteration)
+        log.append(f"Loaded model vocab size: {model.config.vocab_size}")
+        log.append(f"Input embedding shape: {model.get_input_embeddings().weight.shape}")
+
+        # --- QLoRA Preparation ---
+        progress(0.35, desc="Preparing model for k-bit training...")
         model = prepare_model_for_kbit_training(model)
         log.append("Model prepared for k-bit training")
 
+        # Define LoRA configuration
+        # Based on your reference code
         lora_config = LoraConfig(
             task_type=TaskType.CAUSAL_LM,
-            r=16, # Keeping higher rank for A100
-            lora_alpha=32,
-            lora_dropout=0.05,
-            bias="none",
-            target_modules=["q_proj", "k_proj", "v_proj", "o_proj"] # Fewer modules for faster training
+            r=16, # Rank
+            lora_alpha=32, # Alpha parameter
+            lora_dropout=0.05, # Dropout probability
+            bias="none", # Bias type
+            target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
         )
-        peft_model = get_peft_model(model, lora_config)
-        trainable_params = peft_model.print_trainable_parameters()
-        log.append(f"LoRA applied to model")
-        model_to_train = peft_model
-
+
+        # Apply LoRA to model
+        progress(0.4, desc="Applying LoRA to model...")
+        model_to_train = get_peft_model(model, lora_config)
+        log.append("LoRA applied to model")
+
+        # Cleanup to free up memory
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
     except Exception as e:
         error_msg = f"Error preparing model for training: {str(e)}"
         log.append(error_msg)
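For readers following along, the added code implements a fairly standard QLoRA setup: load the base model in 4-bit via BitsAndBytesConfig, attach a tokenizer with right-side padding and an explicit pad token, call prepare_model_for_kbit_training, and wrap the model with a LoRA adapter via get_peft_model. The snippet below is a minimal, self-contained sketch of that pattern, not the app's actual code: the base_model id is a placeholder, the Gradio progress/log plumbing from app.py is omitted, and the tokenizer is loaded from the same repo as the model to keep the vocabularies aligned.

# Minimal QLoRA + LoRA sketch (illustrative only; base_model is a placeholder).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training

base_model = "meta-llama/Llama-3.2-1B"  # placeholder repo id; any causal LM path works

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    use_cache=False,  # needed when gradient checkpointing is enabled
)

# Using the model's own tokenizer keeps tokenizer and embedding vocab sizes consistent.
tokenizer = AutoTokenizer.from_pretrained(base_model, padding_side="right", use_fast=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
)
model_to_train = get_peft_model(model, lora_config)
model_to_train.print_trainable_parameters()  # prints trainable vs. total parameter counts

Loading the tokenizer from the same checkpoint as the model is one way to avoid the vocab-size mismatches that the new log lines (vocab size and input-embedding shape) are meant to surface.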