Update app.py
app.py
CHANGED
@@ -212,7 +212,7 @@ def load_model():
     try:
         log.append("Loading a compatible tokenizer...")
         # Use the tokenizer from Meta's official Llama models - should be compatible with Llama 3.2
-        tokenizer_id = "meta-llama/Llama-3-
+        tokenizer_id = "meta-llama/Llama-3-1B"  # This is a reliable source for a Llama tokenizer

         # Try with specified tokenizer first
         try:
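For reference, here is a minimal usage sketch (not part of this commit) of how a tokenizer id like the one set above is typically resolved, with a fallback in case the repo is gated or unavailable; the fallback repo is the same one this change uses later as its final fallback:

# Hypothetical sketch; tokenizer_id is taken from the diff above and is assumed
# to resolve on the Hugging Face Hub (gated Meta repos require granted access).
from transformers import AutoTokenizer

tokenizer_id = "meta-llama/Llama-3-1B"
try:
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, use_fast=True)
except Exception:
    # Small, public Llama tokenizer used only as a stand-in
    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
print(f"Tokenizer vocab size: {len(tokenizer)}")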
@@ -504,38 +504,77 @@ def train_model(
             quantization_config=bnb_config,
             device_map="auto",
             use_cache=False,  # Needed for gradient checkpointing
+            trust_remote_code=True,  # Following reference code
             torch_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
         )

-        # Load
-            padding_side="right",
-            use_fast=True,
-        )
-        tokenizer.pad_token = tokenizer.eos_token
+        # --- Load Tokenizer (from a compatible model) ---
+        # Following the pattern from reference code
+        progress(0.3, desc="Loading tokenizer...")
+
+        # Try to load a compatible tokenizer
+        try:
+            # First try loading from standard Llama 3 model
+            tokenizer = AutoTokenizer.from_pretrained(
+                "meta-llama/Llama-3-8B",  # Using standard Llama 3 tokenizer
+                padding_side="right",
+                use_fast=True,
+                trust_remote_code=True
+            )
+            log.append("Loaded tokenizer from meta-llama/Llama-3-8B")
+        except Exception as e1:
+            log.append(f"Couldn't load Llama-3 tokenizer: {e1}")
+            try:
+                # Fallback to Llama 2
+                tokenizer = AutoTokenizer.from_pretrained(
+                    "meta-llama/Llama-2-7b-hf",
+                    padding_side="right",
+                    use_fast=True
+                )
+                log.append("Loaded Llama-2 tokenizer as fallback")
+            except Exception as e2:
+                log.append(f"Couldn't load Llama-2 tokenizer: {e2}")
+                # Final fallback
+                from transformers import LlamaTokenizer
+                tokenizer = LlamaTokenizer.from_pretrained(
+                    "hf-internal-testing/llama-tokenizer",
+                    padding_side="right"
+                )
+                log.append("Loaded testing Llama tokenizer as final fallback")
+
+        # Set pad token and ensure it's usable
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token

+        log.append(f"Loaded model vocab size: {model.config.vocab_size}")
+        log.append(f"Input embedding shape: {model.get_input_embeddings().weight.shape}")
+
+        # --- QLoRA Preparation ---
+        progress(0.35, desc="Preparing model for k-bit training...")
         model = prepare_model_for_kbit_training(model)
         log.append("Model prepared for k-bit training")

+        # Define LoRA configuration
+        # Based on your reference code
         lora_config = LoraConfig(
             task_type=TaskType.CAUSAL_LM,
-            r=16,  #
-            lora_alpha=32,
-            lora_dropout=0.05,
-            bias="none",
-            target_modules=["q_proj", "k_proj", "v_proj", "o_proj"
+            r=16,  # Rank
+            lora_alpha=32,  # Alpha parameter
+            lora_dropout=0.05,  # Dropout probability
+            bias="none",  # Bias type
+            target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
         )
-        model_to_train =
+
+        # Apply LoRA to model
+        progress(0.4, desc="Applying LoRA to model...")
+        model_to_train = get_peft_model(model, lora_config)
+        log.append("LoRA applied to model")
+
+        # Cleanup to free up memory
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
     except Exception as e:
         error_msg = f"Error preparing model for training: {str(e)}"
         log.append(error_msg)
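The added block follows the common QLoRA preparation flow: load a 4-bit quantized base model, prepare it for k-bit training, then wrap it with LoRA adapters. Below is a self-contained sketch of the same steps, using a placeholder model id and assumed bitsandbytes settings rather than the app's actual configuration:

# Minimal QLoRA preparation sketch; model_id is a placeholder for illustration only.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training

model_id = "meta-llama/Llama-2-7b-hf"  # placeholder, not the app's value

# Assumed 4-bit quantization settings (the app's bnb_config is defined elsewhere)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    use_cache=False,  # required when gradient checkpointing is enabled
)

# Cast norms / enable gradient checkpointing hooks for k-bit training
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
)
model_to_train = get_peft_model(model, lora_config)
model_to_train.print_trainable_parameters()

Extending target_modules beyond the attention projections to gate_proj, up_proj and down_proj, as this change does, applies LoRA to the MLP blocks as well, which raises the trainable-parameter count in exchange for more adaptation capacity.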
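The two new log lines for vocab size and embedding shape point at the main risk of borrowing a tokenizer from a different checkpoint: its vocabulary may not line up with the model's embedding table. A hedged sketch of the kind of check that could follow those log calls (not part of this commit), reusing the model, tokenizer and log variables from the function above:

# Hypothetical compatibility check between a borrowed tokenizer and the loaded model.
embedding_rows = model.get_input_embeddings().weight.shape[0]
if len(tokenizer) > embedding_rows:
    # More tokens than embedding rows would index out of range during training,
    # so grow the embedding table to cover the tokenizer's vocabulary.
    model.resize_token_embeddings(len(tokenizer))
elif len(tokenizer) < embedding_rows:
    # Fewer tokens than rows is usually harmless, but worth logging.
    log.append(f"Tokenizer vocab ({len(tokenizer)}) smaller than embedding table ({embedding_rows})")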