Update app.py
app.py
CHANGED
@@ -207,90 +207,95 @@ def load_model():
         log.append(f"Alternative loading also failed: {e2}")
         return "\n".join(log)

+    # --- Load Tokenizer (prioritizing Llama 3.2 1B) ---
     progress(0.3, desc="Loading tokenizer...")
+
+    # Set up token for authentication
+    token_param = {"token": hf_token} if hf_token and hf_token.strip() else {}
+    if token_param:
+        log.append("Using provided Hugging Face token for authentication")
+    else:
+        log.append("No token provided, using Space's default authentication")
+
+    # Try to load a compatible tokenizer
     try:
+        # First try the actual Llama 3.2 1B tokenizer
+        tokenizer_repo = "meta-llama/Llama-3.2-1B"  # The official 1B model
+        log.append(f"Attempting to load tokenizer from {tokenizer_repo}...")
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            tokenizer_repo,
+            padding_side="right",
+            use_fast=True,
+            **token_param  # Pass token if provided
+        )
+        log.append(f"Successfully loaded tokenizer from {tokenizer_repo}")
+    except Exception as e1:
+        log.append(f"Couldn't load {tokenizer_repo} tokenizer: {e1}")

+        # Try the model repo directly (in case it has a tokenizer)
         try:
             tokenizer = AutoTokenizer.from_pretrained(
+                hf_model_repo_id,  # The RVQ model repo
                 padding_side="right",
+                use_fast=True,
+                **token_param  # Pass token if provided
             )
+            log.append(f"Loaded tokenizer from the model repo: {hf_model_repo_id}")
+        except Exception as e2:
+            log.append(f"Couldn't load model repo tokenizer: {e2}")
+
+            # Continue with our fallbacks (public models don't need token)
             try:
+                # Try TinyLlama (public)
                 tokenizer = AutoTokenizer.from_pretrained(
+                    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+                    padding_side="right",
+                    use_fast=True
                 )
+                log.append("Loaded TinyLlama tokenizer as fallback")
+            except Exception as e3:
+                log.append(f"Couldn't load TinyLlama tokenizer: {e3}")
+                # Last resort - other public models
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(
+                        "microsoft/phi-2",  # Public model
+                        padding_side="right"
+                    )
+                    log.append("Loaded Phi-2 tokenizer as last resort")
+                except Exception as e4:
+                    error_msg = f"Failed to load any compatible tokenizer after multiple attempts: {e4}"
+                    log.append(error_msg)
+                    return "\n".join(log)
+
+    # Set pad token if not already set
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token if tokenizer.eos_token is not None else "<pad>"
+        log.append("Set pad_token to eos_token or <pad>")
+
+    log.append(f"Tokenizer loaded with vocab size: {len(tokenizer)}")
+    log.append(f"Model vocab size: {model.config.vocab_size}")
+    log.append(f"Input embedding shape: {model.get_input_embeddings().weight.shape}")

     # Prepare model for k-bit training
     model = prepare_model_for_kbit_training(model)

+    # Define LoRA configuration - adjusted for 1B model
     lora_config = LoraConfig(
+        task_type=TaskType.CAUSAL_LM,
+        r=8,  # Smaller rank for 1B model (vs 16 for larger models)
+        lora_alpha=16,  # Adjusted alpha (vs 32 for larger models)
         lora_dropout=0.05,
         bias="none",
+        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
     )

     # Apply LoRA to model
+    progress(0.4, desc="Applying LoRA to model...")
+    model_to_train = get_peft_model(model, lora_config)
+    log.append("LoRA applied to model")
+    log.append(f"LoRA rank: 8, alpha: 16 (optimized for 1B model)")
+    model_to_train.print_trainable_parameters()

     return model, tokenizer  # Return both model and tokenizer

@@ -373,9 +378,10 @@ def train_model(
     model_repo_name,
     dataset_repo_name,
     epochs=1,
+    batch_size=8,
+    grad_accum_steps=1,
     learning_rate=2e-4,
+    hf_token=None,  # New parameter for token
     progress=gr.Progress()
 ):
     progress(0, desc="Setting up environment...")

@@ -522,58 +528,66 @@ def train_model(
         torch_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
     )

+    # --- Load Tokenizer (prioritizing Llama 3.2 1B) ---
     progress(0.3, desc="Loading tokenizer...")

+    # Set up token for authentication
+    token_param = {"token": hf_token} if hf_token and hf_token.strip() else {}
+    if token_param:
+        log.append("Using provided Hugging Face token for authentication")
+    else:
+        log.append("No token provided, using Space's default authentication")
+
     # Try to load a compatible tokenizer
     try:
+        # First try the actual Llama 3.2 1B tokenizer
+        tokenizer_repo = "meta-llama/Llama-3.2-1B"  # The official 1B model
+        log.append(f"Attempting to load tokenizer from {tokenizer_repo}...")
+
         tokenizer = AutoTokenizer.from_pretrained(
+            tokenizer_repo,
             padding_side="right",
             use_fast=True,
+            **token_param  # Pass token if provided
         )
+        log.append(f"Successfully loaded tokenizer from {tokenizer_repo}")
     except Exception as e1:
+        log.append(f"Couldn't load {tokenizer_repo} tokenizer: {e1}")
+
+        # Try the model repo directly (in case it has a tokenizer)
         try:
             tokenizer = AutoTokenizer.from_pretrained(
+                hf_model_repo_id,  # The RVQ model repo
                 padding_side="right",
+                use_fast=True,
+                **token_param  # Pass token if provided
             )
+            log.append(f"Loaded tokenizer from the model repo: {hf_model_repo_id}")
         except Exception as e2:
+            log.append(f"Couldn't load model repo tokenizer: {e2}")
+
+            # Continue with our fallbacks (public models don't need token)
             try:
+                # Try TinyLlama (public)
                 tokenizer = AutoTokenizer.from_pretrained(
+                    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+                    padding_side="right",
+                    use_fast=True
                 )
+                log.append("Loaded TinyLlama tokenizer as fallback")
             except Exception as e3:
+                log.append(f"Couldn't load TinyLlama tokenizer: {e3}")
+                # Last resort - other public models
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(
+                        "microsoft/phi-2",  # Public model
+                        padding_side="right"
+                    )
+                    log.append("Loaded Phi-2 tokenizer as last resort")
+                except Exception as e4:
+                    error_msg = f"Failed to load any compatible tokenizer after multiple attempts: {e4}"
+                    log.append(error_msg)
+                    return "\n".join(log)

     # Set pad token if not already set
     if tokenizer.pad_token is None:

@@ -589,14 +603,13 @@ def train_model(
     model = prepare_model_for_kbit_training(model)
     log.append("Model prepared for k-bit training")

+    # Define LoRA configuration - adjusted for 1B model
     lora_config = LoraConfig(
         task_type=TaskType.CAUSAL_LM,
+        r=8,  # Smaller rank for 1B model (vs 16 for larger models)
+        lora_alpha=16,  # Adjusted alpha (vs 32 for larger models)
+        lora_dropout=0.05,
+        bias="none",
         target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
     )

@@ -604,6 +617,8 @@ def train_model(
     progress(0.4, desc="Applying LoRA to model...")
     model_to_train = get_peft_model(model, lora_config)
     log.append("LoRA applied to model")
+    log.append(f"LoRA rank: 8, alpha: 16 (optimized for 1B model)")
+    model_to_train.print_trainable_parameters()

     # Cleanup to free up memory
     gc.collect()

@@ -709,17 +724,17 @@ def train_model(
     output_dir = f"./results_{model_repo_name}"
     os.makedirs(output_dir, exist_ok=True)

+    # For 1B model on A100, we can increase batch size and reduce gradient accumulation
     training_args = TrainingArguments(
         output_dir=output_dir,
         num_train_epochs=float(epochs),
+        per_device_train_batch_size=8,  # Larger batch size for 1B model
+        gradient_accumulation_steps=1,  # Reduced for 1B model
         learning_rate=learning_rate,
         weight_decay=0.01,
         logging_dir=f"{output_dir}/logs",
         logging_steps=10,
+        save_steps=50,
         save_total_limit=3,
         remove_unused_columns=False,
         push_to_hub=False,

@@ -727,7 +742,8 @@ def train_model(
         warmup_ratio=0.03,
         lr_scheduler_type="cosine",
         report_to="tensorboard",
+        bf16=True if torch.cuda.is_bf16_supported() else False,
+        fp16=False,  # Using BF16 instead
         gradient_checkpointing=True,  # Still useful for efficiency
         gradient_checkpointing_kwargs={'use_reentrant': False},
         ddp_find_unused_parameters=False,

@@ -791,11 +807,16 @@ def create_interface():
                 hf_username = gr.Textbox(label="HuggingFace Username", value="Twelve2five")
                 model_repo = gr.Textbox(label="Model Repository Name", value="llama-3.2-1b-rvq")
                 dataset_repo = gr.Textbox(label="Dataset Repository Name", value="podcast-dialogue-rvq-pairs-3items")
+                hf_token = gr.Textbox(
+                    label="Hugging Face Token (Optional)",
+                    placeholder="Enter your HF token to access gated models",
+                    type="password"
+                )

             with gr.Column():
                 epochs = gr.Number(label="Number of Epochs", value=3, minimum=1, maximum=10)
+                batch_size = gr.Number(label="Batch Size per Device", value=8, minimum=1, maximum=16)
+                grad_accum = gr.Number(label="Gradient Accumulation Steps", value=1, minimum=1, maximum=16)
                 lr = gr.Number(label="Learning Rate", value=2e-4)

         start_btn = gr.Button("Start Training")

@@ -803,7 +824,7 @@ def create_interface():

         start_btn.click(
             fn=train_model,
+            inputs=[hf_username, model_repo, dataset_repo, epochs, batch_size, grad_accum, lr, hf_token],
             outputs=output
         )
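
The optional-token pattern these hunks introduce can be exercised on its own; the sketch below is illustrative only, assuming a recent transformers release whose from_pretrained accepts the token keyword, and it uses the public TinyLlama fallback repo named in the diff rather than the gated meta-llama/Llama-3.2-1B.

from transformers import AutoTokenizer

hf_token = None  # set to an "hf_..." token when a gated repo such as meta-llama/Llama-3.2-1B is needed
token_param = {"token": hf_token} if hf_token and hf_token.strip() else {}

tokenizer = AutoTokenizer.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # public fallback used in the diff
    padding_side="right",
    use_fast=True,
    **token_param,  # forwarded only when a token was supplied
)
print(f"Loaded tokenizer with vocab size: {len(tokenizer)}")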