# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "datasets",
#     "httpx",
#     "huggingface_hub",
#     "setuptools",
#     "transformers",
#     "torch",
#     "accelerate",
#     "trl",
#     "peft",
#     "wandb",
#     "torchvision",
#     "torchaudio",
# ]
# ///

"""## Import libraries"""

import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import SFTConfig, SFTTrainer, setup_chat_format
from peft import LoraConfig

"""## Load dataset"""

dataset_name = "allenai/tulu-3-sft-personas-code"  # Example dataset

# Load the dataset
dataset = load_dataset(dataset_name, split="train")
print(f"Dataset loaded: {dataset}")

# Look at a sample
print("\nSample data:")
print(dataset[0])

dataset = dataset.remove_columns("prompt")
dataset = dataset.train_test_split(test_size=0.2)
print(f"Train Samples: {len(dataset['train'])}\nTest Samples: {len(dataset['test'])}")

"""## Configuration

Set up the configuration parameters for the fine-tuning process.
"""

# Model configuration
model_name = "Qwen/Qwen3-30B-A3B"  # You can change this to any model you want to fine-tune

# # Other compatible Qwen3 models
# model_name = "Qwen/Qwen3-32B"
# model_name = "Qwen/Qwen3-14B"
# model_name = "Qwen/Qwen3-8B"
# model_name = "Qwen/Qwen3-4B"
# model_name = "Qwen/Qwen3-1.7B"
# model_name = "Qwen/Qwen3-0.6B"

# Training configuration
output_dir = "./tmp/sft-model"
num_train_epochs = 1
per_device_train_batch_size = 1
gradient_accumulation_steps = 1
learning_rate = 2e-4

"""## Load model and tokenizer"""

# Load the model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    use_cache=False,  # Disable KV cache during training
    device_map="auto",
)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# # Set up chat formatting (if the model doesn't have a chat template)
# if tokenizer.chat_template is None:
#     model, tokenizer = setup_chat_format(model, tokenizer, format="chatml")

# # Set padding token
# if tokenizer.pad_token is None:
#     tokenizer.pad_token = tokenizer.eos_token

"""## Configure PEFT"""

# Set up the LoRA configuration
peft_config = LoraConfig(
    r=32,  # Rank
    lora_alpha=16,  # Alpha parameter for LoRA scaling
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules="all-linear",
)

"""## Configure SFT Trainer"""

# Training arguments
training_args = SFTConfig(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    learning_rate=learning_rate,
    gradient_checkpointing=True,
    logging_steps=25,
    save_strategy="epoch",
    optim="adamw_torch",
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    max_length=1024,
    packing=True,  # Enable packing to increase training efficiency
    eos_token=tokenizer.eos_token,
    bf16=True,
    fp16=False,
    max_steps=1000,  # Overrides num_train_epochs when set
    report_to="wandb",  # Log training metrics to Weights & Biases
)

"""## Initialize and run the SFT Trainer"""

# Create the SFT Trainer
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"] if "test" in dataset else None,
    peft_config=peft_config,
    processing_class=tokenizer,
)

# Train the model
trainer.train()

"""## Save the fine-tuned model"""

# Save the model (with a PEFT config this saves the LoRA adapter)
trainer.save_model(output_dir)
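"""## (Optional) Free training memory before reloading for inference

A minimal sketch, assuming training and the test below run in the same process:
releasing the trainer and training model first avoids holding two copies of the
weights on the GPU. Uncomment if the next step runs out of memory.
"""

# import gc
#
# del trainer, model
# gc.collect()
# torch.cuda.empty_cache()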
"""## Test the fine-tuned model"""

from peft import PeftModel

# Load the base model
base_model = AutoModelForCausalLM.from_pretrained(
    model_name, trust_remote_code=True, torch_dtype=torch.bfloat16
)

# Load the fine-tuned PEFT model
model = PeftModel.from_pretrained(base_model, output_dir)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Test the model with an example
prompt = """Write a function called is_palindrome that takes a single string as input and returns True if the string is a palindrome, and False otherwise.

Palindrome Definition: A palindrome is a word, phrase, number, or other sequence of characters that reads the same forward and backward, ignoring spaces, punctuation, and capitalization.

Example:
```
is_palindrome("racecar")  # Returns True
is_palindrome("hello")  # Returns False
is_palindrome("A man, a plan, a canal: Panama")  # Returns True
```
"""

# Format the chat prompt using the tokenizer's chat template
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt},
]
formatted_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(f"Formatted prompt: {formatted_prompt}")

# Generate a response
model.eval()
inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=500,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("\nGenerated Response:")
print(response)

# Push the fine-tuned adapter to the Hugging Face Hub
model.push_to_hub("burtenshaw/Qwen3-30B-A3B-python-code")
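"""## (Optional) Merge the adapter before pushing

A minimal sketch, assuming you want the Hub repo to contain standalone model
weights rather than a LoRA adapter: `PeftModel.merge_and_unload()` folds the
adapter into the base model, which can then be pushed as a regular checkpoint.
The repo name below is a placeholder.
"""

# merged_model = model.merge_and_unload()
# merged_model.push_to_hub("your-username/Qwen3-30B-A3B-python-code-merged")
# tokenizer.push_to_hub("your-username/Qwen3-30B-A3B-python-code-merged")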