Twelve2five committed
Commit fe289fa (verified) · 1 Parent(s): e036f13

Update app.py

Files changed (1)
  1. app.py +48 -45
app.py CHANGED
@@ -4,10 +4,12 @@ import glob
 import gc
 from transformers import (
     AutoModelForCausalLM,
+    AutoTokenizer,
     BitsAndBytesConfig,
     TrainingArguments,
     Trainer,
-    DataCollatorForLanguageModeling
+    DataCollatorForLanguageModeling,
+    AutoTokenizer
 )
 from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training
 from datasets import Dataset
@@ -130,71 +132,71 @@ def prepare_for_dataset(batch):
     return output
 
 def load_model():
-    clean_memory()  # Start with clean memory
-
     print(f"Loading base model architecture from: {hf_model_repo_id}")
 
-    # Even more extreme quantization
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_compute_dtype=torch.float16,  # Use float16 instead of bfloat16
-        bnb_4bit_use_double_quant=True,
-    )
-
-    # For 4-bit training, we need to load on a single device
-    # Choose GPU with most available memory
-    free_memory = []
+    # Get information about GPU with most free memory
+    gpu_id = 0  # Default to first GPU
+    max_free_memory = 0
+
     for i in range(torch.cuda.device_count()):
-        total_memory = torch.cuda.get_device_properties(i).total_memory
-        reserved_memory = torch.cuda.memory_reserved(i)
-        free_memory.append((total_memory - reserved_memory) / 1e9)  # Convert to GB
+        free_memory = torch.cuda.get_device_properties(i).total_memory - torch.cuda.memory_allocated(i)
+        if free_memory > max_free_memory:
+            max_free_memory = free_memory
+            gpu_id = i
 
-    # Choose the GPU with the most free memory
-    target_gpu = free_memory.index(max(free_memory))
-    print(f"Loading model on GPU {target_gpu} with {free_memory[target_gpu]:.2f}GB free memory")
+    print(f"Loading model on GPU {gpu_id} with {max_free_memory / 1e9:.2f}GB free memory")
 
-    # Use target GPU for model loading (crucial for 4-bit training)
-    device_map = {'': target_gpu}
-
-    # Load model on the single target GPU
+    # Configure quantization
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_use_double_quant=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.bfloat16
+    )
+
+    # Load the model
     model = AutoModelForCausalLM.from_pretrained(
         hf_model_repo_id,
         quantization_config=bnb_config,
-        device_map=device_map,  # Place entire model on one GPU
-        trust_remote_code=True,
-        use_cache=False,
-        torch_dtype=torch.float16,
-        low_cpu_mem_usage=True,
+        device_map={"": gpu_id},
+        torch_dtype=torch.bfloat16,
     )
 
-    # Add print statement to check which device the model is on
-    print(f"Model loaded on device: {next(model.parameters()).device}")
+    print(f"Model loaded on device: cuda:{gpu_id}")
+
+    # Load tokenizer as well
+    tokenizer = AutoTokenizer.from_pretrained(hf_model_repo_id)
+    print(f"Loaded model vocab size: {len(tokenizer)}")
 
-    # Continue with the LoRA config as before
-    print(f"Loaded model vocab size: {model.get_input_embeddings().weight.shape[0]}")
+    # Print information about input embeddings
     print(f"Input embedding shape: {model.get_input_embeddings().weight.shape}")
 
-    # --- Configure PEFT/LoRA ---
+    # Prepare model for k-bit training
+    model = prepare_model_for_kbit_training(model)
+
+    # Define LoRA configuration
     lora_config = LoraConfig(
-        r=16,  # rank
+        r=16,
         lora_alpha=32,
+        target_modules=[
+            "q_proj",
+            "k_proj",
+            "v_proj",
+            "o_proj",
+            "gate_proj",
+            "up_proj",
+            "down_proj",
+        ],
         lora_dropout=0.05,
         bias="none",
-        task_type=TaskType.CAUSAL_LM,
-        target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
+        task_type=TaskType.CAUSAL_LM
    )
 
-    # Prepare model for k-bit training
-    model = prepare_model_for_kbit_training(model)
-
-    # Add LoRA adapters
+    # Apply LoRA to model
     model = get_peft_model(model, lora_config)
-
-    # Log number of trainable parameters
     model.print_trainable_parameters()
 
-    return model
+    return model, tokenizer  # Return both model and tokenizer
 
 def load_dataset():
     # --- Download the dataset repository files ---
@@ -275,7 +277,7 @@ def train_model(progress=gr.Progress()):
     clean_memory()
 
     # Load model with optimized memory settings
-    model = load_model()
+    model, tokenizer = load_model()
 
     # Load and prepare dataset
     progress(0.1, desc="Loading dataset...")
@@ -302,13 +304,14 @@ def train_model(progress=gr.Progress()):
         logging_first_step=True,  # Force log on first step
     )
 
-    # Create a simple trainer
+    # Create a simple trainer with the tokenizer
     trainer = Trainer(
         model=model,
         args=training_args,
         train_dataset=train_dataset,
         data_collator=DataCollatorForLanguageModeling(
-            tokenizer=None, mlm=False
+            tokenizer=tokenizer,
+            mlm=False
         )
     )
 
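
Why the collator change matters: with mlm=False, DataCollatorForLanguageModeling uses the tokenizer's pad token to batch variable-length examples and builds labels from input_ids for the causal-LM loss, so the previous tokenizer=None could not pad or label batches. Below is a minimal, self-contained sketch of that behaviour; the GPT-2 checkpoint is only an illustrative stand-in for whatever hf_model_repo_id points at in app.py.

from transformers import AutoTokenizer, DataCollatorForLanguageModeling

# Illustrative stand-in checkpoint; app.py loads its tokenizer from hf_model_repo_id instead.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 ships without a pad token

collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Two tokenized examples of different lengths, as the Trainer would hand them over.
features = [
    {"input_ids": tokenizer("hello world")["input_ids"]},
    {"input_ids": tokenizer("a somewhat longer example sentence")["input_ids"]},
]
batch = collator(features)

# input_ids are padded to a common length; labels are a copy of input_ids with
# padded positions set to -100 so the loss ignores them.
print(batch["input_ids"].shape, batch["labels"].shape)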