Twelve2five committed
Commit 139f757 · verified · 1 Parent(s): d559082

Update app.py

Files changed (1)
  1. app.py +75 -24
app.py CHANGED
@@ -23,6 +23,7 @@ from accelerate import Accelerator
 import subprocess
 import sys
 import json
+import shutil
 
 # --- Configuration ---
 YOUR_HF_USERNAME = "Twelve2five"
@@ -333,19 +334,52 @@ def train_model(
     learning_rate=1e-4,
     progress=gr.Progress()
 ):
-    progress(0, desc="Installing dependencies...")
+    progress(0, desc="Setting up environment...")
     log = []
 
-    # Force reinstallation of transformers with specific version
-    log.append("Installing dependencies with specific versions...")
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "--force-reinstall", "transformers==4.36.2"])
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "-U", "accelerate", "bitsandbytes", "peft", "datasets", "huggingface_hub", "deepspeed"])
+    # Completely clean up transformers installation
+    log.append("Completely reinstalling transformers and dependencies...")
+
+    # First uninstall any existing transformers
+    subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "-y", "transformers"])
+
+    # Clean any cached files that might be causing issues
+    cache_dirs = [
+        os.path.expanduser("~/.cache/huggingface"),
+        os.path.expanduser("~/.cache/pip")
+    ]
+
+    for cache_dir in cache_dirs:
+        if os.path.exists(cache_dir):
+            log.append(f"Cleaning cache directory: {cache_dir}")
+            try:
+                shutil.rmtree(cache_dir)
+            except Exception as e:
+                log.append(f"Warning: Could not clean {cache_dir}: {e}")
+
+    # Install a stable version of transformers known to work with Llama models
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers==4.35.2", "sentencepiece"])
+
+    # Install other dependencies
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
+                           "accelerate", "bitsandbytes==0.41.1", "peft==0.6.1",
+                           "datasets", "huggingface_hub", "deepspeed==0.12.3"])
 
     # Now import everything after installation to ensure we use the correct versions
-    from datasets import Dataset
-    from huggingface_hub import snapshot_download
-    from transformers import AutoModelForCausalLM, AutoConfig, BitsAndBytesConfig, TrainingArguments, Trainer
-    from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training
+    try:
+        from datasets import Dataset
+        from huggingface_hub import snapshot_download
+        import torch
+        import transformers
+        from transformers import AutoModelForCausalLM, LlamaConfig, LlamaForCausalLM
+        from transformers import BitsAndBytesConfig, TrainingArguments, Trainer
+        from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training
+
+        log.append(f"Transformers version: {transformers.__version__}")
+        log.append(f"PyTorch version: {torch.__version__}")
+    except ImportError as e:
+        log.append(f"Error importing libraries: {e}")
+        return "\n".join(log)
 
     # --- Configuration ---
     progress(0.05, desc="Setting up configuration...")
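
Note: this hunk pins exact versions and imports only after installation, so a previously loaded transformers cannot shadow the pinned one. A minimal standalone sketch of that pin-then-verify pattern (the helper name install_pinned and the trimmed pin list are illustrative, not from app.py):

import subprocess
import sys

# Hypothetical helper mirroring the commit's pattern: uninstall first,
# install exact pins, and import only after installing so the pinned
# version is the one actually loaded.
PINS = ["transformers==4.35.2", "sentencepiece"]

def install_pinned(pins=PINS):
    subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "-y", "transformers"])
    subprocess.check_call([sys.executable, "-m", "pip", "install", *pins])
    import transformers
    return transformers.__version__
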
@@ -371,8 +405,10 @@ def train_model(
     progress(0.1, desc="Loading base model...")
     try:
         # First try to download the repo without loading the model
-        # to see what files are available
         local_model_path = "./model_files"
+        if os.path.exists(local_model_path):
+            shutil.rmtree(local_model_path)  # Clean up any previous files
+
         snapshot_download(
             repo_id=hf_model_repo_id,
             local_dir=local_model_path,
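
Note: the new rmtree guard matters because downloading into an existing local_dir can mix stale files from a previous run with fresh ones. A self-contained sketch of the same clean-download step (fresh_download is a hypothetical name; repo_id stands in for whatever hf_model_repo_id holds):

import os
import shutil
from huggingface_hub import snapshot_download

def fresh_download(repo_id, local_dir="./model_files"):
    # Drop stale or partial files from a previous run, then pull
    # a clean snapshot of the model repo.
    if os.path.exists(local_dir):
        shutil.rmtree(local_dir)
    return snapshot_download(repo_id=repo_id, local_dir=local_dir)
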
@@ -387,29 +423,31 @@ def train_model(
             config_data = json.load(f)
         log.append(f"Model architecture type: {config_data.get('model_type', 'unknown')}")
 
-        # Force model_type to llama if needed
-        if "architectures" in config_data and "LlamaForCausalLM" in config_data["architectures"]:
-            config_data["model_type"] = "llama"
-            with open(os.path.join(local_model_path, "config.json"), "w") as f:
-                json.dump(config_data, f)
-            log.append("Updated config.json to use llama model_type")
+        # Force model_type to llama
+        config_data["model_type"] = "llama"
+        if "architectures" in config_data:
+            config_data["architectures"] = ["LlamaForCausalLM"]
+
+        with open(os.path.join(local_model_path, "config.json"), "w") as f:
+            json.dump(config_data, f)
+        log.append("Updated config.json to use llama model_type")
 
-        # Now try to load the config and model from local path
-        config = AutoConfig.from_pretrained(
+        # Now try to load with explicit Llama classes
+        config = LlamaConfig.from_pretrained(
             local_model_path,
-            trust_remote_code=False  # Set to False to avoid custom model code loading
+            trust_remote_code=False
         )
 
         log.append(f"Successfully loaded config: {config.model_type}")
 
-        # Load model with the config
-        model = AutoModelForCausalLM.from_pretrained(
+        # Load model with specific Llama class
+        model = LlamaForCausalLM.from_pretrained(
             local_model_path,
             config=config,
             quantization_config=bnb_config,
             device_map="auto",
-            trust_remote_code=False,
-            torch_dtype=torch.bfloat16
+            torch_dtype=torch.bfloat16,
+            low_cpu_mem_usage=True
         )
 
         log.append(f"Loaded model vocab size: {model.config.vocab_size}")
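
Note: the load call passes a bnb_config that this commit does not touch; it is defined elsewhere in app.py. For orientation, a typical 4-bit configuration of the kind such code uses (these exact values are an assumption, not the file's):

import torch
from transformers import BitsAndBytesConfig

# Illustrative 4-bit quantization settings; app.py's real bnb_config is
# defined outside this diff and may differ.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
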
@@ -417,7 +455,20 @@ def train_model(
     except Exception as e:
         error_msg = f"Error loading model: {str(e)}"
         log.append(error_msg)
-        return "\n".join(log)
+
+        # Try a fallback approach
+        try:
+            log.append("Trying fallback approach with AutoModelForCausalLM...")
+            model = AutoModelForCausalLM.from_pretrained(
+                local_model_path,
+                device_map="auto",
+                torch_dtype=torch.bfloat16,
+                low_cpu_mem_usage=True
+            )
+            log.append("Fallback model loaded successfully")
+        except Exception as e2:
+            log.append(f"Fallback approach also failed: {str(e2)}")
+            return "\n".join(log)
 
     # --- Prepare for K-bit Training & Apply LoRA ---
     progress(0.15, desc="Preparing model for fine-tuning...")
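
Note: the fallback load drops quantization_config, so it brings the model up in plain bfloat16 and needs substantially more GPU memory than the 4-bit path. The hunk ends where the "Prepare for K-bit Training & Apply LoRA" step begins, which this diff does not show; a minimal sketch of what that step typically looks like with the peft imports above (the LoRA hyperparameters are illustrative assumptions, not app.py's values):

from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training

def apply_lora(model):
    # Prepare the quantized model for k-bit training, then inject LoRA
    # adapters; r, lora_alpha, lora_dropout and target_modules are
    # illustrative, not the file's actual values.
    model = prepare_model_for_kbit_training(model)
    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        target_modules=["q_proj", "v_proj"],
    )
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()
    return model
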
 