Update app.py
app.py CHANGED
@@ -10,7 +10,8 @@ from transformers import (
     Trainer,
     DataCollatorForLanguageModeling,
     AutoTokenizer,
-    LlamaConfig
+    LlamaConfig,
+    AutoConfig
 )
 from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training
 from datasets import Dataset
@@ -333,24 +334,24 @@ def train_model(
     progress=gr.Progress()
 ):
     progress(0, desc="Installing dependencies...")
-
-
-
-
-
-
-
-
-
-
-
+    log = []
+
+    # Force reinstallation of transformers with specific version
+    log.append("Installing dependencies with specific versions...")
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "--force-reinstall", "transformers==4.36.2"])
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "-U", "accelerate", "bitsandbytes", "peft", "datasets", "huggingface_hub", "deepspeed"])
+
+    # Now import everything after installation to ensure we use the correct versions
+    from datasets import Dataset
+    from huggingface_hub import snapshot_download
+    from transformers import AutoModelForCausalLM, AutoConfig, BitsAndBytesConfig, TrainingArguments, Trainer
+    from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training
 
     # --- Configuration ---
     progress(0.05, desc="Setting up configuration...")
     hf_model_repo_id = f"{hf_username}/{model_repo_name}"
     hf_dataset_repo_id = f"{hf_username}/{dataset_repo_name}"
 
-    log = []
     log.append(f"Model repo: {hf_model_repo_id}")
     log.append(f"Dataset repo: {hf_dataset_repo_id}")
@@ -369,83 +370,77 @@ def train_model(
     # --- Load Base Model (with quantization) ---
     progress(0.1, desc="Loading base model...")
     try:
-        # First
-
+        # First try to download the repo without loading the model
+        # to see what files are available
+        local_model_path = "./model_files"
+        snapshot_download(
+            repo_id=hf_model_repo_id,
+            local_dir=local_model_path,
+            local_dir_use_symlinks=False
+        )
 
-
-        from transformers import LlamaConfig
+        log.append(f"Model files downloaded to {local_model_path}")
 
-        #
-
-
-
+        # Check if this is a Llama model by looking at config.json
+        if os.path.exists(os.path.join(local_model_path, "config.json")):
+            with open(os.path.join(local_model_path, "config.json"), "r") as f:
+                config_data = json.load(f)
+            log.append(f"Model architecture type: {config_data.get('model_type', 'unknown')}")
+
+            # Force model_type to llama if needed
+            if "architectures" in config_data and "LlamaForCausalLM" in config_data["architectures"]:
+                config_data["model_type"] = "llama"
+                with open(os.path.join(local_model_path, "config.json"), "w") as f:
+                    json.dump(config_data, f)
+                log.append("Updated config.json to use llama model_type")
+
+        # Now try to load the config and model from local path
+        config = AutoConfig.from_pretrained(
+            local_model_path,
+            trust_remote_code=False  # Set to False to avoid custom model code loading
         )
 
-
+        log.append(f"Successfully loaded config: {config.model_type}")
+
+        # Load model with the config
         model = AutoModelForCausalLM.from_pretrained(
-            hf_model_repo_id,
+            local_model_path,
             config=config,
             quantization_config=bnb_config,
             device_map="auto",
-            trust_remote_code=True
+            trust_remote_code=False,
+            torch_dtype=torch.bfloat16
         )
+
         log.append(f"Loaded model vocab size: {model.config.vocab_size}")
         log.append(f"Input embedding shape: {model.get_input_embeddings().weight.shape}")
     except Exception as e:
-        error_msg = f"Error loading model
+        error_msg = f"Error loading model: {str(e)}"
         log.append(error_msg)
-
-        try:
-            log.append("Attempting alternative loading method...")
-            # Try loading without auto detection
-            model = AutoModelForCausalLM.from_pretrained(
-                hf_model_repo_id,
-                quantization_config=bnb_config,
-                device_map="auto",
-                trust_remote_code=True,
-                torch_dtype=torch.bfloat16,
-                # Add these to help with the loading
-                revision="main",
-                low_cpu_mem_usage=True,
-            )
-            log.append("Alternative loading successful!")
-            log.append(f"Loaded model vocab size: {model.config.vocab_size}")
-        except Exception as e2:
-            log.append(f"Alternative loading also failed: {e2}")
-            return "\n".join(log)
-
-    # Load the official Meta tokenizer for LLaMA 3
-    tokenizer = AutoTokenizer.from_pretrained(
-        "meta-llama/Llama-3-8B",  # Use the official Meta tokenizer
-        use_auth_token=os.environ.get("HF_TOKEN", None)  # In case it's needed
-    )
-
-    if tokenizer is None:
-        # Fallback to another common foundation model tokenizer
-        print("Falling back to another tokenizer as Meta tokenizer requires auth token")
-        tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
-
-    print(f"Loaded tokenizer vocabulary size: {len(tokenizer)}")
-
-    # Print information about input embeddings
-    print(f"Input embedding shape: {model.get_input_embeddings().weight.shape}")
-
-    # Prepare model for k-bit training
-    model = prepare_model_for_kbit_training(model)
+        return "\n".join(log)
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-
+    # --- Prepare for K-bit Training & Apply LoRA ---
+    progress(0.15, desc="Preparing model for fine-tuning...")
+    try:
+        model = prepare_model_for_kbit_training(model)
+        log.append("Model prepared for k-bit training")
+
+        lora_config = LoraConfig(
+            task_type=TaskType.CAUSAL_LM,
+            r=16,
+            lora_alpha=32,
+            lora_dropout=0.05,
+            bias="none",
+            target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
+        )
+        peft_model = get_peft_model(model, lora_config)
+        peft_model.print_trainable_parameters()
+        log.append("LoRA applied to model")
+        model_to_train = peft_model
+    except Exception as e:
+        error_msg = f"Error preparing model for training: {str(e)}"
+        log.append(error_msg)
+        return "\n".join(log)
 
     # Cleanup
     gc.collect()
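The pip-then-import pattern in the second hunk works because these modules are imported for the first time only after the reinstall; a module already present in sys.modules keeps running its old code regardless of what pip just installed. A minimal, more defensive variant of the same pattern (the install_pinned helper is illustrative, not part of app.py):

import importlib
import subprocess
import sys

def install_pinned(package_spec: str, module_name: str) -> None:
    # Reinstall the pinned version, then make sure a later import
    # actually resolves to the freshly installed files.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--force-reinstall", package_spec])
    importlib.invalidate_caches()
    if module_name in sys.modules:
        # Best effort only: reloading a large package with many submodules
        # is unreliable; restarting the process is the robust alternative.
        importlib.reload(sys.modules[module_name])

install_pinned("transformers==4.36.2", "transformers")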
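Both the old and new loading paths reference a bnb_config that is defined elsewhere in app.py, outside the hunks shown above. Its exact values are not visible in this diff; a typical 4-bit setup for this kind of QLoRA flow, offered here as an assumption, looks like:

import torch
from transformers import BitsAndBytesConfig

# Assumed 4-bit quantization setup; app.py's actual bnb_config
# is defined outside the hunks shown in this diff.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)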
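The core of the new loading path is normalizing model_type in the downloaded config.json so that AutoConfig resolves the checkpoint to the built-in Llama classes rather than failing on an unknown or custom type. A standalone sketch of just that step, assuming the checkpoint directory was already fetched with snapshot_download (the path is a placeholder):

import json
import os

from transformers import AutoConfig

local_model_path = "./model_files"  # placeholder: directory populated by snapshot_download
config_path = os.path.join(local_model_path, "config.json")

# Read the checkpoint's config and normalize model_type so AutoConfig
# maps the checkpoint onto the built-in Llama implementation.
with open(config_path) as f:
    config_data = json.load(f)

if "LlamaForCausalLM" in config_data.get("architectures", []):
    config_data["model_type"] = "llama"
    with open(config_path, "w") as f:
        json.dump(config_data, f)

config = AutoConfig.from_pretrained(local_model_path, trust_remote_code=False)
print(type(config).__name__)  # expected: LlamaConfig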