Twelve2five committed on
Commit
9295d60
·
verified ·
1 Parent(s): fe289fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -3
app.py CHANGED
@@ -164,9 +164,18 @@ def load_model():
164
 
165
  print(f"Model loaded on device: cuda:{gpu_id}")
166
 
167
- # Load tokenizer as well
168
- tokenizer = AutoTokenizer.from_pretrained(hf_model_repo_id)
169
- print(f"Loaded model vocab size: {len(tokenizer)}")
 
 
 
 
 
 
 
 
 
170
 
171
  # Print information about input embeddings
172
  print(f"Input embedding shape: {model.get_input_embeddings().weight.shape}")
 
164
 
165
  print(f"Model loaded on device: cuda:{gpu_id}")
166
 
167
+ # Load the official Meta tokenizer for LLaMA 3
168
+ tokenizer = AutoTokenizer.from_pretrained(
169
+ "meta-llama/Llama-3-8B", # Use the official Meta tokenizer
170
+ use_auth_token=os.environ.get("HF_TOKEN", None) # In case it's needed
171
+ )
172
+
173
+ if tokenizer is None:
174
+ # Fallback to another common foundation model tokenizer
175
+ print("Falling back to another tokenizer as Meta tokenizer requires auth token")
176
+ tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
177
+
178
+ print(f"Loaded tokenizer vocabulary size: {len(tokenizer)}")
179
 
180
  # Print information about input embeddings
181
  print(f"Input embedding shape: {model.get_input_embeddings().weight.shape}")