Twelve2five committed on
Commit
0586d21
·
verified ·
1 Parent(s): 16c5c11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -10
app.py CHANGED
@@ -207,16 +207,51 @@ def load_model():
207
  log.append(f"Alternative loading also failed: {e2}")
208
  return "\n".join(log)
209
 
210
- # Load the official Meta tokenizer for LLaMA 3
211
- tokenizer = AutoTokenizer.from_pretrained(
212
- "meta-llama/Llama-3-8B", # Use the official Meta tokenizer
213
- use_auth_token=os.environ.get("HF_TOKEN", None) # In case it's needed
214
- )
215
-
216
- if tokenizer is None:
217
- # Fallback to another common foundation model tokenizer
218
- print("Falling back to another tokenizer as Meta tokenizer requires auth token")
219
- tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
  print(f"Loaded tokenizer vocabulary size: {len(tokenizer)}")
222
 
 
207
  log.append(f"Alternative loading also failed: {e2}")
208
  return "\n".join(log)
209
 
210
+ # Try to load the tokenizer from the model repository directly
211
+ progress(0.3, desc="Loading tokenizer...")
212
+ try:
213
+ # First attempt: Try loading from local path
214
+ tokenizer = AutoTokenizer.from_pretrained(
215
+ local_model_path,
216
+ padding_side="right",
217
+ use_fast=True,
218
+ )
219
+ log.append("Tokenizer loaded from local files")
220
+ except Exception as e:
221
+ log.append(f"Could not load tokenizer from local files: {e}")
222
+
223
+ # Second attempt: Try loading directly from HF repo
224
+ try:
225
+ log.append("Attempting to load tokenizer directly from Hugging Face...")
226
+ tokenizer = AutoTokenizer.from_pretrained(
227
+ hf_model_repo_id,
228
+ padding_side="right",
229
+ use_fast=True,
230
+ )
231
+ log.append("Tokenizer loaded from Hugging Face repository")
232
+ except Exception as e2:
233
+ # Third attempt: Try loading a compatible tokenizer
234
+ log.append(f"Could not load tokenizer from repo: {e2}")
235
+ log.append("Attempting to load a compatible LlamaTokenizer...")
236
+ try:
237
+ from transformers import LlamaTokenizer
238
+
239
+ # Try Meta's standard Llama tokenizer
240
+ tokenizer = LlamaTokenizer.from_pretrained(
241
+ "meta-llama/Llama-2-7b-hf", # Standard Llama tokenizer
242
+ padding_side="right",
243
+ use_fast=False, # Try the Python version
244
+ )
245
+ log.append("Loaded a compatible LlamaTokenizer as fallback")
246
+ except Exception as e3:
247
+ error_msg = f"Failed to load any compatible tokenizer: {e3}"
248
+ log.append(error_msg)
249
+ return "\n".join(log)
250
+
251
+ # Set pad token if not already set
252
+ if tokenizer.pad_token is None:
253
+ tokenizer.pad_token = tokenizer.eos_token
254
+ log.append("Set pad_token to eos_token")
255
 
256
  print(f"Loaded tokenizer vocabulary size: {len(tokenizer)}")
257