Update app.py
app.py CHANGED
@@ -207,16 +207,51 @@ def load_model():
             log.append(f"Alternative loading also failed: {e2}")
             return "\n".join(log)
 
-    #
-
-
-
-
-
-
-
-
-
+    # Try to load the tokenizer from the model repository directly
+    progress(0.3, desc="Loading tokenizer...")
+    try:
+        # First attempt: Try loading from local path
+        tokenizer = AutoTokenizer.from_pretrained(
+            local_model_path,
+            padding_side="right",
+            use_fast=True,
+        )
+        log.append("Tokenizer loaded from local files")
+    except Exception as e:
+        log.append(f"Could not load tokenizer from local files: {e}")
+
+        # Second attempt: Try loading directly from HF repo
+        try:
+            log.append("Attempting to load tokenizer directly from Hugging Face...")
+            tokenizer = AutoTokenizer.from_pretrained(
+                hf_model_repo_id,
+                padding_side="right",
+                use_fast=True,
+            )
+            log.append("Tokenizer loaded from Hugging Face repository")
+        except Exception as e2:
+            # Third attempt: Try loading a compatible tokenizer
+            log.append(f"Could not load tokenizer from repo: {e2}")
+            log.append("Attempting to load a compatible LlamaTokenizer...")
+            try:
+                from transformers import LlamaTokenizer
+
+                # Try Meta's standard Llama tokenizer
+                tokenizer = LlamaTokenizer.from_pretrained(
+                    "meta-llama/Llama-2-7b-hf",  # Standard Llama tokenizer
+                    padding_side="right",
+                    use_fast=False,  # Try the Python version
+                )
+                log.append("Loaded a compatible LlamaTokenizer as fallback")
+            except Exception as e3:
+                error_msg = f"Failed to load any compatible tokenizer: {e3}"
+                log.append(error_msg)
+                return "\n".join(log)
+
+    # Set pad token if not already set
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+        log.append("Set pad_token to eos_token")
 
     print(f"Loaded tokenizer vocabulary size: {len(tokenizer)}")
 
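The nested try/except blocks added here form a source-fallback chain: local files first, then the Hugging Face repo, then a generic Llama tokenizer. Below is a minimal sketch of that same pattern factored into a reusable helper; the helper name and the candidate list are illustrative assumptions (not part of app.py), and only transformers' AutoTokenizer.from_pretrained is relied on.

    from transformers import AutoTokenizer

    def load_tokenizer_with_fallbacks(candidates, **kwargs):
        """Try each tokenizer source in order; raise only if all of them fail."""
        errors = []
        for source in candidates:
            try:
                return AutoTokenizer.from_pretrained(source, **kwargs)
            except Exception as e:
                errors.append(f"{source}: {e}")  # record the failure, try the next source
        raise RuntimeError("No tokenizer could be loaded:\n" + "\n".join(errors))

    # Hypothetical usage mirroring the commit: local path first, hosted repo second.
    tokenizer = load_tokenizer_with_fallbacks(
        ["./model", "meta-llama/Llama-2-7b-hf"],
        padding_side="right",
        use_fast=True,
    )
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token  # same pad-token fixup as in the diff

Because the last fallback can return a tokenizer from a different repository than the model, the vocabulary size printed at the end of the hunk is worth comparing against the model's embedding table. A sketch, assuming a PyTorch `model` is already loaded:

    embed_rows = model.get_input_embeddings().weight.shape[0]
    if len(tokenizer) != embed_rows:
        print(f"Warning: tokenizer size {len(tokenizer)} != model embeddings {embed_rows}")
        # model.resize_token_embeddings(len(tokenizer))  # one possible remedy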