update initialize_model_once and create_llm_pipeline to handle the error "Failed to create pipeline: 'model'"
app.py
CHANGED
@@ -224,21 +224,37 @@ def initialize_model_once(model_key):
         print(f"Error loading model {model_name}: {str(e)}")
         print(traceback.format_exc())
         raise RuntimeError(f"Failed to load model {model_name}: {str(e)}")
-
+
+    # Final verification that model loaded correctly
+    if MODEL_CACHE["model"] is None:
+        print(f"WARNING: Model {model_name} appears to be None after loading")
+        # Try to free memory before returning
+        torch.cuda.empty_cache() if torch.cuda.is_available() else None
+        gc.collect()
+
     return MODEL_CACHE["tokenizer"], MODEL_CACHE["model"], MODEL_CACHE.get("is_gguf", False)
 
 def create_llm_pipeline(model_key):
-    """Create a new pipeline using the specified model"""
+    """Create a new pipeline using the specified model with better error handling"""
     try:
         print(f"Creating pipeline for model: {model_key}")
         tokenizer, model, is_gguf = initialize_model_once(model_key)
 
+        # Additional check to ensure model was properly loaded
+        if model is None:
+            print(f"Model is None for {model_key}, falling back to alternate model")
+            fallback_model = get_fallback_model(model_key)
+            if fallback_model != model_key:
+                print(f"Attempting to use fallback model: {fallback_model}")
+                tokenizer, model, is_gguf = initialize_model_once(fallback_model)
+                if model is None:
+                    raise ValueError(f"Both original and fallback models failed to load")
+            else:
+                raise ValueError(f"Model is None and no fallback available")
+
         # Get the model info for reference
-
+        model_info = MODEL_CONFIG.get(model_key, MODEL_CONFIG.get(fallback_model, {}))
 
-        if model is None:
-            raise ValueError(f"Model is None for {model_key}")
-
         # For GGUF models from llama-cpp-python
         if is_gguf:
             # Create adapter to use GGUF model like HF pipeline
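The new fallback path calls get_fallback_model, which is defined elsewhere in app.py and not shown in this hunk. A minimal sketch of what such a helper could look like, assuming MODEL_CONFIG is a module-level dict mapping model keys to config dicts; the "fallback" config key and the default key name below are assumptions, not taken from the diff:

# Hypothetical sketch; the helper actually used in app.py may differ.
DEFAULT_FALLBACK_KEY = "tinyllama"  # assumption: any small, known-good key in MODEL_CONFIG

def get_fallback_model(model_key):
    """Return an alternate model key to try when `model_key` fails to load.

    Returns the original key unchanged when no alternative exists, which is
    the case create_llm_pipeline treats as "no fallback available".
    """
    fallback = MODEL_CONFIG.get(model_key, {}).get("fallback", DEFAULT_FALLBACK_KEY)
    # Never fall back to the key that just failed, and only suggest known keys.
    if fallback != model_key and fallback in MODEL_CONFIG:
        return fallback
    return model_key

One design note: because dict.get evaluates its default argument eagerly, the new model_info line assumes fallback_model is bound even when no fallback was needed; binding fallback_model = get_fallback_model(model_key) once, before the None check, keeps that lookup safe.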
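The hunk ends where the GGUF branch begins; the adapter it refers to lives below the visible context. As a rough illustration of the pattern the comment describes (wrapping a llama-cpp-python model so downstream code can call it like a transformers text-generation pipeline), a minimal sketch, assuming `model` is a llama_cpp.Llama instance; the class name and defaults are illustrative, not the Space's actual implementation:

# Illustrative only: adapts a llama_cpp.Llama object to the small subset of the
# transformers pipeline interface used downstream (callable returning
# [{"generated_text": ...}]). Names and defaults are assumptions.
class GGUFPipelineAdapter:
    def __init__(self, llama_model, max_tokens=512, temperature=0.7):
        self.model = llama_model
        self.max_tokens = max_tokens
        self.temperature = temperature

    def __call__(self, prompt, **kwargs):
        out = self.model(
            prompt,
            max_tokens=kwargs.get("max_new_tokens", self.max_tokens),
            temperature=kwargs.get("temperature", self.temperature),
        )
        # llama-cpp-python returns {"choices": [{"text": ...}, ...]}
        text = out["choices"][0]["text"]
        # Mimic the transformers text-generation output shape
        return [{"generated_text": prompt + text}]

With an adapter like this, create_llm_pipeline can return one callable object regardless of whether is_gguf is set, so the rest of the app does not need to branch on the backend.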