hmrizal committed
Commit 399135b · verified · 1 parent: 0275bb1

update initialize_model_once and create_llm_pipeline to handle the error "Failed to create pipeline: 'model'"
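For context, the quoted error text is what a stringified KeyError looks like once it is caught by an outer exception handler. The snippet below is a minimal sketch, not code from app.py, and the wrapping handler shown in it is an assumption rather than part of this hunk; it only illustrates how a missing "model" entry can produce exactly that message:

# Minimal sketch (assumed handler, not from this hunk): why the message reads
# "Failed to create pipeline: 'model'".
try:
    cache = {}              # e.g. a MODEL_CACHE that never got a "model" entry
    model = cache["model"]  # raises KeyError('model')
except Exception as e:
    print(f"Failed to create pipeline: {str(e)}")  # prints: Failed to create pipeline: 'model'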

Files changed (1): app.py (+22 -6)
app.py CHANGED
@@ -224,21 +224,37 @@ def initialize_model_once(model_key):
         print(f"Error loading model {model_name}: {str(e)}")
         print(traceback.format_exc())
         raise RuntimeError(f"Failed to load model {model_name}: {str(e)}")
-
+
+    # Final verification that model loaded correctly
+    if MODEL_CACHE["model"] is None:
+        print(f"WARNING: Model {model_name} appears to be None after loading")
+        # Try to free memory before returning
+        torch.cuda.empty_cache() if torch.cuda.is_available() else None
+        gc.collect()
+
     return MODEL_CACHE["tokenizer"], MODEL_CACHE["model"], MODEL_CACHE.get("is_gguf", False)
 
 def create_llm_pipeline(model_key):
-    """Create a new pipeline using the specified model"""
+    """Create a new pipeline using the specified model with better error handling"""
     try:
         print(f"Creating pipeline for model: {model_key}")
         tokenizer, model, is_gguf = initialize_model_once(model_key)
 
+        # Additional check to ensure model was properly loaded
+        if model is None:
+            print(f"Model is None for {model_key}, falling back to alternate model")
+            fallback_model = get_fallback_model(model_key)
+            if fallback_model != model_key:
+                print(f"Attempting to use fallback model: {fallback_model}")
+                tokenizer, model, is_gguf = initialize_model_once(fallback_model)
+                if model is None:
+                    raise ValueError(f"Both original and fallback models failed to load")
+            else:
+                raise ValueError(f"Model is None and no fallback available")
+
         # Get the model info for reference
-        model_info = MODEL_CONFIG[model_key]
+        model_info = MODEL_CONFIG.get(model_key, MODEL_CONFIG.get(fallback_model, {}))
 
-        if model is None:
-            raise ValueError(f"Model is None for {model_key}")
-
         # For GGUF models from llama-cpp-python
         if is_gguf:
             # Create adapter to use GGUF model like HF pipeline
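The new fallback path calls get_fallback_model, which is not defined in this hunk. A minimal sketch of the helper that call assumes (the mapping below is illustrative only, not the repository's actual model list):

# Sketch only: an illustrative get_fallback_model; the real app.py may differ.
# The placeholder mapping is an assumption, not the repo's configuration.
FALLBACK_MODELS = {
    # "large-model-key": "smaller-model-key",
}

def get_fallback_model(model_key):
    """Return an alternate model key to retry with, or the same key if none is configured."""
    return FALLBACK_MODELS.get(model_key, model_key)

One caveat with the hunk as written: fallback_model is only bound inside the "if model is None" branch, yet the new model_info line evaluates MODEL_CONFIG.get(fallback_model, {}) unconditionally (dict.get evaluates its default argument eagerly), so initializing fallback_model = model_key before the check would keep that lookup safe when the primary model loads normally.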