Update app.py
app.py
CHANGED
@@ -6,6 +6,8 @@ import time
 # --- Try to import ctransformers for GGUF, provide helpful message if not found ---
 try:
     from ctransformers import AutoModelForCausalLM as AutoModelForCausalLM_GGUF
+    # Import LLM directly as it's the actual type of the loaded model
+    from ctransformers.llm import LLM
     from transformers import AutoTokenizer, AutoModelForCausalLM
     GGUF_AVAILABLE = True
 except ImportError:
@@ -71,7 +73,6 @@ def load_model_for_zerocpu():
 
 # --- Inference Function for Gradio ChatInterface ---
 def predict_chat(message: str, history: list):
-    # NEW DIAGNOSTIC PRINT: Check model type at the start of prediction
     print(f"Model type in predict_chat: {type(model)}")
 
     if model is None or tokenizer is None:
@@ -84,8 +85,8 @@ def predict_chat(message: str, history: list):
     generated_text = ""
     start_time = time.time()
 
-
-
+    # CORRECTED: Check against ctransformers.llm.LLM directly
+    if GGUF_AVAILABLE and isinstance(model, LLM):
         print("Using GGUF model generation path.")
         prompt_input = ""
         for msg in messages:
@@ -111,7 +112,6 @@ def predict_chat(message: str, history: list):
         yield generated_text
 
     else:
-        # NEW DIAGNOSTIC PRINT: Confirm standard Hugging Face path is taken
         print("Using standard Hugging Face model generation path.")
         input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)