Update app.py
app.py
CHANGED
@@ -6,6 +6,8 @@ import time
 # --- Try to import ctransformers for GGUF, provide helpful message if not found ---
 try:
     from ctransformers import AutoModelForCausalLM as AutoModelForCausalLM_GGUF
+    # Import LLM directly as it's the actual type of the loaded model
+    from ctransformers.llm import LLM
     from transformers import AutoTokenizer, AutoModelForCausalLM
     GGUF_AVAILABLE = True
 except ImportError:
@@ -71,7 +73,6 @@ def load_model_for_zerocpu():
 
 # --- Inference Function for Gradio ChatInterface ---
 def predict_chat(message: str, history: list):
-    # NEW DIAGNOSTIC PRINT: Check model type at the start of prediction
     print(f"Model type in predict_chat: {type(model)}")
 
     if model is None or tokenizer is None:
@@ -84,8 +85,8 @@ def predict_chat(message: str, history: list):
     generated_text = ""
     start_time = time.time()
 
-
-
+    # CORRECTED: Check against ctransformers.llm.LLM directly
+    if GGUF_AVAILABLE and isinstance(model, LLM):
         print("Using GGUF model generation path.")
         prompt_input = ""
         for msg in messages:
@@ -111,7 +112,6 @@ def predict_chat(message: str, history: list):
         yield generated_text
 
     else:
-        # NEW DIAGNOSTIC PRINT: Confirm standard Hugging Face path is taken
         print("Using standard Hugging Face model generation path.")
         input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)