Update app.py
app.py CHANGED
@@ -71,6 +71,9 @@ def load_model_for_zerocpu():
 
 # --- Inference Function for Gradio ChatInterface ---
 def predict_chat(message: str, history: list):
+    # NEW DIAGNOSTIC PRINT: Check model type at the start of prediction
+    print(f"Model type in predict_chat: {type(model)}")
+
     if model is None or tokenizer is None:
         yield "Error: Model or tokenizer failed to load. Please check the Space logs for details."
         return
@@ -82,6 +85,8 @@ def predict_chat(message: str, history: list):
     start_time = time.time()
 
     if isinstance(model, AutoModelForCausalLM_GGUF):
+        # NEW DIAGNOSTIC PRINT: Confirm GGUF path is taken
+        print("Using GGUF model generation path.")
         prompt_input = ""
         for msg in messages:
             if msg["role"] == "system":
@@ -105,13 +110,14 @@ def predict_chat(message: str, history: list):
             generated_text += token
             yield generated_text
 
-    else:
+    else:
+        # NEW DIAGNOSTIC PRINT: Confirm standard Hugging Face path is taken
+        print("Using standard Hugging Face model generation path.")
         input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
 
         outputs = model.generate(
             inputs,
-            # Changed max_new_tokens to max_length for broader compatibility
             max_length=inputs.shape[-1] + MAX_NEW_TOKENS,
             temperature=TEMPERATURE,
             top_k=TOP_K,
@@ -158,7 +164,6 @@ if __name__ == "__main__":
             ["What's the best way to stay motivated?"],
         ],
         cache_examples=False,
-        # clear_btn="Clear Chat" was removed in the previous step
     )
 
     demo.chatbot.value = initial_messages_for_value
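The only functional change in this commit is instrumentation: predict_chat now reports which generation path it takes. The sketch below is a minimal, self-contained illustration of that dispatch pattern, not the Space's code; GGUFStub and HFStub are hypothetical stand-ins for whatever app.py loads as AutoModelForCausalLM_GGUF and as the standard transformers model, and the generation calls are simplified accordingly.

class GGUFStub:
    # Stand-in for a GGUF-backed model that streams text chunks.
    def stream(self, prompt):
        yield from ("Hello", " from", " the GGUF path")

class HFStub:
    # Stand-in for a transformers model whose generate() returns full text.
    def generate(self, prompt):
        return "Hello from the standard Hugging Face path"

def predict_chat(message, model):
    # Diagnostic print mirroring the one added at the top of predict_chat.
    print(f"Model type in predict_chat: {type(model)}")
    if isinstance(model, GGUFStub):
        print("Using GGUF model generation path.")
        generated_text = ""
        for token in model.stream(message):
            generated_text += token
            yield generated_text
    else:
        print("Using standard Hugging Face model generation path.")
        yield model.generate(message)

for partial in predict_chat("Hi", GGUFStub()):
    print(partial)

Branching on the loaded object's type lets one Gradio callback serve both backends, and the prints make the chosen branch visible in the Space logs.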
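The comment deleted in the third hunk ("Changed max_new_tokens to max_length for broader compatibility") refers to how the standard path bounds generation. In transformers, max_length caps the total sequence (prompt plus completion) while max_new_tokens caps only the completion, so max_length=inputs.shape[-1] + MAX_NEW_TOKENS sets the same budget. A small sketch, using a public gpt2 checkpoint and a placeholder constant purely for illustration:

from transformers import AutoModelForCausalLM, AutoTokenizer

MAX_NEW_TOKENS = 32  # placeholder; app.py defines its own value

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer.encode("Hello, how are you?", return_tensors="pt")

# Equivalent length budgets: total length vs. new-token count.
out_total = model.generate(inputs, max_length=inputs.shape[-1] + MAX_NEW_TOKENS, do_sample=False)
out_new = model.generate(inputs, max_new_tokens=MAX_NEW_TOKENS, do_sample=False)

print(out_total.shape, out_new.shape)  # both capped at prompt length + MAX_NEW_TOKENS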