Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -39,20 +39,25 @@ def generate_response(model_id, conversation, user_message, max_length=512, temp
     load_time = time.time() - start_time
     print(f"✅ Model loaded in {load_time:.2f}s")
 
-    # Build
-
+    # Build messages in proper chat format
+    messages = []
+
+    # Add conversation history
     for user_msg, assistant_msg in conversation:
         if user_msg:
-
+            messages.append({"role": "user", "content": user_msg})
         if assistant_msg:
-
+            messages.append({"role": "assistant", "content": assistant_msg})
 
     # Add current user message
-
-    conversation_history.append("Athena:")
+    messages.append({"role": "user", "content": user_message})
 
-    #
-    prompt =
+    # Apply chat template
+    prompt = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
 
     # Tokenize and move to GPU
     inputs = tokenizer(prompt, return_tensors="pt")
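For reference, the added code path can be exercised on its own. The sketch below is a minimal, self-contained version of the new flow, assuming a chat-tuned model whose tokenizer ships a chat template; the model id HuggingFaceH4/zephyr-7b-beta and the sample conversation are placeholders for illustration, not necessarily what this Space actually loads.

from transformers import AutoTokenizer

# Placeholder model id; any tokenizer that provides a chat template works here
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

# Example history in the (user_msg, assistant_msg) shape the Space iterates over
conversation = [("Hi, who are you?", "I'm Athena. How can I help?")]
user_message = "Summarize what you can do."

# Build messages in proper chat format, as in the added lines above
messages = []
for user_msg, assistant_msg in conversation:
    if user_msg:
        messages.append({"role": "user", "content": user_msg})
    if assistant_msg:
        messages.append({"role": "assistant", "content": assistant_msg})
messages.append({"role": "user", "content": user_message})

# Render the model's own chat template to a plain string and append the
# assistant turn marker so generation continues as the assistant
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Tokenize the rendered prompt; per the diff's comment, the Space then moves
# these tensors to the GPU before generation
inputs = tokenizer(prompt, return_tensors="pt")
print(prompt)

Compared with the removed hand-assembled "Athena:" prompt, apply_chat_template emits the role markers and special tokens the loaded model was actually trained on, so the same history-building loop keeps working if the Space switches to a different chat model.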