Update app.py
app.py
CHANGED
@@ -12,7 +12,7 @@ import re
 import psutil
 import subprocess
 import traceback
-import torch
+import torch
 
 # Set VLLM logging level to DEBUG for detailed output
 os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
@@ -35,7 +35,7 @@ vllm_cache_dir = os.path.join(persistent_dir, "vllm_cache")
 for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir, vllm_cache_dir]:
     os.makedirs(directory, exist_ok=True)
 
-# Update environment variables
+# Update environment variables
 os.environ["HF_HOME"] = model_cache_dir
 os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
 os.environ["VLLM_CACHE_DIR"] = vllm_cache_dir
@@ -208,7 +208,6 @@ Medical Records:
 
     # Initialize response tracking
    full_response = ""
-    last_update_time = 0
    response_chunks = []
 
    # Process streaming response
@@ -224,10 +223,12 @@ Medical Records:
        try:
            if chunk is None:
                continue
-
+
            # Handle different chunk types
            if isinstance(chunk, str):
                chunk_content = chunk
+            elif hasattr(chunk, 'content'):
+                chunk_content = chunk.content
            elif isinstance(chunk, list):
                chunk_content = "".join([c.content for c in chunk if hasattr(c, "content") and c.content])
            else:
@@ -240,11 +241,14 @@ Medical Records:
            response_chunks.append(chunk_content)
            full_response = "".join(response_chunks)
 
+            # Remove any tool call markers from the displayed response
+            display_response = full_response.split('[TOOL_CALLS]')[0].strip()
+
            # Update the chat history with the latest response
            if len(history) > 0 and history[-1]["role"] == "assistant":
-                history[-1]["content"] = full_response
+                history[-1]["content"] = display_response
            else:
-                history.append({"role": "assistant", "content": full_response})
+                history.append({"role": "assistant", "content": display_response})
 
            yield history, None
 
@@ -256,6 +260,9 @@ Medical Records:
    # Final response handling
    if not full_response:
        full_response = "⚠️ No clear oversights identified or model output was invalid."
+    else:
+        # Clean up the final response
+        full_response = full_response.split('[TOOL_CALLS]')[0].strip()
 
    # Save report if we have files
    report_path = None
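Aside from the diff itself, a condensed sketch of the startup configuration the first two hunks touch may help orient readers. The "/data" mount point and the "model_cache" folder name below are assumptions for illustration; app.py derives all of these paths from its own persistent_dir and also creates tool, file, and report cache directories that are omitted here.

import os

# Condensed sketch of the cache setup modified by the first two hunks.
# "/data" and "model_cache" are assumed values, not taken from app.py.
persistent_dir = "/data"
model_cache_dir = os.path.join(persistent_dir, "model_cache")
vllm_cache_dir = os.path.join(persistent_dir, "vllm_cache")

for directory in (model_cache_dir, vllm_cache_dir):
    os.makedirs(directory, exist_ok=True)

# Update environment variables so model weights and vLLM artifacts
# are cached on the persistent volume.
os.environ["HF_HOME"] = model_cache_dir
os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
os.environ["VLLM_CACHE_DIR"] = vllm_cache_dir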
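The remaining hunks all serve the streaming loop. As a rough sketch of the behaviour they add up to, the generator below normalizes each chunk to text whether it arrives as a plain string, an object with a .content attribute, or a list of such objects, then hides everything from the first [TOOL_CALLS] marker onward before updating the chat history. The stream and history names, the render_stream wrapper, and the Delta class in the usage example are stand-ins for illustration, not code from app.py.

def render_stream(stream, history):
    """Accumulate streamed chunks and yield the updated chat history."""
    response_chunks = []

    for chunk in stream:
        if chunk is None:
            continue

        # Normalize the chunk to plain text, mirroring the branches in the diff.
        if isinstance(chunk, str):
            chunk_content = chunk
        elif hasattr(chunk, "content"):
            chunk_content = chunk.content or ""
        elif isinstance(chunk, list):
            chunk_content = "".join(
                c.content for c in chunk if hasattr(c, "content") and c.content
            )
        else:
            continue  # stand-in for the else branch the diff does not show

        response_chunks.append(chunk_content)
        full_response = "".join(response_chunks)

        # Hide raw tool-call markers from the user-facing transcript.
        display_response = full_response.split("[TOOL_CALLS]")[0].strip()

        if history and history[-1]["role"] == "assistant":
            history[-1]["content"] = display_response
        else:
            history.append({"role": "assistant", "content": display_response})

        yield history


# Usage example with a mixed stream of strings and delta-style objects.
class Delta:
    def __init__(self, content):
        self.content = content

history = []
chunks = ["The discharge note ", Delta("omits the INR follow-up."), Delta("[TOOL_CALLS] {...}")]
for _ in render_stream(chunks, history):
    pass
print(history[-1]["content"])  # -> "The discharge note omits the INR follow-up."

After the loop, the diff applies the same split('[TOOL_CALLS]')[0].strip() cleanup to full_response itself, presumably so the saved report matches the text that was displayed in the chat.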