Ali2206 committed on
Commit bdb999a · verified · 1 Parent(s): ac11a7e

Update app.py

Files changed (1)
  1. app.py +13 -6
app.py CHANGED
@@ -12,7 +12,7 @@ import re
 import psutil
 import subprocess
 import traceback
-import torch # For checking CUDA availability
+import torch
 
 # Set VLLM logging level to DEBUG for detailed output
 os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
@@ -35,7 +35,7 @@ vllm_cache_dir = os.path.join(persistent_dir, "vllm_cache")
 for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir, vllm_cache_dir]:
     os.makedirs(directory, exist_ok=True)
 
-# Update environment variables to use HF_HOME
+# Update environment variables
 os.environ["HF_HOME"] = model_cache_dir
 os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
 os.environ["VLLM_CACHE_DIR"] = vllm_cache_dir
@@ -208,7 +208,6 @@ Medical Records:
 
     # Initialize response tracking
     full_response = ""
-    last_update_time = 0
    response_chunks = []
 
     # Process streaming response
@@ -224,10 +223,12 @@ Medical Records:
         try:
             if chunk is None:
                 continue
-
+
             # Handle different chunk types
             if isinstance(chunk, str):
                 chunk_content = chunk
+            elif hasattr(chunk, 'content'):
+                chunk_content = chunk.content
             elif isinstance(chunk, list):
                 chunk_content = "".join([c.content for c in chunk if hasattr(c, "content") and c.content])
             else:
@@ -240,11 +241,14 @@ Medical Records:
             response_chunks.append(chunk_content)
             full_response = "".join(response_chunks)
 
+            # Remove any tool call markers from the displayed response
+            display_response = full_response.split('[TOOL_CALLS]')[0].strip()
+
             # Update the chat history with the latest response
             if len(history) > 0 and history[-1]["role"] == "assistant":
-                history[-1]["content"] = full_response
+                history[-1]["content"] = display_response
             else:
-                history.append({"role": "assistant", "content": full_response})
+                history.append({"role": "assistant", "content": display_response})
 
             yield history, None
 
@@ -256,6 +260,9 @@ Medical Records:
     # Final response handling
     if not full_response:
         full_response = "⚠️ No clear oversights identified or model output was invalid."
+    else:
+        # Clean up the final response
+        full_response = full_response.split('[TOOL_CALLS]')[0].strip()
 
     # Save report if we have files
     report_path = None
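
The only change in the cache-setup hunk is the reworded comment, but the surrounding block is the part of app.py that redirects model downloads to persistent storage: it creates per-purpose cache directories and points HF_HOME, TRANSFORMERS_CACHE and VLLM_CACHE_DIR at them before any model is loaded. A minimal standalone sketch of that pattern (persistent_dir here is a placeholder path, not the app's real value):

import os

persistent_dir = "/data"  # placeholder root; the real app derives this elsewhere
model_cache_dir = os.path.join(persistent_dir, "model_cache")
vllm_cache_dir = os.path.join(persistent_dir, "vllm_cache")

for directory in (model_cache_dir, vllm_cache_dir):
    os.makedirs(directory, exist_ok=True)

# Set before importing transformers/vllm so the libraries pick up the custom cache locations
os.environ["HF_HOME"] = model_cache_dir
os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
os.environ["VLLM_CACHE_DIR"] = vllm_cache_dir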
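
The new elif hasattr(chunk, 'content') branch is what lets the streaming loop accept message-style objects as well as plain strings and lists of messages. A minimal sketch of that dispatch, pulled out into a helper for illustration (chunk_to_text and ChunkMsg are names invented here; the app keeps this logic inline in its loop):

from dataclasses import dataclass

@dataclass
class ChunkMsg:
    # Hypothetical stand-in for whatever message object the agent streams
    content: str

def chunk_to_text(chunk) -> str:
    # Normalize one streamed chunk (str, object with .content, or list of such objects) to text
    if chunk is None:
        return ""
    if isinstance(chunk, str):
        return chunk
    if hasattr(chunk, "content"):  # branch added in this commit
        return chunk.content or ""
    if isinstance(chunk, list):
        return "".join(c.content for c in chunk if hasattr(c, "content") and c.content)
    return str(chunk)

chunks = ["Patient ", ChunkMsg("has a documented "), [ChunkMsg("penicillin allergy.")]]
print("".join(chunk_to_text(c) for c in chunks))
# -> Patient has a documented penicillin allergy.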
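
The other behavioural change is that raw tool-call markers no longer reach the user: both the streamed display text and the final saved response are cut off at the first [TOOL_CALLS] token. A small sketch of that clean-up, assuming the marker appears verbatim in the model output as the diff implies (strip_tool_calls is a name invented here; the commit inlines the same split/strip expression in two places):

def strip_tool_calls(text: str, marker: str = "[TOOL_CALLS]") -> str:
    # Keep only the text before the first tool-call marker, trimmed of surrounding whitespace
    return text.split(marker)[0].strip()

raw = 'Missed follow-up on elevated creatinine. [TOOL_CALLS][{"name": "lookup_guideline"}]'
print(strip_tool_calls(raw))
# -> Missed follow-up on elevated creatinine.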