Ali2206 committed on
Commit
47975be
·
verified ·
1 Parent(s): 41c4b2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -3
app.py CHANGED
@@ -16,7 +16,7 @@ import torch
16
  import copy
17
  import time
18
 
19
- # Configure environment variables
20
  os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
21
  if not torch.cuda.is_available():
22
  print("No GPU detected. Forcing CPU mode by setting CUDA_VISIBLE_DEVICES to an empty string.")
@@ -60,9 +60,11 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
60
  try:
61
  text_chunks = []
62
  with pdfplumber.open(file_path) as pdf:
 
63
  for i, page in enumerate(pdf.pages[:3]):
64
  text = page.extract_text() or ""
65
  text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
 
66
  for i, page in enumerate(pdf.pages[3:max_pages], start=4):
67
  page_text = page.extract_text() or ""
68
  if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
@@ -85,7 +87,8 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
85
  text = extract_priority_pages(file_path)
86
  result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
87
  elif file_type == "csv":
88
- df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str, skip_blank_lines=False, on_bad_lines="skip")
 
89
  content = df.fillna("").astype(str).values.tolist()
90
  result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
91
  elif file_type in ["xls", "xlsx"]:
@@ -153,7 +156,7 @@ def init_agent():
153
  def create_ui(agent):
154
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
155
  gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
156
- # Persistent conversation state
157
  conversation_state = gr.State([])
158
  chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
159
  file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
@@ -167,6 +170,7 @@ def create_ui(agent):
167
  history = state
168
  history.append({"role": "user", "content": message})
169
  history.append({"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."})
 
170
  yield copy.deepcopy(history), None, copy.deepcopy(history)
171
 
172
  extracted = ""
@@ -221,6 +225,7 @@ Medical Records:
221
  response_chunks.append(chunk_content)
222
  full_response = "".join(response_chunks)
223
 
 
224
  matches = re.findall(r"\[TOOL_CALLS\]\[(.*?)\]", chunk_content, re.DOTALL)
225
  for m in matches:
226
  tool_calls_rendered.append(f"\n📦 Tool Call: [{m.strip()}]")
@@ -234,6 +239,7 @@ Medical Records:
234
  else:
235
  history.append({"role": "assistant", "content": display_response})
236
 
 
237
  yield copy.deepcopy(history), None, copy.deepcopy(history)
238
 
239
  full_response = re.sub(r"\[TOOL_CALLS\].*?\n*", "", full_response, flags=re.DOTALL).strip()
 
16
  import copy
17
  import time
18
 
19
+ # Configure environment variables and logging
20
  os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
21
  if not torch.cuda.is_available():
22
  print("No GPU detected. Forcing CPU mode by setting CUDA_VISIBLE_DEVICES to an empty string.")
 
60
  try:
61
  text_chunks = []
62
  with pdfplumber.open(file_path) as pdf:
63
+ # Always extract the first 3 pages
64
  for i, page in enumerate(pdf.pages[:3]):
65
  text = page.extract_text() or ""
66
  text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
67
+ # For pages 4 to max_pages, add only if medical keywords are found
68
  for i, page in enumerate(pdf.pages[3:max_pages], start=4):
69
  page_text = page.extract_text() or ""
70
  if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
 
87
  text = extract_priority_pages(file_path)
88
  result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
89
  elif file_type == "csv":
90
+ df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
91
+ skip_blank_lines=False, on_bad_lines="skip")
92
  content = df.fillna("").astype(str).values.tolist()
93
  result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
94
  elif file_type in ["xls", "xlsx"]:
 
156
  def create_ui(agent):
157
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
158
  gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
159
+ # Persistent conversation state to maintain history
160
  conversation_state = gr.State([])
161
  chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
162
  file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
 
170
  history = state
171
  history.append({"role": "user", "content": message})
172
  history.append({"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."})
173
+ # Yield the initial update
174
  yield copy.deepcopy(history), None, copy.deepcopy(history)
175
 
176
  extracted = ""
 
225
  response_chunks.append(chunk_content)
226
  full_response = "".join(response_chunks)
227
 
228
+ # Collect and render any tool calls
229
  matches = re.findall(r"\[TOOL_CALLS\]\[(.*?)\]", chunk_content, re.DOTALL)
230
  for m in matches:
231
  tool_calls_rendered.append(f"\n📦 Tool Call: [{m.strip()}]")
 
239
  else:
240
  history.append({"role": "assistant", "content": display_response})
241
 
242
+ # Yield updated conversation state
243
  yield copy.deepcopy(history), None, copy.deepcopy(history)
244
 
245
  full_response = re.sub(r"\[TOOL_CALLS\].*?\n*", "", full_response, flags=re.DOTALL).strip()