Update app.py
Browse files
app.py
CHANGED
|
@@ -16,7 +16,7 @@ import torch
|
|
| 16 |
import copy
|
| 17 |
import time
|
| 18 |
|
| 19 |
-
# Configure environment variables
|
| 20 |
os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
|
| 21 |
if not torch.cuda.is_available():
|
| 22 |
print("No GPU detected. Forcing CPU mode by setting CUDA_VISIBLE_DEVICES to an empty string.")
|
|
@@ -60,9 +60,11 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
|
|
| 60 |
try:
|
| 61 |
text_chunks = []
|
| 62 |
with pdfplumber.open(file_path) as pdf:
|
|
|
|
| 63 |
for i, page in enumerate(pdf.pages[:3]):
|
| 64 |
text = page.extract_text() or ""
|
| 65 |
text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
|
|
|
|
| 66 |
for i, page in enumerate(pdf.pages[3:max_pages], start=4):
|
| 67 |
page_text = page.extract_text() or ""
|
| 68 |
if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
|
|
@@ -85,7 +87,8 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
|
|
| 85 |
text = extract_priority_pages(file_path)
|
| 86 |
result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
|
| 87 |
elif file_type == "csv":
|
| 88 |
-
df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
|
|
|
|
| 89 |
content = df.fillna("").astype(str).values.tolist()
|
| 90 |
result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
|
| 91 |
elif file_type in ["xls", "xlsx"]:
|
|
@@ -153,7 +156,7 @@ def init_agent():
|
|
| 153 |
def create_ui(agent):
|
| 154 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 155 |
gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
|
| 156 |
-
# Persistent conversation state
|
| 157 |
conversation_state = gr.State([])
|
| 158 |
chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
|
| 159 |
file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
|
|
@@ -167,6 +170,7 @@ def create_ui(agent):
|
|
| 167 |
history = state
|
| 168 |
history.append({"role": "user", "content": message})
|
| 169 |
history.append({"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."})
|
|
|
|
| 170 |
yield copy.deepcopy(history), None, copy.deepcopy(history)
|
| 171 |
|
| 172 |
extracted = ""
|
|
@@ -221,6 +225,7 @@ Medical Records:
|
|
| 221 |
response_chunks.append(chunk_content)
|
| 222 |
full_response = "".join(response_chunks)
|
| 223 |
|
|
|
|
| 224 |
matches = re.findall(r"\[TOOL_CALLS\]\[(.*?)\]", chunk_content, re.DOTALL)
|
| 225 |
for m in matches:
|
| 226 |
tool_calls_rendered.append(f"\n📦 Tool Call: [{m.strip()}]")
|
|
@@ -234,6 +239,7 @@ Medical Records:
|
|
| 234 |
else:
|
| 235 |
history.append({"role": "assistant", "content": display_response})
|
| 236 |
|
|
|
|
| 237 |
yield copy.deepcopy(history), None, copy.deepcopy(history)
|
| 238 |
|
| 239 |
full_response = re.sub(r"\[TOOL_CALLS\].*?\n*", "", full_response, flags=re.DOTALL).strip()
|
|
|
|
| 16 |
import copy
|
| 17 |
import time
|
| 18 |
|
| 19 |
+
# Configure environment variables and logging
|
| 20 |
os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
|
| 21 |
if not torch.cuda.is_available():
|
| 22 |
print("No GPU detected. Forcing CPU mode by setting CUDA_VISIBLE_DEVICES to an empty string.")
|
|
|
|
| 60 |
try:
|
| 61 |
text_chunks = []
|
| 62 |
with pdfplumber.open(file_path) as pdf:
|
| 63 |
+
# Always extract the first 3 pages
|
| 64 |
for i, page in enumerate(pdf.pages[:3]):
|
| 65 |
text = page.extract_text() or ""
|
| 66 |
text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
|
| 67 |
+
# For pages 4 to max_pages, add only if medical keywords are found
|
| 68 |
for i, page in enumerate(pdf.pages[3:max_pages], start=4):
|
| 69 |
page_text = page.extract_text() or ""
|
| 70 |
if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
|
|
|
|
| 87 |
text = extract_priority_pages(file_path)
|
| 88 |
result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
|
| 89 |
elif file_type == "csv":
|
| 90 |
+
df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
|
| 91 |
+
skip_blank_lines=False, on_bad_lines="skip")
|
| 92 |
content = df.fillna("").astype(str).values.tolist()
|
| 93 |
result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
|
| 94 |
elif file_type in ["xls", "xlsx"]:
|
|
|
|
| 156 |
def create_ui(agent):
|
| 157 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 158 |
gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
|
| 159 |
+
# Persistent conversation state to maintain history
|
| 160 |
conversation_state = gr.State([])
|
| 161 |
chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
|
| 162 |
file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
|
|
|
|
| 170 |
history = state
|
| 171 |
history.append({"role": "user", "content": message})
|
| 172 |
history.append({"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."})
|
| 173 |
+
# Yield the initial update
|
| 174 |
yield copy.deepcopy(history), None, copy.deepcopy(history)
|
| 175 |
|
| 176 |
extracted = ""
|
|
|
|
| 225 |
response_chunks.append(chunk_content)
|
| 226 |
full_response = "".join(response_chunks)
|
| 227 |
|
| 228 |
+
# Collect and render any tool calls
|
| 229 |
matches = re.findall(r"\[TOOL_CALLS\]\[(.*?)\]", chunk_content, re.DOTALL)
|
| 230 |
for m in matches:
|
| 231 |
tool_calls_rendered.append(f"\n📦 Tool Call: [{m.strip()}]")
|
|
|
|
| 239 |
else:
|
| 240 |
history.append({"role": "assistant", "content": display_response})
|
| 241 |
|
| 242 |
+
# Yield updated conversation state
|
| 243 |
yield copy.deepcopy(history), None, copy.deepcopy(history)
|
| 244 |
|
| 245 |
full_response = re.sub(r"\[TOOL_CALLS\].*?\n*", "", full_response, flags=re.DOTALL).strip()
|