CPS-Test-Mobile

Paused

App Files Files Community

Ali2206 committed on Apr 14

Commit

47975be

verified ·

1 Parent(s): 41c4b2a

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -3

app.py CHANGED Viewed

@@ -16,7 +16,7 @@ import torch
 import copy
 import time
-# Configure environment variables
 os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
 if not torch.cuda.is_available():
     print("No GPU detected. Forcing CPU mode by setting CUDA_VISIBLE_DEVICES to an empty string.")
@@ -60,9 +60,11 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
     try:
         text_chunks = []
         with pdfplumber.open(file_path) as pdf:
             for i, page in enumerate(pdf.pages[:3]):
                 text = page.extract_text() or ""
                 text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
             for i, page in enumerate(pdf.pages[3:max_pages], start=4):
                 page_text = page.extract_text() or ""
                 if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
@@ -85,7 +87,8 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
             text = extract_priority_pages(file_path)
             result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
         elif file_type == "csv":
-            df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str, skip_blank_lines=False, on_bad_lines="skip")
             content = df.fillna("").astype(str).values.tolist()
             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         elif file_type in ["xls", "xlsx"]:
@@ -153,7 +156,7 @@ def init_agent():
 def create_ui(agent):
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
-        # Persistent conversation state
         conversation_state = gr.State([])
         chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
         file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
@@ -167,6 +170,7 @@ def create_ui(agent):
             history = state
             history.append({"role": "user", "content": message})
             history.append({"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."})
             yield copy.deepcopy(history), None, copy.deepcopy(history)
             extracted = ""
@@ -221,6 +225,7 @@ Medical Records:
                     response_chunks.append(chunk_content)
                     full_response = "".join(response_chunks)
                     matches = re.findall(r"\[TOOL_CALLS\]\[(.*?)\]", chunk_content, re.DOTALL)
                     for m in matches:
                         tool_calls_rendered.append(f"\n📦 Tool Call: [{m.strip()}]")
@@ -234,6 +239,7 @@ Medical Records:
                     else:
                         history.append({"role": "assistant", "content": display_response})
                     yield copy.deepcopy(history), None, copy.deepcopy(history)
                 full_response = re.sub(r"\[TOOL_CALLS\].*?\n*", "", full_response, flags=re.DOTALL).strip()

 import copy
 import time
+# Configure environment variables and logging
 os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
 if not torch.cuda.is_available():
     print("No GPU detected. Forcing CPU mode by setting CUDA_VISIBLE_DEVICES to an empty string.")
     try:
         text_chunks = []
         with pdfplumber.open(file_path) as pdf:
+            # Always extract the first 3 pages
             for i, page in enumerate(pdf.pages[:3]):
                 text = page.extract_text() or ""
                 text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
+            # For pages 4 to max_pages, add only if medical keywords are found
             for i, page in enumerate(pdf.pages[3:max_pages], start=4):
                 page_text = page.extract_text() or ""
                 if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
             text = extract_priority_pages(file_path)
             result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
         elif file_type == "csv":
+            df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
+                             skip_blank_lines=False, on_bad_lines="skip")
             content = df.fillna("").astype(str).values.tolist()
             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         elif file_type in ["xls", "xlsx"]:
 def create_ui(agent):
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
+        # Persistent conversation state to maintain history
         conversation_state = gr.State([])
         chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
         file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
             history = state
             history.append({"role": "user", "content": message})
             history.append({"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."})
+            # Yield the initial update
             yield copy.deepcopy(history), None, copy.deepcopy(history)
             extracted = ""
                     response_chunks.append(chunk_content)
                     full_response = "".join(response_chunks)
+                    # Collect and render any tool calls
                     matches = re.findall(r"\[TOOL_CALLS\]\[(.*?)\]", chunk_content, re.DOTALL)
                     for m in matches:
                         tool_calls_rendered.append(f"\n📦 Tool Call: [{m.strip()}]")
                     else:
                         history.append({"role": "assistant", "content": display_response})
+                    # Yield updated conversation state
                     yield copy.deepcopy(history), None, copy.deepcopy(history)
                 full_response = re.sub(r"\[TOOL_CALLS\].*?\n*", "", full_response, flags=re.DOTALL).strip()