Ali2206 committed on
Commit
65a2e99
·
verified ·
1 Parent(s): 38b7c69

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -27
app.py CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
4
  import pdfplumber
5
  import json
6
  import gradio as gr
7
- from typing import List, Optional
8
  from concurrent.futures import ThreadPoolExecutor, as_completed
9
  import hashlib
10
  import shutil
@@ -14,25 +14,21 @@ from threading import Thread
14
  import re
15
  import tempfile
16
 
17
- # Environment setup
18
  current_dir = os.path.dirname(os.path.abspath(__file__))
19
  src_path = os.path.abspath(os.path.join(current_dir, "src"))
20
  sys.path.insert(0, src_path)
21
 
22
  # Cache directories
23
  base_dir = "/data"
24
- os.makedirs(base_dir, exist_ok=True)
25
  model_cache_dir = os.path.join(base_dir, "txagent_models")
26
  tool_cache_dir = os.path.join(base_dir, "tool_cache")
27
  file_cache_dir = os.path.join(base_dir, "cache")
28
- report_dir = "/data/reports"
29
  vllm_cache_dir = os.path.join(base_dir, "vllm_cache")
30
 
31
- os.makedirs(model_cache_dir, exist_ok=True)
32
- os.makedirs(tool_cache_dir, exist_ok=True)
33
- os.makedirs(file_cache_dir, exist_ok=True)
34
- os.makedirs(report_dir, exist_ok=True)
35
- os.makedirs(vllm_cache_dir, exist_ok=True)
36
 
37
  os.environ.update({
38
  "TRANSFORMERS_CACHE": model_cache_dir,
@@ -64,7 +60,7 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
64
  text_chunks.append(f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}")
65
  for i, page in enumerate(pdf.pages[3:max_pages], start=4):
66
  page_text = page.extract_text() or ""
67
- if any(re.search(rf'\\b{kw}\\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
68
  text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
69
  return "\n\n".join(text_chunks)
70
  except Exception as e:
@@ -81,12 +77,10 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
81
  text = extract_priority_pages(file_path)
82
  result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
83
  Thread(target=full_pdf_processing, args=(file_path, h)).start()
84
-
85
  elif file_type == "csv":
86
  df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str, skip_blank_lines=False, on_bad_lines="skip")
87
  content = df.fillna("").astype(str).values.tolist()
88
  result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
89
-
90
  elif file_type in ["xls", "xlsx"]:
91
  try:
92
  df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
@@ -94,7 +88,6 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
94
  df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
95
  content = df.fillna("").astype(str).values.tolist()
96
  result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
97
-
98
  else:
99
  return json.dumps({"error": f"Unsupported file type: {file_type}"})
100
 
@@ -154,20 +147,21 @@ def create_ui(agent: TxAgent):
154
  download_output = gr.File(label="Download Full Report")
155
 
156
  def analyze_potential_oversights(message: str, history: list, conversation: list, files: list):
157
- start_time = time.time()
158
  try:
159
- history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."}]
 
160
  yield history, None
161
 
162
  extracted_data = ""
163
  file_hash_value = ""
 
164
  if files and isinstance(files, list):
165
  with ThreadPoolExecutor(max_workers=4) as executor:
166
  futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower()) for f in files if hasattr(f, 'name')]
167
  extracted_data = "\n".join([sanitize_utf8(f.result()) for f in as_completed(futures)])
168
  file_hash_value = file_hash(files[0].name) if files else ""
169
 
170
- analysis_prompt = f"""Review these medical records and identify EXACTLY what might have been missed:
171
  1. List potential missed diagnoses
172
  2. Flag any medication conflicts
173
  3. Note incomplete assessments
@@ -177,9 +171,9 @@ Medical Records:\n{extracted_data[:15000]}
177
 
178
  ### Potential Oversights:\n"""
179
 
180
- response = ""
181
  for chunk in agent.run_gradio_chat(
182
- message=analysis_prompt,
183
  history=[],
184
  temperature=0.2,
185
  max_new_tokens=1024,
@@ -188,16 +182,13 @@ Medical Records:\n{extracted_data[:15000]}
188
  conversation=conversation
189
  ):
190
  if isinstance(chunk, str):
191
- response += chunk
192
  elif isinstance(chunk, list):
193
- response += "".join([c.content for c in chunk if hasattr(c, 'content')])
194
-
195
- cleaned = response.replace("[TOOL_CALLS]", "").strip()
196
- yield history[:-1] + [{"role": "assistant", "content": cleaned}], None
197
 
198
- final_output = response.replace("[TOOL_CALLS]", "").strip()
199
- if not final_output:
200
- final_output = "No clear oversights identified. Recommend comprehensive review."
201
 
202
  report_path = None
203
  if file_hash_value:
@@ -205,7 +196,7 @@ Medical Records:\n{extracted_data[:15000]}
205
  if os.path.exists(possible_report):
206
  report_path = possible_report
207
 
208
- history = history[:-1] + [{"role": "assistant", "content": final_output}]
209
  yield history, report_path
210
 
211
  except Exception as e:
 
4
  import pdfplumber
5
  import json
6
  import gradio as gr
7
+ from typing import List
8
  from concurrent.futures import ThreadPoolExecutor, as_completed
9
  import hashlib
10
  import shutil
 
14
  import re
15
  import tempfile
16
 
17
+ # Setup paths
18
  current_dir = os.path.dirname(os.path.abspath(__file__))
19
  src_path = os.path.abspath(os.path.join(current_dir, "src"))
20
  sys.path.insert(0, src_path)
21
 
22
  # Cache directories
23
  base_dir = "/data"
 
24
  model_cache_dir = os.path.join(base_dir, "txagent_models")
25
  tool_cache_dir = os.path.join(base_dir, "tool_cache")
26
  file_cache_dir = os.path.join(base_dir, "cache")
27
+ report_dir = os.path.join(base_dir, "reports")
28
  vllm_cache_dir = os.path.join(base_dir, "vllm_cache")
29
 
30
+ for d in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir, vllm_cache_dir]:
31
+ os.makedirs(d, exist_ok=True)
 
 
 
32
 
33
  os.environ.update({
34
  "TRANSFORMERS_CACHE": model_cache_dir,
 
60
  text_chunks.append(f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}")
61
  for i, page in enumerate(pdf.pages[3:max_pages], start=4):
62
  page_text = page.extract_text() or ""
63
+ if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
64
  text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
65
  return "\n\n".join(text_chunks)
66
  except Exception as e:
 
77
  text = extract_priority_pages(file_path)
78
  result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
79
  Thread(target=full_pdf_processing, args=(file_path, h)).start()
 
80
  elif file_type == "csv":
81
  df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str, skip_blank_lines=False, on_bad_lines="skip")
82
  content = df.fillna("").astype(str).values.tolist()
83
  result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
 
84
  elif file_type in ["xls", "xlsx"]:
85
  try:
86
  df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
 
88
  df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
89
  content = df.fillna("").astype(str).values.tolist()
90
  result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
 
91
  else:
92
  return json.dumps({"error": f"Unsupported file type: {file_type}"})
93
 
 
147
  download_output = gr.File(label="Download Full Report")
148
 
149
  def analyze_potential_oversights(message: str, history: list, conversation: list, files: list):
 
150
  try:
151
+ history.append({"role": "user", "content": message})
152
+ history.append({"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."})
153
  yield history, None
154
 
155
  extracted_data = ""
156
  file_hash_value = ""
157
+
158
  if files and isinstance(files, list):
159
  with ThreadPoolExecutor(max_workers=4) as executor:
160
  futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower()) for f in files if hasattr(f, 'name')]
161
  extracted_data = "\n".join([sanitize_utf8(f.result()) for f in as_completed(futures)])
162
  file_hash_value = file_hash(files[0].name) if files else ""
163
 
164
+ prompt = f"""Review these medical records and identify EXACTLY what might have been missed:
165
  1. List potential missed diagnoses
166
  2. Flag any medication conflicts
167
  3. Note incomplete assessments
 
171
 
172
  ### Potential Oversights:\n"""
173
 
174
+ final_output = ""
175
  for chunk in agent.run_gradio_chat(
176
+ message=prompt,
177
  history=[],
178
  temperature=0.2,
179
  max_new_tokens=1024,
 
182
  conversation=conversation
183
  ):
184
  if isinstance(chunk, str):
185
+ final_output += chunk
186
  elif isinstance(chunk, list):
187
+ final_output += "".join([c.content for c in chunk if hasattr(c, 'content')])
 
 
 
188
 
189
+ cleaned = final_output.replace("[TOOL_CALLS]", "").strip()
190
+ if not cleaned:
191
+ cleaned = "No clear oversights identified. Recommend comprehensive review."
192
 
193
  report_path = None
194
  if file_hash_value:
 
196
  if os.path.exists(possible_report):
197
  report_path = possible_report
198
 
199
+ history[-1] = {"role": "assistant", "content": cleaned}
200
  yield history, report_path
201
 
202
  except Exception as e: