CPS-Test-Mobile

Paused

App Files Files Community

Ali2206 committed on Apr 12

Commit

722c891

verified ·

1 Parent(s): fddf521

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -41

app.py CHANGED Viewed

@@ -52,51 +52,41 @@ def file_hash(path: str) -> str:
         return hashlib.md5(f.read()).hexdigest()
 def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
-    """Fast extraction of first pages and medically relevant sections"""
     try:
         text_chunks = []
         with pdfplumber.open(file_path) as pdf:
-            # Always process first 3 pages
             for i, page in enumerate(pdf.pages[:3]):
                 text_chunks.append(f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}")
-            # Scan subsequent pages for medical keywords
             for i, page in enumerate(pdf.pages[3:max_pages], start=4):
                 page_text = page.extract_text() or ""
                 if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
                     text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
         return "\n\n".join(text_chunks)
     except Exception as e:
         return f"PDF processing error: {str(e)}"
 def convert_file_to_json(file_path: str, file_type: str) -> str:
-    """Optimized file conversion with medical focus"""
     try:
         h = file_hash(file_path)
         cache_path = os.path.join(file_cache_dir, f"{h}.json")
         if os.path.exists(cache_path):
             return open(cache_path, "r", encoding="utf-8").read()
         if file_type == "pdf":
-            # Fast initial processing
             text = extract_priority_pages(file_path)
             result = json.dumps({
                 "filename": os.path.basename(file_path),
                 "content": text,
                 "status": "initial"
             })
-            # Start background full processing
             Thread(target=full_pdf_processing, args=(file_path, h)).start()
         elif file_type == "csv":
-            df = pd.read_csv(file_path, encoding_errors="replace", header=None,
-                           dtype=str, skip_blank_lines=False, on_bad_lines="skip")
             content = df.fillna("").astype(str).values.tolist()
             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         elif file_type in ["xls", "xlsx"]:
             try:
                 df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
@@ -104,44 +94,41 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
                 df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
             content = df.fillna("").astype(str).values.tolist()
             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         else:
             return json.dumps({"error": f"Unsupported file type: {file_type}"})
         with open(cache_path, "w", encoding="utf-8") as f:
             f.write(result)
         return result
     except Exception as e:
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
 def full_pdf_processing(file_path: str, file_hash: str):
-    """Background full PDF processing"""
     try:
         cache_path = os.path.join(file_cache_dir, f"{file_hash}_full.json")
         if os.path.exists(cache_path):
             return
         with pdfplumber.open(file_path) as pdf:
-            full_text = "\n".join([f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}"
-                                 for i, page in enumerate(pdf.pages)])
         result = json.dumps({
             "filename": os.path.basename(file_path),
             "content": full_text,
             "status": "complete"
         })
         with open(cache_path, "w", encoding="utf-8") as f:
             f.write(result)
     except Exception as e:
         print(f"Background processing failed: {str(e)}")
 def init_agent():
-    """Initialize TxAgent with medical analysis focus"""
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(target_tool_path):
         shutil.copy(default_tool_path, target_tool_path)
@@ -153,8 +140,7 @@ def init_agent():
         enable_checker=True,
         step_rag_num=8,
         seed=100,
-        additional_default_tools=[],
     )
     agent.init_model()
     return agent
@@ -164,7 +150,7 @@ def create_ui(agent: TxAgent):
         gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
         gr.Markdown("<h3 style='text-align: center;'>Identify potential oversights in patient care</h3>")
-        chatbot = gr.Chatbot(label="Analysis", height=600)
         file_upload = gr.File(
             label="Upload Medical Records",
             file_types=[".pdf", ".csv", ".xls", ".xlsx"],
@@ -179,16 +165,13 @@ def create_ui(agent: TxAgent):
             try:
                 history.append((message, "Analyzing records for potential oversights..."))
                 yield history
-                # Process files
                 extracted_data = ""
                 if files:
                     with ThreadPoolExecutor(max_workers=4) as executor:
-                        futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower())
-                                 for f in files if hasattr(f, 'name')]
                         extracted_data = "\n".join([sanitize_utf8(f.result()) for f in as_completed(futures)])
-                # Medical oversight analysis prompt
                 analysis_prompt = """Review these medical records and identify EXACTLY what might have been missed:
 1. List potential missed diagnoses
 2. Flag any medication conflicts
@@ -203,14 +186,13 @@ Provide ONLY the potential oversights in this format:
 ### Potential Oversights:
 1. [Missed diagnosis] - [Evidence from records]
 2. [Medication issue] - [Supporting data]
-3. [Assessment gap] - [Relevant findings]""".format(records=extracted_data[:15000])  # Limit input size
-                # Generate analysis
                 response = []
                 for chunk in agent.run_gradio_chat(
                     message=analysis_prompt,
                     history=[],
-                    temperature=0.2,  # More deterministic
                     max_new_tokens=1024,
                     max_token=4096,
                     call_agent=False,
@@ -220,17 +202,15 @@ Provide ONLY the potential oversights in this format:
                         response.append(chunk)
                     elif isinstance(chunk, list):
                         response.extend([c.content for c in chunk if hasattr(c, 'content')])
-                    if len(response) % 3 == 0:  # Update every 3 chunks
                         history[-1] = (message, "".join(response).strip())
                         yield history
-                # Finalize output
                 final_output = "".join(response).strip()
                 if not final_output:
                     final_output = "No clear oversights identified. Recommend comprehensive review."
-                # Format as bullet points if not already
                 if not final_output.startswith(("1.", "-", "*", "#")):
                     final_output = "• " + final_output.replace("\n", "\n• ")
@@ -242,7 +222,6 @@ Provide ONLY the potential oversights in this format:
                 history.append((message, f"❌ Analysis failed: {str(e)}"))
                 yield history
-        # UI event handlers
         inputs = [msg_input, chatbot, conversation_state, file_upload]
         send_btn.click(analyze_potential_oversights, inputs=inputs, outputs=chatbot)
         msg_input.submit(analyze_potential_oversights, inputs=inputs, outputs=chatbot)
@@ -258,10 +237,10 @@ Provide ONLY the potential oversights in this format:
 if __name__ == "__main__":
     print("Initializing medical analysis agent...")
     agent = init_agent()
     print("Launching interface...")
     demo = create_ui(agent)
-    demo.queue(concurrency_count=2).launch(
         server_name="0.0.0.0",
         server_port=7860,
         show_error=True,

         return hashlib.md5(f.read()).hexdigest()
 def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
     try:
         text_chunks = []
         with pdfplumber.open(file_path) as pdf:
             for i, page in enumerate(pdf.pages[:3]):
                 text_chunks.append(f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}")
             for i, page in enumerate(pdf.pages[3:max_pages], start=4):
                 page_text = page.extract_text() or ""
                 if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
                     text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
         return "\n\n".join(text_chunks)
     except Exception as e:
         return f"PDF processing error: {str(e)}"
 def convert_file_to_json(file_path: str, file_type: str) -> str:
     try:
         h = file_hash(file_path)
         cache_path = os.path.join(file_cache_dir, f"{h}.json")
         if os.path.exists(cache_path):
             return open(cache_path, "r", encoding="utf-8").read()
         if file_type == "pdf":
             text = extract_priority_pages(file_path)
             result = json.dumps({
                 "filename": os.path.basename(file_path),
                 "content": text,
                 "status": "initial"
             })
             Thread(target=full_pdf_processing, args=(file_path, h)).start()
         elif file_type == "csv":
+            df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str, skip_blank_lines=False, on_bad_lines="skip")
             content = df.fillna("").astype(str).values.tolist()
             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         elif file_type in ["xls", "xlsx"]:
             try:
                 df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
                 df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
             content = df.fillna("").astype(str).values.tolist()
             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         else:
             return json.dumps({"error": f"Unsupported file type: {file_type}"})
         with open(cache_path, "w", encoding="utf-8") as f:
             f.write(result)
         return result
     except Exception as e:
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
 def full_pdf_processing(file_path: str, file_hash: str):
     try:
         cache_path = os.path.join(file_cache_dir, f"{file_hash}_full.json")
         if os.path.exists(cache_path):
             return
         with pdfplumber.open(file_path) as pdf:
+            full_text = "\n".join([f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}" for i, page in enumerate(pdf.pages)])
         result = json.dumps({
             "filename": os.path.basename(file_path),
             "content": full_text,
             "status": "complete"
         })
         with open(cache_path, "w", encoding="utf-8") as f:
             f.write(result)
     except Exception as e:
         print(f"Background processing failed: {str(e)}")
 def init_agent():
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(target_tool_path):
         shutil.copy(default_tool_path, target_tool_path)
         enable_checker=True,
         step_rag_num=8,
         seed=100,
+        additional_default_tools=[]
     )
     agent.init_model()
     return agent
         gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
         gr.Markdown("<h3 style='text-align: center;'>Identify potential oversights in patient care</h3>")
+        chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
         file_upload = gr.File(
             label="Upload Medical Records",
             file_types=[".pdf", ".csv", ".xls", ".xlsx"],
             try:
                 history.append((message, "Analyzing records for potential oversights..."))
                 yield history
                 extracted_data = ""
                 if files:
                     with ThreadPoolExecutor(max_workers=4) as executor:
+                        futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower()) for f in files if hasattr(f, 'name')]
                         extracted_data = "\n".join([sanitize_utf8(f.result()) for f in as_completed(futures)])
                 analysis_prompt = """Review these medical records and identify EXACTLY what might have been missed:
 1. List potential missed diagnoses
 2. Flag any medication conflicts
 ### Potential Oversights:
 1. [Missed diagnosis] - [Evidence from records]
 2. [Medication issue] - [Supporting data]
+3. [Assessment gap] - [Relevant findings]""".format(records=extracted_data[:15000])
                 response = []
                 for chunk in agent.run_gradio_chat(
                     message=analysis_prompt,
                     history=[],
+                    temperature=0.2,
                     max_new_tokens=1024,
                     max_token=4096,
                     call_agent=False,
                         response.append(chunk)
                     elif isinstance(chunk, list):
                         response.extend([c.content for c in chunk if hasattr(c, 'content')])
+                    if len(response) % 3 == 0:
                         history[-1] = (message, "".join(response).strip())
                         yield history
                 final_output = "".join(response).strip()
                 if not final_output:
                     final_output = "No clear oversights identified. Recommend comprehensive review."
                 if not final_output.startswith(("1.", "-", "*", "#")):
                     final_output = "• " + final_output.replace("\n", "\n• ")
                 history.append((message, f"❌ Analysis failed: {str(e)}"))
                 yield history
         inputs = [msg_input, chatbot, conversation_state, file_upload]
         send_btn.click(analyze_potential_oversights, inputs=inputs, outputs=chatbot)
         msg_input.submit(analyze_potential_oversights, inputs=inputs, outputs=chatbot)
 if __name__ == "__main__":
     print("Initializing medical analysis agent...")
     agent = init_agent()
     print("Launching interface...")
     demo = create_ui(agent)
+    demo.queue().launch(
         server_name="0.0.0.0",
         server_port=7860,
         show_error=True,