Update app.py
Browse files
app.py
CHANGED
@@ -14,36 +14,42 @@ import re
|
|
14 |
import tempfile
|
15 |
import threading
|
16 |
|
17 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
19 |
src_path = os.path.abspath(os.path.join(current_dir, "src"))
|
20 |
sys.path.insert(0, src_path)
|
21 |
|
22 |
-
#
|
23 |
-
|
24 |
-
|
25 |
-
model_cache_dir = os.path.join(base_dir, "txagent_models")
|
26 |
-
tool_cache_dir = os.path.join(base_dir, "tool_cache")
|
27 |
-
file_cache_dir = os.path.join(base_dir, "cache")
|
28 |
-
report_dir = "/data/reports"
|
29 |
-
vllm_cache_dir = os.path.join(base_dir, "vllm_cache")
|
30 |
-
|
31 |
-
os.makedirs(model_cache_dir, exist_ok=True)
|
32 |
-
os.makedirs(tool_cache_dir, exist_ok=True)
|
33 |
-
os.makedirs(file_cache_dir, exist_ok=True)
|
34 |
-
os.makedirs(report_dir, exist_ok=True)
|
35 |
-
os.makedirs(vllm_cache_dir, exist_ok=True)
|
36 |
-
|
37 |
-
os.environ.update({
|
38 |
-
"TRANSFORMERS_CACHE": model_cache_dir,
|
39 |
-
"HF_HOME": model_cache_dir,
|
40 |
-
"VLLM_CACHE_DIR": vllm_cache_dir,
|
41 |
-
"TOKENIZERS_PARALLELISM": "false",
|
42 |
-
"CUDA_LAUNCH_BLOCKING": "1"
|
43 |
-
})
|
44 |
-
|
45 |
from txagent.txagent import TxAgent
|
46 |
|
|
|
|
|
|
|
47 |
MEDICAL_KEYWORDS = {
|
48 |
'diagnosis', 'assessment', 'plan', 'results', 'medications',
|
49 |
'allergies', 'summary', 'impression', 'findings', 'recommendations'
|
@@ -60,11 +66,11 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
|
|
60 |
try:
|
61 |
text_chunks = []
|
62 |
with pdfplumber.open(file_path) as pdf:
|
63 |
-
# Process first three pages
|
64 |
for i, page in enumerate(pdf.pages[:3]):
|
65 |
text = page.extract_text() or ""
|
66 |
text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
|
67 |
-
#
|
68 |
for i, page in enumerate(pdf.pages[3:max_pages], start=4):
|
69 |
page_text = page.extract_text() or ""
|
70 |
if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
|
@@ -121,7 +127,9 @@ def full_pdf_processing(file_path: str, file_hash_value: str):
|
|
121 |
except Exception as e:
|
122 |
print(f"Background processing failed: {str(e)}")
|
123 |
|
124 |
-
#
|
|
|
|
|
125 |
agent = None
|
126 |
agent_lock = Lock()
|
127 |
|
@@ -147,13 +155,16 @@ def load_agent_in_background():
|
|
147 |
global agent
|
148 |
with agent_lock:
|
149 |
if agent is None:
|
150 |
-
print("Initializing agent in background...")
|
151 |
agent = init_agent()
|
152 |
print("Agent initialization complete.")
|
153 |
|
154 |
# Start background agent loading at startup
|
155 |
threading.Thread(target=load_agent_in_background, daemon=True).start()
|
156 |
|
|
|
|
|
|
|
157 |
def create_ui():
|
158 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
159 |
gr.Markdown("""
|
@@ -197,7 +208,7 @@ def create_ui():
|
|
197 |
extracted_data = "\n".join(results)
|
198 |
file_hash_value = file_hash(files[0].name) if hasattr(files[0], 'name') else ""
|
199 |
|
200 |
-
# Truncate
|
201 |
max_extracted_chars = 12000
|
202 |
truncated_data = extracted_data[:max_extracted_chars]
|
203 |
|
@@ -236,8 +247,7 @@ Medical Records:
|
|
236 |
history[-1] = {"role": "assistant", "content": cleaned}
|
237 |
yield history, None
|
238 |
except Exception as agent_error:
|
239 |
-
history[-1] = {"role": "assistant",
|
240 |
-
"content": f"❌ Analysis failed during processing: {str(agent_error)}"}
|
241 |
yield history, None
|
242 |
return
|
243 |
|
@@ -275,6 +285,6 @@ if __name__ == "__main__":
|
|
275 |
server_name="0.0.0.0",
|
276 |
server_port=7860,
|
277 |
show_error=True,
|
278 |
-
allowed_paths=[
|
279 |
share=False
|
280 |
)
|
|
|
14 |
import tempfile
|
15 |
import threading
|
16 |
|
17 |
+
# ---------------------------------------------------------------------------------------
# Persistent storage layout for Hugging Face Spaces
# ---------------------------------------------------------------------------------------
# Everything cacheable lives under one persistent root (adjust to your Space's settings)
# so models, tools, and reports survive container restarts.
persistent_dir = "/workspace/hf_cache"
os.makedirs(persistent_dir, exist_ok=True)

model_cache_dir = os.path.join(persistent_dir, "txagent_models")
tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
file_cache_dir = os.path.join(persistent_dir, "cache")
report_dir = os.path.join(persistent_dir, "reports")
vllm_cache_dir = os.path.join(persistent_dir, "vllm_cache")

for _cache_dir in (model_cache_dir, tool_cache_dir, file_cache_dir, report_dir, vllm_cache_dir):
    os.makedirs(_cache_dir, exist_ok=True)

# Redirect the model / tokenizer / vLLM caches into persistent storage; these must be
# set before the TxAgent import below pulls in the HF stack.
os.environ.update({
    "HF_HOME": model_cache_dir,
    "TRANSFORMERS_CACHE": model_cache_dir,
    "VLLM_CACHE_DIR": vllm_cache_dir,
    "TOKENIZERS_PARALLELISM": "false",
    "CUDA_LAUNCH_BLOCKING": "1",
})

# Make the bundled local source tree (./src) importable ahead of installed packages.
current_dir = os.path.dirname(os.path.abspath(__file__))
src_path = os.path.abspath(os.path.join(current_dir, "src"))
sys.path.insert(0, src_path)
|
44 |
|
45 |
+
# ---------------------------------------------------------------------------------------
|
46 |
+
# Import the TxAgent from your tool package
|
47 |
+
# ---------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
from txagent.txagent import TxAgent
|
49 |
|
50 |
+
# ---------------------------------------------------------------------------------------
|
51 |
+
# Define constants and helper functions
|
52 |
+
# ---------------------------------------------------------------------------------------
|
53 |
MEDICAL_KEYWORDS = {
|
54 |
'diagnosis', 'assessment', 'plan', 'results', 'medications',
|
55 |
'allergies', 'summary', 'impression', 'findings', 'recommendations'
|
|
|
66 |
try:
|
67 |
text_chunks = []
|
68 |
with pdfplumber.open(file_path) as pdf:
|
69 |
+
# Process first three pages always
|
70 |
for i, page in enumerate(pdf.pages[:3]):
|
71 |
text = page.extract_text() or ""
|
72 |
text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
|
73 |
+
# Process subsequent pages only if they contain key medical keywords
|
74 |
for i, page in enumerate(pdf.pages[3:max_pages], start=4):
|
75 |
page_text = page.extract_text() or ""
|
76 |
if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
|
|
|
127 |
except Exception as e:
|
128 |
print(f"Background processing failed: {str(e)}")
|
129 |
|
130 |
+
# ---------------------------------------------------------------------------------------
|
131 |
+
# Global agent variable and thread-safe lock for background model loading
|
132 |
+
# ---------------------------------------------------------------------------------------
|
133 |
agent = None
|
134 |
agent_lock = Lock()
|
135 |
|
|
|
155 |
global agent
|
156 |
with agent_lock:
|
157 |
if agent is None:
|
158 |
+
print("Initializing agent in background (this may take a while)...")
|
159 |
agent = init_agent()
|
160 |
print("Agent initialization complete.")
|
161 |
|
162 |
# Start background agent loading at startup
|
163 |
threading.Thread(target=load_agent_in_background, daemon=True).start()
|
164 |
|
165 |
+
# ---------------------------------------------------------------------------------------
|
166 |
+
# Define the Gradio UI
|
167 |
+
# ---------------------------------------------------------------------------------------
|
168 |
def create_ui():
|
169 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
170 |
gr.Markdown("""
|
|
|
208 |
extracted_data = "\n".join(results)
|
209 |
file_hash_value = file_hash(files[0].name) if hasattr(files[0], 'name') else ""
|
210 |
|
211 |
+
# Truncate extracted data to avoid token overflow
|
212 |
max_extracted_chars = 12000
|
213 |
truncated_data = extracted_data[:max_extracted_chars]
|
214 |
|
|
|
247 |
history[-1] = {"role": "assistant", "content": cleaned}
|
248 |
yield history, None
|
249 |
except Exception as agent_error:
|
250 |
+
history[-1] = {"role": "assistant", "content": f"❌ Analysis failed during processing: {str(agent_error)}"}
|
|
|
251 |
yield history, None
|
252 |
return
|
253 |
|
|
|
285 |
server_name="0.0.0.0",
|
286 |
server_port=7860,
|
287 |
show_error=True,
|
288 |
+
allowed_paths=[report_dir],
|
289 |
share=False
|
290 |
)
|