Ali2206 committed · verified
Commit 13df505 · 1 Parent(s): 5707e8d

Update app.py

Files changed (1)
  1. app.py +42 -44

app.py CHANGED
@@ -10,7 +10,7 @@ import re
 import psutil
 import subprocess
 from collections import defaultdict
-from vllm import LLM, SamplingParams  # MODIFIED: Direct vLLM for batching
+from vllm import LLM, SamplingParams
 
 # Persistent directory
 persistent_dir = os.getenv("HF_HOME", "/data/hf_cache")
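
Note: the direct vLLM import above is what enables the offline batched generation used further down in the file. For reference, a minimal sketch of that usage pattern (the prompts below are invented; only the model name and sampling settings are taken from this commit):

# Minimal sketch of direct vLLM batch inference, assuming a CUDA GPU with the
# model weights available locally or via the Hugging Face Hub.
from vllm import LLM, SamplingParams

llm = LLM(model="mims-harvard/TxAgent-T1-Llama-3.1-8B", enforce_eager=True)
params = SamplingParams(temperature=0.3, max_tokens=64, seed=100)

prompts = [  # invented example prompts
    "List medication conflicts: patient takes warfarin and ibuprofen.",
    "List urgent follow-up items: abnormal ECG, no repeat scheduled.",
]
outputs = llm.generate(prompts, params)  # the whole prompt list is scheduled as one batch
for out in outputs:
    print(out.outputs[0].text)  # first completion for each prompt

llm.generate() schedules all prompts together, which is why the app no longer needs to issue one request per chunk.
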
@@ -35,7 +35,7 @@ current_dir = os.path.dirname(os.path.abspath(__file__))
 src_path = os.path.abspath(os.path.join(current_dir, "src"))
 sys.path.insert(0, src_path)
 
-from txagent.txagent import TxAgent
+from txagent.txagent import TxAgent, clean_response  # MODIFIED: Import clean_response
 
 def sanitize_utf8(text: str) -> str:
     return text.encode("utf-8", "ignore").decode("utf-8")
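
clean_response is now imported from txagent.txagent instead of being defined locally (the local copy is removed in the next hunk). If the deployed txagent version might not export it yet, a guarded import such as the following would keep app.py importable; this is only a suggestion, not part of the commit:

# Hypothetical fallback, not part of this commit: tolerate an older txagent
# that does not export clean_response.
try:
    from txagent.txagent import TxAgent, clean_response
except ImportError:
    from txagent.txagent import TxAgent

    def clean_response(text: str) -> str:
        # Degraded behaviour: pass the model output through unfiltered.
        return text
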
@@ -88,31 +88,6 @@ def log_system_usage(tag=""):
     except Exception as e:
         print(f"[{tag}] GPU/CPU monitor failed: {e}")
 
-def clean_response(text: str) -> str:
-    text = sanitize_utf8(text)
-    text = re.sub(r"\[TOOL_CALLS\].*?\n|\[.*?\].*?\n|(?:get_|tool\s|retrieve\s|use\s|rag\s).*?\n", "", text, flags=re.DOTALL | re.IGNORECASE)
-    text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
-    text = re.sub(
-        r"(?i)(to\s|analyze|will\s|since\s|no\s|none|previous|attempt|involve|check\s|explore|manually|"
-        r"start|look|use|focus|retrieve|tool|based\s|overall|indicate|mention|consider|ensure|need\s|"
-        r"provide|review|assess|identify|potential|records|patient|history|symptoms|medication|"
-        r"conflict|assessment|follow-up|issue|reasoning|step|prompt|address|rag|thought|try|john\sdoe|nkma).*?\n",
-        "", text, flags=re.DOTALL
-    )
-    text = re.sub(r"\n{2,}", "\n", text).strip()
-    lines = []
-    valid_heading = False
-    for line in text.split("\n"):
-        line = line.strip()
-        if line.lower() in ["missed diagnoses:", "medication conflicts:", "incomplete assessments:", "urgent follow-up:"]:
-            valid_heading = True
-            lines.append(f"**{line[:-1]}**:")
-        elif valid_heading and line.startswith("-"):
-            lines.append(line)
-        else:
-            valid_heading = False
-    return "\n".join(lines).strip()
-
 def normalize_text(text: str) -> str:
     return re.sub(r"\s+", " ", text.lower().strip())
 
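The removed helper stripped tool-call artifacts and free-form reasoning from the model output and kept only the four expected headings plus their "-" bullets. A condensed sketch of that heading/bullet filter, to show the intended behaviour (the regex-based pre-cleaning is omitted here; the real implementation now lives in txagent.txagent):

# Condensed sketch of the heading/bullet filter clean_response applied; the
# real helper (now in txagent.txagent) also strips tool-call text and stray
# reasoning with regexes before this step.
HEADINGS = {"missed diagnoses:", "medication conflicts:",
            "incomplete assessments:", "urgent follow-up:"}

def keep_headed_bullets(text: str) -> str:
    kept, under_heading = [], False
    for line in (raw.strip() for raw in text.split("\n")):
        if line.lower() in HEADINGS:
            under_heading = True
            kept.append(f"**{line[:-1]}**:")  # "Missed Diagnoses:" -> "**Missed Diagnoses**:"
        elif under_heading and line.startswith("-"):
            kept.append(line)
        else:
            under_heading = False
    return "\n".join(kept)

sample = "I will analyze the records.\nMedication Conflicts:\n- Warfarin with ibuprofen\nstray reasoning"
print(keep_headed_bullets(sample))
# **Medication Conflicts**:
# - Warfarin with ibuprofen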
 
@@ -146,10 +121,11 @@ def init_agent():
     log_system_usage("Before Load")
     model = LLM(
         model="mims-harvard/TxAgent-T1-Llama-3.1-8B",
-        max_model_len=4096,  # MODIFIED: Reduce KV cache
+        max_model_len=4096,  # MODIFIED: Enforce low VRAM
         enforce_eager=True,
         enable_chunked_prefill=True,
         max_num_batched_tokens=8192,
+        gpu_memory_utilization=0.5,  # MODIFIED: Limit VRAM
     )
     log_system_usage("After Load")
     print("✅ Model Ready")
@@ -178,44 +154,66 @@ def create_ui(model):
         extracted = "\n".join([json.loads(r).get("content", "") for r in results if "content" in json.loads(r)])
         file_hash_value = file_hash(files[0].name) if files else ""
 
-        chunk_size = 800
+        chunk_size = 800  # MODIFIED: Enforce correct size
         chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
         chunk_responses = []
-        batch_size = 8
+        batch_size = 4  # MODIFIED: Lower for VRAM
         total_chunks = len(chunks)
 
         prompt_template = """
-Output only oversights under these headings, one point each. No tools, reasoning, or extra text.
-
-**Missed Diagnoses**:
-**Medication Conflicts**:
-**Incomplete Assessments**:
-**Urgent Follow-up**:
+Strictly output oversights under these exact headings, one point per line, starting with "-". No other text, reasoning, or tools.
+
+**Missed Diagnoses**:
+**Medication Conflicts**:
+**Incomplete Assessments**:
+**Urgent Follow-up**:
 
-Records:
-{chunk}
-"""
+Records:
+{chunk}
+"""  # MODIFIED: Stronger instructions
+
         sampling_params = SamplingParams(
-            temperature=0.1,
-            max_tokens=32,  # MODIFIED: Reduce for speed
+            temperature=0.3,  # MODIFIED: Improve output quality
+            max_tokens=64,  # MODIFIED: Allow full responses
             seed=100,
         )
 
         try:
+            findings = defaultdict(list)  # MODIFIED: Track per batch
             for i in range(0, len(chunks), batch_size):
                 batch = chunks[i:i + batch_size]
                 prompts = [prompt_template.format(chunk=chunk) for chunk in batch]
                 log_system_usage(f"Batch {i//batch_size + 1}")
-                outputs = model.generate(prompts, sampling_params)  # MODIFIED: Batch inference
+                outputs = model.generate(prompts, sampling_params, use_tqdm=True)  # MODIFIED: Stream progress
                 batch_responses = []
-                with ThreadPoolExecutor(max_workers=8) as executor:  # MODIFIED: Parallel cleanup
+                with ThreadPoolExecutor(max_workers=4) as executor:
                     futures = [executor.submit(clean_response, output.outputs[0].text) for output in outputs]
                     batch_responses.extend(f.result() for f in as_completed(futures))
-                chunk_responses.extend([r for r in batch_responses if r])
+
                 processed = min(i + len(batch), total_chunks)
-                history[-1]["content"] = f"🔄 Analyzing... ({processed}/{total_chunks} chunks)"
+                batch_output = []
+                for response in batch_responses:
+                    if response:
+                        chunk_responses.append(response)
+                        current_heading = None
+                        for line in response.split("\n"):
+                            line = line.strip()
+                            if line.lower().startswith(tuple(h.lower() + ":" for h in ["missed diagnoses", "medication conflicts", "incomplete assessments", "urgent follow-up"])):
+                                current_heading = line[:-1]
+                                if current_heading not in batch_output:
+                                    batch_output.append(current_heading + ":")
+                            elif current_heading and line.startswith("-"):
+                                findings[current_heading].append(line)
+                                batch_output.append(line)
+
+                # MODIFIED: Stream partial results
+                if batch_output:
+                    history[-1]["content"] = "\n".join(batch_output) + f"\n\n🔄 Processing chunk {processed}/{total_chunks}..."
+                else:
+                    history[-1]["content"] = f"🔄 Processing chunk {processed}/{total_chunks}..."
                 yield history, None
 
+            # MODIFIED: Final consolidation
             final_response = consolidate_findings(chunk_responses)
             history[-1]["content"] = final_response
             yield history, None
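
The rewritten loop does more than count chunks: each cleaned response is parsed under the four headings into the findings defaultdict, and whatever has accumulated so far is streamed back to the chat while later batches are still running. The bookkeeping is easiest to see on a toy input (responses invented; heading and bullet handling mirror the diff):

# Toy walk-through of the per-batch parsing added above (responses invented).
from collections import defaultdict

HEADINGS = ["missed diagnoses", "medication conflicts",
            "incomplete assessments", "urgent follow-up"]
findings = defaultdict(list)
batch_output = []

batch_responses = [
    "Medication Conflicts:\n- Warfarin with ibuprofen",
    "Urgent Follow-up:\n- Repeat ECG never scheduled",
]

for response in batch_responses:
    current_heading = None
    for line in (raw.strip() for raw in response.split("\n")):
        if line.lower().startswith(tuple(h + ":" for h in HEADINGS)):
            current_heading = line[:-1]
            if current_heading not in batch_output:
                batch_output.append(current_heading + ":")
        elif current_heading and line.startswith("-"):
            findings[current_heading].append(line)
            batch_output.append(line)

print("\n".join(batch_output))
# Medication Conflicts:
# - Warfarin with ibuprofen
# Urgent Follow-up:
# - Repeat ECG never scheduled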
 
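consolidate_findings is called on the accumulated chunk_responses, but its body is outside this diff. Purely to illustrate the expected data flow, a hypothetical consolidation that de-duplicates bullets per heading could look like this (the actual function in app.py may differ):

# Hypothetical sketch only: the real consolidate_findings in app.py is not
# shown in this diff and may work differently.
from typing import Dict, List

def consolidate_findings_sketch(chunk_responses: List[str]) -> str:
    merged: Dict[str, List[str]] = {}
    current = None
    for response in chunk_responses:
        for line in (raw.strip() for raw in response.split("\n")):
            if line.endswith(":") and not line.startswith("-"):
                current = line
                merged.setdefault(current, [])
            elif current and line.startswith("-") and line not in merged[current]:
                merged[current].append(line)  # de-duplicate repeated bullets per heading
    sections = [f"{heading}\n" + "\n".join(bullets)
                for heading, bullets in merged.items() if bullets]
    return "\n\n".join(sections) if sections else "No oversights identified."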