CPS-Test-Mobile

Paused

App Files Community

Ali2206 committed on Apr 19

Commit

5601da1

verified ·

1 Parent(s): 5d37db7

Update src/txagent/txagent.py

Browse files

Files changed (1) hide show

src/txagent/txagent.py +21 -1

src/txagent/txagent.py CHANGED Viewed

@@ -81,7 +81,13 @@ class TxAgent:
             self.model_name = model_name
         try:
-            self.model = LLM(model=self.model_name, dtype="float16", max_model_len=131072)
             self.chat_template = Template(self.model.get_tokenizer().chat_template)
             self.tokenizer = self.model.get_tokenizer()
             logger.info("Model %s loaded successfully", self.model_name)
@@ -419,6 +425,10 @@ class TxAgent:
         if output_begin_string:
             prompt += output_begin_string
         if check_token_status and max_token:
             num_input_tokens = len(self.tokenizer.encode(prompt, add_special_tokens=False))
             if num_input_tokens > max_token:
@@ -430,6 +440,7 @@ class TxAgent:
                 return None, True
         try:
             output = model.generate(prompt, sampling_params=sampling_params)
             output = output[0].outputs[0].text
             logger.debug("Inference output: %s", output[:100])
@@ -438,6 +449,7 @@ class TxAgent:
             return None, True
         torch.cuda.empty_cache()
         if check_token_status:
             return output, False
         return output
@@ -445,6 +457,10 @@ class TxAgent:
     def run_quick_summary(self, message: str, temperature: float = 0.1, max_new_tokens: int = 256, max_token: int = 1024):
         """Generate a fast, concise summary of potential missed diagnoses without tool calls"""
         logger.debug("Starting quick summary for message: %s", message[:100])
         prompt = """
 Analyze the patient record excerpt for missed diagnoses, focusing ONLY on clinical findings such as symptoms, medications, or evaluation results. Provide a concise summary in ONE paragraph without headings or bullet points. ALWAYS treat medications or psychiatric evaluations as potential missed diagnoses, specifying their implications (e.g., 'use of Seroquel may indicate untreated psychosis'). Recommend urgent review for identified findings. Do NOT use external tools or repeat non-clinical data (e.g., name, date of birth). If no clinical findings are present, state 'No missed diagnoses identified' in ONE sentence.
 Patient Record Excerpt:
@@ -473,6 +489,10 @@ Patient Record Excerpt:
                              call_agent_level: int, report_path: str):
         """Run detailed report generation in the background and save to file"""
         logger.debug("Starting background report for message: %s", message[:100])
         combined_response = ""
         history_copy = history.copy()

             self.model_name = model_name
         try:
+            torch.cuda.empty_cache()
+            self.model = LLM(
+                model=self.model_name,
+                dtype="float16",
+                max_model_len=131072,
+                enforce_eager=True  # Avoid graph compilation issues
+            )
             self.chat_template = Template(self.model.get_tokenizer().chat_template)
             self.tokenizer = self.model.get_tokenizer()
             logger.info("Model %s loaded successfully", self.model_name)
         if output_begin_string:
             prompt += output_begin_string
+        if len(prompt) > 100000:  # Early text length check
+            logger.error(f"Prompt length ({len(prompt)}) exceeds limit (100000).")
+            return None, True
         if check_token_status and max_token:
             num_input_tokens = len(self.tokenizer.encode(prompt, add_special_tokens=False))
             if num_input_tokens > max_token:
                 return None, True
         try:
+            torch.cuda.empty_cache()
             output = model.generate(prompt, sampling_params=sampling_params)
             output = output[0].outputs[0].text
             logger.debug("Inference output: %s", output[:100])
             return None, True
         torch.cuda.empty_cache()
+        gc.collect()
         if check_token_status:
             return output, False
         return output
     def run_quick_summary(self, message: str, temperature: float = 0.1, max_new_tokens: int = 256, max_token: int = 1024):
         """Generate a fast, concise summary of potential missed diagnoses without tool calls"""
         logger.debug("Starting quick summary for message: %s", message[:100])
+        if len(message) > 50000:
+            logger.warning(f"Message length ({len(message)}) exceeds limit (50000). Truncating.")
+            message = message[:50000]
         prompt = """
 Analyze the patient record excerpt for missed diagnoses, focusing ONLY on clinical findings such as symptoms, medications, or evaluation results. Provide a concise summary in ONE paragraph without headings or bullet points. ALWAYS treat medications or psychiatric evaluations as potential missed diagnoses, specifying their implications (e.g., 'use of Seroquel may indicate untreated psychosis'). Recommend urgent review for identified findings. Do NOT use external tools or repeat non-clinical data (e.g., name, date of birth). If no clinical findings are present, state 'No missed diagnoses identified' in ONE sentence.
 Patient Record Excerpt:
                              call_agent_level: int, report_path: str):
         """Run detailed report generation in the background and save to file"""
         logger.debug("Starting background report for message: %s", message[:100])
+        if len(message) > 50000:
+            logger.warning(f"Message length ({len(message)}) exceeds limit (50000). Truncating.")
+            message = message[:50000]
         combined_response = ""
         history_copy = history.copy()