Update app.py
app.py CHANGED
@@ -45,9 +45,9 @@ MEDICAL_KEYWORDS = {
     'allergies', 'summary', 'impression', 'findings', 'recommendations',
     'conclusion', 'history', 'examination', 'progress', 'discharge'
 }
-TOKENIZER = "cl100k_base"
-MAX_MODEL_LEN =
-
+TOKENIZER = "cl100k_base"
+MAX_MODEL_LEN = 2048  # Matches your model's actual limit
+TARGET_CHUNK_TOKENS = 1500  # Leaves room for prompt and response
 MEDICAL_SECTION_HEADER = "=== MEDICAL SECTION ==="
 
 def sanitize_utf8(text: str) -> str:
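Note: the chunking logic in this diff relies on a count_tokens helper that is not shown in these hunks. A minimal sketch of what it presumably looks like, assuming it wraps tiktoken with the cl100k_base encoding named by TOKENIZER (the actual helper in app.py may differ):

import tiktoken

# Assumed helper (not part of this diff): measure text length with the
# encoding named by TOKENIZER so sizes can be compared against MAX_MODEL_LEN.
_ENCODING = tiktoken.get_encoding("cl100k_base")

def count_tokens(text: str) -> int:
    return len(_ENCODING.encode(text))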
@@ -226,7 +226,7 @@ def format_final_report(analysis_results: List[str], filename: str) -> str:
 
     return "\n".join(report)
 
-def split_content_by_tokens(content: str, max_tokens: int =
+def split_content_by_tokens(content: str, max_tokens: int = TARGET_CHUNK_TOKENS) -> List[str]:
     """Split content into chunks that fit within token limits"""
     paragraphs = re.split(r"\n\s*\n", content)
     chunks = []
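Note: only the changed signature and the first lines of split_content_by_tokens are visible in this hunk. A minimal sketch of how the paragraph-based chunking could continue, assuming the count_tokens helper sketched above; the real body in app.py may differ:

import re
from typing import List

TARGET_CHUNK_TOKENS = 1500  # assumed, matching the constant added in the first hunk

def split_content_by_tokens(content: str, max_tokens: int = TARGET_CHUNK_TOKENS) -> List[str]:
    """Split content into chunks that fit within token limits"""
    paragraphs = re.split(r"\n\s*\n", content)
    chunks: List[str] = []
    current: List[str] = []
    current_tokens = 0
    for para in paragraphs:
        para_tokens = count_tokens(para)  # helper sketched above
        if current and current_tokens + para_tokens > max_tokens:
            chunks.append("\n\n".join(current))  # close the chunk before it overflows
            current, current_tokens = [], 0
        current.append(para)
        current_tokens += para_tokens
    if current:
        chunks.append("\n\n".join(current))
    return chunks

A single paragraph longer than max_tokens would still yield an oversized chunk here, which is why analyze_complete_document re-checks the token budget and truncates each chunk in the next hunk.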
@@ -292,40 +292,49 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent) -> str:
 
     for i, chunk in enumerate(chunks):
         try:
-            # Create
+            # Create minimal prompt to save tokens
             prompt = f"""
-Analyze this
-
-
-**Content:**
+Analyze this medical record section for:
+1. Critical findings (urgent)
+2. Missed diagnoses (with evidence)
+3. Medication issues
+4. Assessment gaps
+5. Follow-up needs
 
+Content:
 {chunk}
 
-
-1. CRITICAL FINDINGS (urgent issues)
-2. MISSED DIAGNOSES (with supporting evidence)
-3. MEDICATION ISSUES (specific conflicts)
-4. ASSESSMENT GAPS (missing evaluations)
-5. FOLLOW-UP RECOMMENDATIONS (specific actions)
-
-Be concise and evidence-based:
+Concise findings only:
 """
-            #
+            # Verify we're within token limits
             prompt_tokens = count_tokens(prompt)
             chunk_tokens = count_tokens(chunk)
 
-            if prompt_tokens + chunk_tokens > MAX_MODEL_LEN -
-                #
-                max_chunk_tokens = MAX_MODEL_LEN - prompt_tokens - 1024
+            if prompt_tokens + chunk_tokens > MAX_MODEL_LEN - 512:  # Leave room for response
+                # Find a natural truncation point
                 adjusted_chunk = ""
                 tokens_used = 0
+                max_content_tokens = MAX_MODEL_LEN - prompt_tokens - 512
+
                 for para in re.split(r"\n\s*\n", chunk):
                     para_tokens = count_tokens(para)
-                    if tokens_used + para_tokens <=
+                    if tokens_used + para_tokens <= max_content_tokens:
                         adjusted_chunk += "\n\n" + para
                         tokens_used += para_tokens
                     else:
                         break
+
+                if not adjusted_chunk:
+                    # If even one paragraph is too long, split sentences
+                    sentences = re.split(r'(?<=[.!?])\s+', chunk)
+                    for sent in sentences:
+                        sent_tokens = count_tokens(sent)
+                        if tokens_used + sent_tokens <= max_content_tokens:
+                            adjusted_chunk += " " + sent
+                            tokens_used += sent_tokens
+                        else:
+                            break
+
                 chunk = adjusted_chunk.strip()
 
             response = ""
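Note: the sentence-level fallback added above splits on whitespace that follows sentence-ending punctuation, so truncation happens at sentence boundaries rather than mid-sentence. With an illustrative string:

import re

text = "BP 182/110. Started lisinopril 10 mg daily! Follow up in 2 weeks?"
print(re.split(r'(?<=[.!?])\s+', text))
# ['BP 182/110.', 'Started lisinopril 10 mg daily!', 'Follow up in 2 weeks?']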
@@ -333,7 +342,7 @@ Be concise and evidence-based:
                 message=prompt,
                 history=[],
                 temperature=0.1,
-                max_new_tokens=
+                max_new_tokens=512,  # Keep responses concise
                 max_token=MAX_MODEL_LEN,
                 call_agent=False,
                 conversation=[],
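Note: how the token budget in these hunks adds up, using an assumed size for the prompt wrapper (the real figure is whatever count_tokens returns for the template around {chunk}):

MAX_MODEL_LEN = 2048       # model context limit from the first hunk
RESPONSE_RESERVE = 512     # mirrors the MAX_MODEL_LEN - 512 guard and max_new_tokens=512
prompt_tokens = 60         # assumed token count of the prompt wrapper

max_content_tokens = MAX_MODEL_LEN - prompt_tokens - RESPONSE_RESERVE
print(max_content_tokens)  # 1476: just under TARGET_CHUNK_TOKENS (1500), so a full-size
                           # chunk can still trigger the truncation branch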