CPS-Test-Mobile

Paused

App Files Files Community

Ali2206 committed on Apr 21

Commit

cf765da

verified ·

1 Parent(s): abc0e81

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -55

app.py CHANGED Viewed

@@ -46,13 +46,12 @@ MEDICAL_KEYWORDS = {
     'conclusion', 'history', 'examination', 'progress', 'discharge'
 }
 TOKENIZER = "cl100k_base"
-MAX_MODEL_LEN = 2048  # Matches your model's actual limit
-TARGET_CHUNK_TOKENS = 1200  # Leaves room for prompt and response
-PROMPT_RESERVE = 300  # Tokens reserved for prompt structure
 MEDICAL_SECTION_HEADER = "=== MEDICAL SECTION ==="
 def log_system_usage(tag=""):
-    """Log system resource usage."""
     try:
         cpu = psutil.cpu_percent(interval=1)
         mem = psutil.virtual_memory()
@@ -68,24 +67,17 @@ def log_system_usage(tag=""):
         print(f"[{tag}] GPU/CPU monitor failed: {e}")
 def sanitize_utf8(text: str) -> str:
-    """Ensure text is UTF-8 clean."""
     return text.encode("utf-8", "ignore").decode("utf-8")
 def file_hash(path: str) -> str:
-    """Generate MD5 hash of file content."""
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
 def count_tokens(text: str) -> int:
-    """Count tokens using the same method as the model"""
     encoding = tiktoken.get_encoding(TOKENIZER)
     return len(encoding.encode(text))
 def extract_all_pages_with_token_count(file_path: str) -> Tuple[str, int, int]:
-    """
-    Extract all pages from PDF with token counting.
-    Returns (extracted_text, total_pages, total_tokens)
-    """
     try:
         text_chunks = []
         total_pages = 0
@@ -112,7 +104,6 @@ def extract_all_pages_with_token_count(file_path: str) -> Tuple[str, int, int]:
         return f"PDF processing error: {str(e)}", 0, 0
 def convert_file_to_json(file_path: str, file_type: str) -> str:
-    """Convert file to JSON format with caching and token counting."""
     try:
         h = file_hash(file_path)
         cache_path = os.path.join(file_cache_dir, f"{h}.json")
@@ -162,7 +153,6 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
 def clean_response(text: str) -> str:
-    """Clean and format the model response."""
     text = sanitize_utf8(text)
     text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)
     text = re.sub(r"\['get_[^\]]+\']\n?", "", text)
@@ -172,7 +162,6 @@ def clean_response(text: str) -> str:
     return text
 def format_final_report(analysis_results: List[str], filename: str) -> str:
-    """Combine all analysis chunks into a well-formatted final report."""
     report = []
     report.append(f"COMPREHENSIVE CLINICAL OVERSIGHT ANALYSIS")
     report.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
@@ -219,7 +208,6 @@ def format_final_report(analysis_results: List[str], filename: str) -> str:
     return "\n".join(report)
 def split_content_by_tokens(content: str, max_tokens: int = TARGET_CHUNK_TOKENS) -> List[str]:
-    """Split content into chunks that fit within token limits"""
     paragraphs = re.split(r"\n\s*\n", content)
     chunks = []
     current_chunk = []
@@ -252,7 +240,6 @@ def split_content_by_tokens(content: str, max_tokens: int = TARGET_CHUNK_TOKENS)
     return chunks
 def init_agent():
-    """Initialize the TxAgent with proper configuration."""
     print("🔁 Initializing model...")
     log_system_usage("Before Load")
@@ -277,23 +264,18 @@ def init_agent():
     return agent
 def analyze_complete_document(content: str, filename: str, agent: TxAgent, temperature: float = 0.3) -> str:
-    """Analyze complete document with strict token management"""
     chunks = split_content_by_tokens(content)
     analysis_results = []
     for i, chunk in enumerate(chunks):
         try:
-            # Ultra-minimal prompt to maximize content space
             base_prompt = "Analyze for:\n1. Critical\n2. Missed DX\n3. Med issues\n4. Gaps\n5. Follow-up\n\nContent:\n"
-            # Calculate available space for content
             prompt_tokens = count_tokens(base_prompt)
-            max_content_tokens = MAX_MODEL_LEN - prompt_tokens - 100  # Response buffer
-            # Ensure chunk fits
             chunk_tokens = count_tokens(chunk)
             if chunk_tokens > max_content_tokens:
-                # Find last paragraph that fits
                 adjusted_chunk = ""
                 tokens_used = 0
                 paragraphs = re.split(r"\n\s*\n", chunk)
@@ -307,7 +289,6 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent, tempe
                         break
                 if not adjusted_chunk:
-                    # If even one paragraph is too big, split sentences
                     sentences = re.split(r'(?<=[.!?])\s+', chunk)
                     for sent in sentences:
                         sent_tokens = count_tokens(sent)
@@ -326,7 +307,7 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent, tempe
                 message=prompt,
                 history=[],
                 temperature=temperature,
-                max_new_tokens=300,  # Keep responses very concise
                 max_token=MAX_MODEL_LEN,
                 call_agent=False,
                 conversation=[],
@@ -348,7 +329,6 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent, tempe
     return format_final_report(analysis_results, filename)
 def create_ui(agent):
-    """Create the Gradio interface with enhanced design."""
     with gr.Blocks(
         theme=gr.themes.Soft(
             primary_hue="indigo",
@@ -383,7 +363,6 @@ def create_ui(agent):
         }
         """
     ) as demo:
-        # Header Section
         gr.Markdown("""
         <div style='text-align: center; margin-bottom: 20px;'>
             <h1 style='color: #2b3a67; margin-bottom: 8px;'>🩺 Clinical Oversight Assistant</h1>
@@ -394,7 +373,6 @@ def create_ui(agent):
         """)
         with gr.Row(equal_height=False):
-            # Left Column - Inputs
             with gr.Column(scale=1, min_width=400):
                 with gr.Group(elem_classes="file-upload"):
                     file_upload = gr.File(
@@ -431,7 +409,6 @@ def create_ui(agent):
                     visible=True
                 )
-            # Right Column - Outputs
             with gr.Column(scale=2, min_width=600):
                 with gr.Tabs():
                     with gr.TabItem("Analysis Report", id="report"):
@@ -459,24 +436,22 @@ def create_ui(agent):
                     )
                     gr.Button("Save to EHR", visible=False)
-        # Analysis function with UI updates
         def analyze(files: List, message: str, temp: float):
             if not files:
                 return (
-                    {"value": "", "visible": True},  # report_output
-                    {"value": None, "visible": False},  # download_output
-                    {"value": "⚠️ Please upload at least one file to analyze.", "visible": True},  # status
-                    {"value": None, "visible": True}  # data_preview
                 )
             yield (
                 {"value": "", "visible": True},
-                {"value": None, "visible": False},
                 {"value": "⏳ Processing documents...", "visible": True},
                 {"value": None, "visible": True}
             )
-            # Process files
             file_contents = []
             filenames = []
             preview_data = []
@@ -484,36 +459,39 @@ def create_ui(agent):
             with ThreadPoolExecutor(max_workers=4) as executor:
                 futures = []
                 for f in files:
                     futures.append(executor.submit(
                         convert_file_to_json,
-                        f.name,
-                        f.name.split(".")[-1].lower()
                     ))
-                    filenames.append(os.path.basename(f.name))
                 results = []
                 for future in as_completed(futures):
                     result = sanitize_utf8(future.result())
                     try:
                         data = json.loads(result)
-                        results.append(result)
                         if "content" in data:
                             preview_data.append([data["filename"], data["content"][:500] + "..."])
-                    except:
-                        pass
             yield (
                 {"value": "", "visible": True},
-                {"value": None, "visible": False},
                 {"value": f"🔍 Analyzing {len(files)} documents...", "visible": True},
                 {"value": preview_data[:20], "visible": True}
             )
             try:
                 combined_content = "\n".join([
-                    json.loads(fc).get("content", "") if "content" in json.loads(fc)
-                    else str(json.loads(fc).get("rows", ""))
-                    for fc in results
                 ])
                 full_report = analyze_complete_document(
@@ -530,7 +508,7 @@ def create_ui(agent):
                 yield (
                     {"value": full_report, "visible": True},
-                    {"value": report_path if os.path.exists(report_path) else None, "visible": True},
                     {"value": "✅ Analysis complete!", "visible": True},
                     {"value": preview_data[:20], "visible": True}
                 )
@@ -540,12 +518,11 @@ def create_ui(agent):
                 print(error_msg)
                 yield (
                     {"value": "", "visible": True},
-                    {"value": None, "visible": False},
                     {"value": error_msg, "visible": True},
                     {"value": None, "visible": True}
                 )
-        # Event handlers
         send_btn.click(
             fn=analyze,
             inputs=[file_upload, msg_input, temperature],
@@ -555,12 +532,12 @@ def create_ui(agent):
         clear_btn.click(
             fn=lambda: (
-                None,  # file_upload
-                None,  # download_output
-                "",  # status
-                None,  # data_preview
-                {"value": 0.3},  # temperature
-                {"value": ""}  # msg_input
             ),
             inputs=None,
             outputs=[file_upload, download_output, status, data_preview, temperature, msg_input]
@@ -570,7 +547,6 @@ def create_ui(agent):
 if __name__ == "__main__":
     print("🚀 Launching app...")
-    # Install tiktoken if not available
     try:
         import tiktoken
     except ImportError:

     'conclusion', 'history', 'examination', 'progress', 'discharge'
 }
 TOKENIZER = "cl100k_base"
+MAX_MODEL_LEN = 2048
+TARGET_CHUNK_TOKENS = 1200
+PROMPT_RESERVE = 300
 MEDICAL_SECTION_HEADER = "=== MEDICAL SECTION ==="
 def log_system_usage(tag=""):
     try:
         cpu = psutil.cpu_percent(interval=1)
         mem = psutil.virtual_memory()
         print(f"[{tag}] GPU/CPU monitor failed: {e}")
 def sanitize_utf8(text: str) -> str:
     """Return *text* with characters that cannot be UTF-8 encoded removed.

     The string is encoded to UTF-8 with the ``ignore`` error handler, which
     drops anything unencodable (for example lone surrogates), and the
     resulting bytes are decoded back to ``str``.
     """
     utf8_bytes = text.encode("utf-8", "ignore")
     return utf8_bytes.decode("utf-8")
 def file_hash(path: str) -> str:
     """Return the hexadecimal MD5 digest of the file at *path*.

     The file is read in fixed-size chunks and fed to the hash incrementally,
     so memory use stays bounded regardless of file size. The digest is
     identical to hashing the whole content in one call.

     Args:
         path: Filesystem path of the file to hash.

     Returns:
         The 32-character lowercase hex MD5 digest of the file's bytes.

     Raises:
         OSError: If the file cannot be opened or read.
     """
     digest = hashlib.md5()
     with open(path, "rb") as f:
         # iter() with a sentinel stops on the empty bytes object read at EOF.
         for chunk in iter(lambda: f.read(1 << 20), b""):
             digest.update(chunk)
     return digest.hexdigest()
 def count_tokens(text: str) -> int:
     """Return the number of tokens in *text* under the ``TOKENIZER`` encoding.

     Args:
         text: The string to tokenize.

     Returns:
         The length of the token sequence produced by the tiktoken encoding
         named by the module-level ``TOKENIZER`` constant.
     """
     encoding = tiktoken.get_encoding(TOKENIZER)
     # Document text is arbitrary: with tiktoken's defaults, a literal
     # special-token string such as "<|endoftext|>" in the input raises
     # ValueError. disallowed_special=() tokenizes such strings as plain text
     # so counting never fails on them.
     return len(encoding.encode(text, disallowed_special=()))
 def extract_all_pages_with_token_count(file_path: str) -> Tuple[str, int, int]:
     try:
         text_chunks = []
         total_pages = 0
         return f"PDF processing error: {str(e)}", 0, 0
 def convert_file_to_json(file_path: str, file_type: str) -> str:
     try:
         h = file_hash(file_path)
         cache_path = os.path.join(file_cache_dir, f"{h}.json")
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
 def clean_response(text: str) -> str:
     text = sanitize_utf8(text)
     text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)
     text = re.sub(r"\['get_[^\]]+\']\n?", "", text)
     return text
 def format_final_report(analysis_results: List[str], filename: str) -> str:
     report = []
     report.append(f"COMPREHENSIVE CLINICAL OVERSIGHT ANALYSIS")
     report.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
     return "\n".join(report)
 def split_content_by_tokens(content: str, max_tokens: int = TARGET_CHUNK_TOKENS) -> List[str]:
     paragraphs = re.split(r"\n\s*\n", content)
     chunks = []
     current_chunk = []
     return chunks
 def init_agent():
     print("🔁 Initializing model...")
     log_system_usage("Before Load")
     return agent
 def analyze_complete_document(content: str, filename: str, agent: TxAgent, temperature: float = 0.3) -> str:
     chunks = split_content_by_tokens(content)
     analysis_results = []
     for i, chunk in enumerate(chunks):
         try:
             base_prompt = "Analyze for:\n1. Critical\n2. Missed DX\n3. Med issues\n4. Gaps\n5. Follow-up\n\nContent:\n"
             prompt_tokens = count_tokens(base_prompt)
+            max_content_tokens = MAX_MODEL_LEN - prompt_tokens - 100
             chunk_tokens = count_tokens(chunk)
             if chunk_tokens > max_content_tokens:
                 adjusted_chunk = ""
                 tokens_used = 0
                 paragraphs = re.split(r"\n\s*\n", chunk)
                         break
                 if not adjusted_chunk:
                     sentences = re.split(r'(?<=[.!?])\s+', chunk)
                     for sent in sentences:
                         sent_tokens = count_tokens(sent)
                 message=prompt,
                 history=[],
                 temperature=temperature,
+                max_new_tokens=300,
                 max_token=MAX_MODEL_LEN,
                 call_agent=False,
                 conversation=[],
     return format_final_report(analysis_results, filename)
 def create_ui(agent):
     with gr.Blocks(
         theme=gr.themes.Soft(
             primary_hue="indigo",
         }
         """
     ) as demo:
         gr.Markdown("""
         <div style='text-align: center; margin-bottom: 20px;'>
             <h1 style='color: #2b3a67; margin-bottom: 8px;'>🩺 Clinical Oversight Assistant</h1>
         """)
         with gr.Row(equal_height=False):
             with gr.Column(scale=1, min_width=400):
                 with gr.Group(elem_classes="file-upload"):
                     file_upload = gr.File(
                     visible=True
                 )
             with gr.Column(scale=2, min_width=600):
                 with gr.Tabs():
                     with gr.TabItem("Analysis Report", id="report"):
                     )
                     gr.Button("Save to EHR", visible=False)
         def analyze(files: List, message: str, temp: float):
             if not files:
                 return (
+                    {"value": "", "visible": True},
+                    None,
+                    {"value": "⚠️ Please upload at least one file to analyze.", "visible": True},
+                    {"value": None, "visible": True}
                 )
             yield (
                 {"value": "", "visible": True},
+                None,
                 {"value": "⏳ Processing documents...", "visible": True},
                 {"value": None, "visible": True}
             )
             file_contents = []
             filenames = []
             preview_data = []
             with ThreadPoolExecutor(max_workers=4) as executor:
                 futures = []
                 for f in files:
+                    file_path = f.name
                     futures.append(executor.submit(
                         convert_file_to_json,
+                        file_path,
+                        os.path.splitext(file_path)[1][1:].lower()
                     ))
+                    filenames.append(os.path.basename(file_path))
                 results = []
                 for future in as_completed(futures):
                     result = sanitize_utf8(future.result())
                     try:
                         data = json.loads(result)
+                        results.append(data)
                         if "content" in data:
                             preview_data.append([data["filename"], data["content"][:500] + "..."])
+                    except Exception as e:
+                        print(f"Error processing result: {e}")
+                        continue
             yield (
                 {"value": "", "visible": True},
+                None,
                 {"value": f"🔍 Analyzing {len(files)} documents...", "visible": True},
                 {"value": preview_data[:20], "visible": True}
             )
             try:
                 combined_content = "\n".join([
+                    item.get("content", "") if isinstance(item, dict) and "content" in item
+                    else str(item.get("rows", "")) if isinstance(item, dict)
+                    else str(item)
+                    for item in results
                 ])
                 full_report = analyze_complete_document(
                 yield (
                     {"value": full_report, "visible": True},
+                    report_path if os.path.exists(report_path) else None,
                     {"value": "✅ Analysis complete!", "visible": True},
                     {"value": preview_data[:20], "visible": True}
                 )
                 print(error_msg)
                 yield (
                     {"value": "", "visible": True},
+                    None,
                     {"value": error_msg, "visible": True},
                     {"value": None, "visible": True}
                 )
         send_btn.click(
             fn=analyze,
             inputs=[file_upload, msg_input, temperature],
         clear_btn.click(
             fn=lambda: (
+                None,
+                None,
+                "",
+                None,
+                {"value": 0.3},
+                {"value": ""}
             ),
             inputs=None,
             outputs=[file_upload, download_output, status, data_preview, temperature, msg_input]
 if __name__ == "__main__":
     print("🚀 Launching app...")
     try:
         import tiktoken
     except ImportError: