Ali2206 committed
Commit c5da27e · verified · 1 Parent(s): 26faa43

Update app.py

Files changed (1)
  1. app.py +157 -116
app.py CHANGED
@@ -8,9 +8,10 @@ import hashlib
 import shutil
 import re
 from datetime import datetime
+import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 
-# Setup directories
+# Configuration and setup
 persistent_dir = "/data/hf_cache"
 os.makedirs(persistent_dir, exist_ok=True)
 
@@ -19,13 +20,16 @@ tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
 file_cache_dir = os.path.join(persistent_dir, "cache")
 report_dir = os.path.join(persistent_dir, "reports")
 
-for d in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
-    os.makedirs(d, exist_ok=True)
+for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
+    os.makedirs(directory, exist_ok=True)
 
 os.environ["HF_HOME"] = model_cache_dir
 os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
 
-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "src")))
+current_dir = os.path.dirname(os.path.abspath(__file__))
+src_path = os.path.abspath(os.path.join(current_dir, "src"))
+sys.path.insert(0, src_path)
+
 from txagent.txagent import TxAgent
 
 MAX_MODEL_TOKENS = 32768
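A note on the ordering this hunk preserves: Hugging Face libraries resolve HF_HOME / TRANSFORMERS_CACHE once, when they are first imported, so both variables must be exported before the txagent import (which transitively pulls in transformers) runs. A minimal sketch of the pattern; the cache path here is an example, not the actual value of model_cache_dir:

```python
import os

# Set cache locations BEFORE anything imports transformers/huggingface_hub;
# those libraries read these variables at import time, not per call.
os.environ["HF_HOME"] = "/data/hf_cache/models"             # example path
os.environ["TRANSFORMERS_CACHE"] = "/data/hf_cache/models"  # example path

from txagent.txagent import TxAgent  # transitively imports transformers
```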
@@ -34,6 +38,10 @@ MAX_NEW_TOKENS = 2048
 PROMPT_OVERHEAD = 500
 
 def clean_response(text: str) -> str:
+    try:
+        text = text.encode('utf-8', 'surrogatepass').decode('utf-8')
+    except UnicodeError:
+        text = text.encode('utf-8', 'replace').decode('utf-8')
     text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
     text = re.sub(r"\n{3,}", "\n\n", text)
     text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
@@ -44,28 +52,35 @@ def estimate_tokens(text: str) -> int:
 
 def extract_text_from_excel(file_path: str) -> str:
     all_text = []
-    xls = pd.ExcelFile(file_path)
-    for sheet_name in xls.sheet_names:
-        df = xls.parse(sheet_name).astype(str).fillna("")
-        rows = df.apply(lambda row: " | ".join(row), axis=1)
-        sheet_text = [f"[{sheet_name}] {line}" for line in rows]
-        all_text.extend(sheet_text)
+    try:
+        xls = pd.ExcelFile(file_path)
+        for sheet_name in xls.sheet_names:
+            df = xls.parse(sheet_name)
+            df = df.astype(str).fillna("")
+            rows = df.apply(lambda row: " | ".join(row), axis=1)
+            sheet_text = [f"[{sheet_name}] {line}" for line in rows]
+            all_text.extend(sheet_text)
+    except Exception as e:
+        raise ValueError(f"Failed to extract text from Excel file: {str(e)}")
     return "\n".join(all_text)
 
 def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
-    effective_max = max_tokens - PROMPT_OVERHEAD
-    lines, chunks, curr_chunk, curr_tokens = text.split("\n"), [], [], 0
+    effective_max_tokens = max_tokens - PROMPT_OVERHEAD
+    if effective_max_tokens <= 0:
+        raise ValueError("Effective max tokens must be positive.")
+    lines = text.split("\n")
+    chunks, current_chunk, current_tokens = [], [], 0
     for line in lines:
-        t = estimate_tokens(line)
-        if curr_tokens + t > effective_max:
-            if curr_chunk:
-                chunks.append("\n".join(curr_chunk))
-            curr_chunk, curr_tokens = [line], t
+        line_tokens = estimate_tokens(line)
+        if current_tokens + line_tokens > effective_max_tokens:
+            if current_chunk:
+                chunks.append("\n".join(current_chunk))
+            current_chunk, current_tokens = [line], line_tokens
         else:
-            curr_chunk.append(line)
-            curr_tokens += t
-    if curr_chunk:
-        chunks.append("\n".join(curr_chunk))
+            current_chunk.append(line)
+            current_tokens += line_tokens
+    if current_chunk:
+        chunks.append("\n".join(current_chunk))
     return chunks
 
 def build_prompt_from_text(chunk: str) -> str:
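For context, a usage sketch of the reworked chunker. It assumes app.py imports cleanly, which only holds where /data/hf_cache is writable and txagent is installed, so treat it as illustrative rather than a drop-in test:

```python
from app import split_text_into_chunks, PROMPT_OVERHEAD  # assumed import path

text = "\n".join(f"[Sheet1] patient {i} | obs {i}" for i in range(5000))

# Greedy line packing: lines are never split mid-way, and PROMPT_OVERHEAD
# tokens are reserved out of the budget for the prompt template.
chunks = split_text_into_chunks(text)
print(len(chunks), "chunks")

# The new guard fails fast when the budget cannot fit any content at all,
# where the old code silently produced one chunk per line instead.
try:
    split_text_into_chunks(text, max_tokens=PROMPT_OVERHEAD)
except ValueError as err:
    print("rejected:", err)
```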
@@ -88,132 +103,158 @@ Respond in well-structured bullet points with medical reasoning.
 """
 
 def init_agent():
-    tool_path = os.path.join(tool_cache_dir, "new_tool.json")
-    if not os.path.exists(tool_path):
-        shutil.copy(os.path.abspath("data/new_tool.json"), tool_path)
+    default_tool_path = os.path.abspath("data/new_tool.json")
+    target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
+    if not os.path.exists(target_tool_path):
+        shutil.copy(default_tool_path, target_tool_path)
     agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
-        tool_files_dict={"new_tool": tool_path},
+        tool_files_dict={"new_tool": target_tool_path},
         force_finish=True,
         enable_checker=True,
         step_rag_num=4,
-        seed=100
+        seed=100,
+        additional_default_tools=[]
     )
     agent.init_model()
     return agent
 
 def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
     messages = chatbot_state if chatbot_state else []
+    report_path = None
+
     if file is None or not hasattr(file, "name"):
-        return messages + [{"role": "assistant", "content": "❌ Please upload a valid Excel file."}], None
-
-    messages.append({"role": "user", "content": f"Processing Excel file: {os.path.basename(file.name)}"})
-    text = extract_text_from_excel(file.name)
-    chunks = split_text_into_chunks(text)
-    chunk_responses = [None] * len(chunks)
-
-    def analyze_chunk(i, chunk):
-        prompt = build_prompt_from_text(chunk)
-        response = ""
-        for res in agent.run_gradio_chat(message=prompt, history=[], temperature=0.2, max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS, call_agent=False, conversation=[]):
-            if isinstance(res, str):
-                response += res
-            elif hasattr(res, "content"):
-                response += res.content
-            elif isinstance(res, list):
-                for r in res:
-                    if hasattr(r, "content"):
-                        response += r.content
-        return i, clean_response(response)
-
-    with ThreadPoolExecutor(max_workers=1) as executor:
-        futures = [executor.submit(analyze_chunk, i, c) for i, c in enumerate(chunks)]
-        for f in as_completed(futures):
-            i, result = f.result()
-            chunk_responses[i] = result
-
-    valid = [r for r in chunk_responses if r and not r.startswith("❌")]
-    if not valid:
-        return messages + [{"role": "assistant", "content": "❌ No valid chunk results."}], None
-
-    summary_prompt = f"Summarize this analysis in a final structured report:\n\n" + "\n\n".join(valid)
-    messages.append({"role": "assistant", "content": "📊 Generating final report..."})
-
-    final_report = ""
-    for res in agent.run_gradio_chat(message=summary_prompt, history=[], temperature=0.2, max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS, call_agent=False, conversation=[]):
-        if isinstance(res, str):
-            final_report += res
-        elif hasattr(res, "content"):
-            final_report += res.content
-
-    cleaned = clean_response(final_report)
-    report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
-    with open(report_path, 'w') as f:
-        f.write(f"# 🧠 Final Patient Report\n\n{cleaned}")
-
-    messages.append({"role": "assistant", "content": f"📊 Final Report:\n\n{cleaned}"})
-    messages.append({"role": "assistant", "content": f"✅ Report generated and saved: {os.path.basename(report_path)}"})
+        messages.append({"role": "assistant", "content": "❌ Please upload a valid Excel file before analyzing."})
+        return messages, report_path
+
+    try:
+        messages.append({"role": "user", "content": f"Processing Excel file: {os.path.basename(file.name)}"})
+        extracted_text = extract_text_from_excel(file.name)
+        chunks = split_text_into_chunks(extracted_text)
+        chunk_responses = [None] * len(chunks)
+
+        def analyze_chunk(index: int, chunk: str) -> Tuple[int, str]:
+            prompt = build_prompt_from_text(chunk)
+            prompt_tokens = estimate_tokens(prompt)
+            if prompt_tokens > MAX_MODEL_TOKENS:
+                return index, f"❌ Chunk {index+1} prompt too long. Skipping..."
+            response = ""
+            try:
+                for result in agent.run_gradio_chat(
+                    message=prompt,
+                    history=[],
+                    temperature=0.2,
+                    max_new_tokens=MAX_NEW_TOKENS,
+                    max_token=MAX_MODEL_TOKENS,
+                    call_agent=False,
+                    conversation=[],
+                ):
+                    if isinstance(result, str):
+                        response += result
+                    elif isinstance(result, list):
+                        for r in result:
+                            if hasattr(r, "content"):
+                                response += r.content
+                    elif hasattr(result, "content"):
+                        response += result.content
+            except Exception as e:
+                return index, f"❌ Error analyzing chunk {index+1}: {str(e)}"
+            return index, clean_response(response)
+
+        with ThreadPoolExecutor(max_workers=1) as executor:
+            futures = [executor.submit(analyze_chunk, i, chunk) for i, chunk in enumerate(chunks)]
+            for future in as_completed(futures):
+                i, result = future.result()
+                chunk_responses[i] = result
+                if result.startswith("❌"):
+                    messages.append({"role": "assistant", "content": result})
+
+        valid_responses = [res for res in chunk_responses if not res.startswith("❌")]
+        if not valid_responses:
+            messages.append({"role": "assistant", "content": "❌ No valid chunk responses to summarize."})
+            return messages, report_path
+
+        summary = "\n\n".join(valid_responses)
+        final_prompt = f"Provide a structured, consolidated clinical analysis from these results:\n\n{summary}"
+        messages.append({"role": "assistant", "content": "📊 Generating final report..."})
+
+        final_report_text = ""
+        for result in agent.run_gradio_chat(
+            message=final_prompt,
+            history=[],
+            temperature=0.2,
+            max_new_tokens=MAX_NEW_TOKENS,
+            max_token=MAX_MODEL_TOKENS,
+            call_agent=False,
+            conversation=[],
+        ):
+            if isinstance(result, str):
+                final_report_text += result
+            elif isinstance(result, list):
+                for r in result:
+                    if hasattr(r, "content"):
+                        final_report_text += r.content
+            elif hasattr(result, "content"):
+                final_report_text += result.content
+
+        cleaned = clean_response(final_report_text)
+        report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
+        with open(report_path, 'w') as f:
+            f.write(f"# 🧠 Final Patient Report\n\n{cleaned}")
+
+        messages.append({"role": "assistant", "content": f"📊 Final Report:\n\n{cleaned}"})
+        messages.append({"role": "assistant", "content": f"✅ Report generated and saved: {os.path.basename(report_path)}"})
+
+    except Exception as e:
+        messages.append({"role": "assistant", "content": f"❌ Error processing file: {str(e)}"})
+
     return messages, report_path
 
 def create_ui(agent):
     with gr.Blocks(css="""
     html, body, .gradio-container {
         height: 100vh;
-        background-color: #111827;
-        color: #e5e7eb;
+        width: 100vw;
+        padding: 0;
+        margin: 0;
         font-family: 'Inter', sans-serif;
-    }
-    .message-avatar {
-        width: 38px;
-        height: 38px;
-        border-radius: 50%;
-        margin-right: 10px;
-    }
-    .chat-message {
-        display: flex;
-        align-items: flex-start;
-        margin-bottom: 1rem;
-    }
-    .message-bubble {
-        background-color: #1f2937;
-        padding: 12px 16px;
-        border-radius: 12px;
-        max-width: 90%;
-    }
-    .chat-input {
-        background-color: #1f2937;
-        border: 1px solid #374151;
-        border-radius: 8px;
-        color: #e5e7eb;
-        padding: 0.75rem 1rem;
+        background: #ffffff;
     }
     .gr-button.primary {
-        background: #2563eb;
-        color: white;
-        border-radius: 8px;
+        background: #1e88e5;
+        color: #fff;
+        border: none;
+        border-radius: 6px;
         font-weight: 600;
     }
     .gr-button.primary:hover {
-        background: #1e40af;
+        background: #1565c0;
+    }
+    .gr-chatbot {
+        border: 1px solid #e0e0e0;
+        background: #f9f9f9;
+        border-radius: 10px;
+        padding: 1rem;
+        font-size: 15px;
+    }
+    .gr-markdown, .gr-file-upload {
+        background: #ffffff;
+        border-radius: 8px;
+        box-shadow: 0 1px 3px rgba(0,0,0,0.08);
     }
     """) as demo:
-        gr.Markdown("""<h2 style='color:#60a5fa'>🩺 Patient History AI Assistant</h2><p>Upload a clinical Excel file and receive a structured diagnostic summary.</p>""")
+        gr.Markdown("""
+        <h2 style='color:#1e88e5'>🩺 Patient History AI Assistant</h2>
+        <p>Upload a clinical Excel file and receive an advanced diagnostic summary.</p>
+        """)
+
         with gr.Row():
             with gr.Column(scale=3):
-                chatbot = gr.Chatbot(
-                    label="Clinical Assistant",
-                    height=700,
-                    type="messages",
-                    avatar_images=[
-                        "https://ui-avatars.com/api/?name=AI&background=2563eb&color=fff&size=128",
-                        "https://ui-avatars.com/api/?name=You&background=374151&color=fff&size=128"
-                    ]
-                )
+                chatbot = gr.Chatbot(label="Clinical Assistant", height=700, type="messages")
             with gr.Column(scale=1):
-                with gr.Row():
-                    file_upload = gr.File(label="", file_types=[".xlsx"], elem_id="upload-btn")
-                    analyze_btn = gr.Button("🧠 Analyze", variant="primary")
+                file_upload = gr.File(label="Upload Excel File", file_types=[".xlsx"])
+                analyze_btn = gr.Button("🧠 Analyze", variant="primary")
                 report_output = gr.File(label="Download Report", visible=False, interactive=False)
 
         chatbot_state = gr.State(value=[])
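One detail of the rewrite worth calling out: results come back through as_completed in completion order, and each carries its index so chunk order is restored on write-back (which will matter if max_workers is ever raised above 1). A self-contained illustration of the pattern:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Tuple

def analyze(index: int, chunk: str) -> Tuple[int, str]:
    return index, chunk.upper()  # stand-in for the per-chunk model call

chunks = ["alpha", "bravo", "charlie"]
results = [None] * len(chunks)

with ThreadPoolExecutor(max_workers=2) as executor:
    futures = [executor.submit(analyze, i, c) for i, c in enumerate(chunks)]
    for future in as_completed(futures):  # yields in completion order...
        i, text = future.result()
        results[i] = text                 # ...but results are stored by index

print(results)  # ['ALPHA', 'BRAVO', 'CHARLIE'] regardless of completion order
```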
 