import sys
import os
import pandas as pd
import pdfplumber
import json
import gradio as gr
from typing import List
from concurrent.futures import ThreadPoolExecutor, as_completed
import hashlib

# ✅ Fix: Add src to Python path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))

from txagent.txagent import TxAgent


def sanitize_utf8(text: str) -> str:
    return text.encode("utf-8", "ignore").decode("utf-8")


def clean_final_response(text: str) -> str:
    # Strip tool-call markers and wrap each "[Final Analysis]" section in simple HTML
    # so the chatbot renders it as a separate panel.
    cleaned = text.replace("[TOOL_CALLS]", "").strip()
    responses = cleaned.split("[Final Analysis]")
    if len(responses) <= 1:
        return f"<div>{cleaned}</div>"
    panels = []
    for i, section in enumerate(responses[1:], 1):
        final = section.strip()
        panels.append(
            f"<div>"
            f"<h4>🧠 Final Analysis #{i}</h4>"
            f"<p>{final.replace(chr(10), '<br>')}</p>"
" ) return "".join(panels) def file_hash(path): with open(path, "rb") as f: return hashlib.md5(f.read()).hexdigest() def convert_file_to_json(file_path: str, file_type: str) -> str: try: cache_dir = os.path.join("cache") os.makedirs(cache_dir, exist_ok=True) h = file_hash(file_path) cache_path = os.path.join(cache_dir, f"{h}.json") if os.path.exists(cache_path): return open(cache_path, "r", encoding="utf-8").read() if file_type == "csv": df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str, skip_blank_lines=False, on_bad_lines="skip") elif file_type in ["xls", "xlsx"]: try: df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str) except: df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str) elif file_type == "pdf": with pdfplumber.open(file_path) as pdf: text = "\n".join([page.extract_text() or "" for page in pdf.pages]) result = json.dumps({"filename": os.path.basename(file_path), "content": text.strip()}) open(cache_path, "w", encoding="utf-8").write(result) return result else: return json.dumps({"error": f"Unsupported file type: {file_type}"}) if df is None or df.empty: return json.dumps({"warning": f"No data extracted from: {file_path}"}) df = df.fillna("") content = df.astype(str).values.tolist() result = json.dumps({"filename": os.path.basename(file_path), "rows": content}) open(cache_path, "w", encoding="utf-8").write(result) return result except Exception as e: return json.dumps({"error": f"Error reading {os.path.basename(file_path)}: {str(e)}"}) def chunk_text(text: str, max_tokens: int = 6000) -> List[str]: chunks = [] words = text.split() chunk = [] token_count = 0 for word in words: token_count += len(word) // 4 + 1 if token_count > max_tokens: chunks.append(" ".join(chunk)) chunk = [word] token_count = len(word) // 4 + 1 else: chunk.append(word) if chunk: chunks.append(" ".join(chunk)) return chunks def create_ui(agent: TxAgent): with gr.Blocks(theme=gr.themes.Soft()) as demo: gr.Markdown("

📋 CPS: Clinical Patient Support System

") chatbot = gr.Chatbot(label="CPS Assistant", height=600, type="messages") file_upload = gr.File( label="Upload Medical File", file_types=[".pdf", ".txt", ".docx", ".jpg", ".png", ".csv", ".xls", ".xlsx"], file_count="multiple" ) message_input = gr.Textbox(placeholder="Ask a biomedical question or just upload the files...", show_label=False) send_button = gr.Button("Send", variant="primary") conversation_state = gr.State([]) def handle_chat(message: str, history: list, conversation: list, uploaded_files: list, progress=gr.Progress()): context = ( "You are an expert clinical AI assistant reviewing medical form or interview data. " "Your job is to analyze this data and reason about any information or red flags that a human doctor might have overlooked. " "Provide a **detailed and structured response**, including examples, supporting evidence from the form, and clinical rationale for why these items matter. " "Ensure the output is informative and helpful for improving patient care. " "Do not hallucinate. Base the response only on the provided form content. " "End with a section labeled '[Final Analysis]' where you summarize key findings the doctor may have missed." ) try: history.append({"role": "user", "content": message}) history.append({"role": "assistant", "content": "⏳ Processing your request..."}) yield history extracted_text = "" if uploaded_files and isinstance(uploaded_files, list): for file in uploaded_files: if not hasattr(file, 'name'): continue path = file.name ext = path.split(".")[-1].lower() json_text = convert_file_to_json(path, ext) extracted_text += sanitize_utf8(json_text) + "\n" chunks = chunk_text(extracted_text.strip()) def process_chunk(i, chunk): chunked_prompt = ( f"{context}\n\n--- Uploaded File Content (Chunk {i+1}/{len(chunks)}) ---\n\n{chunk}\n\n" f"--- End of Chunk ---\n\nNow begin your analysis:" ) try: generator = agent.run_gradio_chat( message=chunked_prompt, history=[], temperature=0.3, max_new_tokens=1024, max_token=8192, call_agent=False, conversation=conversation, uploaded_files=uploaded_files, max_round=30 ) result = "" for update in generator: if update is None: print(f"[Warning] Empty response in chunk {i+1}") continue if isinstance(update, str): result += update elif isinstance(update, list): for msg in update: if hasattr(msg, 'content'): result += msg.content return result if result.strip() else f"[Chunk {i+1}] ⚠️ No response received." except Exception as err: print(f"[Error in chunk {i+1}] {err}") return f"[Chunk {i+1}] ❌ Failed to process due to error." with ThreadPoolExecutor(max_workers=min(8, len(chunks))) as executor: futures = [executor.submit(process_chunk, i, chunk) for i, chunk in enumerate(chunks)] results = [f.result() for f in as_completed(futures)] full_response = "\n\n".join(results) full_response = clean_final_response(full_response.strip()) history[-1] = {"role": "assistant", "content": full_response} yield history except Exception as chat_error: print(f"Chat handling error: {chat_error}") history[-1] = {"role": "assistant", "content": "❌ An error occurred while processing your request."} yield history inputs = [message_input, chatbot, conversation_state, file_upload] send_button.click(fn=handle_chat, inputs=inputs, outputs=chatbot) message_input.submit(fn=handle_chat, inputs=inputs, outputs=chatbot) gr.Examples([ ["Upload your medical form and ask what the doctor might've missed."], ["This patient was treated with antibiotics for UTI. 
What else should we check?"], ["Is there anything abnormal in the attached blood work report?"] ], inputs=message_input) return demo
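

# Hypothetical entry point: a minimal sketch only. The TxAgent constructor lives in
# src/txagent/txagent.py and likely requires model/config arguments that are not visible
# in this file, so the no-argument call below is an assumption to adjust before running.
if __name__ == "__main__":
    agent = TxAgent()  # assumption: replace with the real TxAgent initialization
    demo = create_ui(agent)
    # queue() enables Gradio's streaming of generator outputs, which handle_chat relies on.
    demo.queue().launch()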