# CPS-Test-Mobile
# Status: Paused
# Tabs: App | Files | Community
# File: CPS-Test-Mobile / app.py
# Author: Ali2206
# Commit message: Update app.py
# Commit: a1a096d (verified), 4 months ago
# Links: raw | history | blame
# Size: 8.28 kB

	import sys, os, json, shutil, re, time, gc, hashlib
	import pandas as pd
	from datetime import datetime
	from concurrent.futures import ThreadPoolExecutor, as_completed
	from typing import List, Tuple, Dict, Union

	import gradio as gr

	# Constants
	# Token budgets shared by the functions below. Token counts in this file are
	# approximated with estimate_tokens(), not measured with a real tokenizer.
	MAX_MODEL_TOKENS = 131072  # passed as max_token to run_gradio_chat; prompts estimated above this are skipped
	MAX_NEW_TOKENS = 4096  # passed as max_new_tokens on every generation call
	MAX_CHUNK_TOKENS = 8192  # default per-chunk budget of split_text()
	PROMPT_OVERHEAD = 300  # subtracted from the chunk budget in split_text(); presumably room for the prompt template

	# Paths
	# Everything the app persists lives under one root directory.
	persistent_dir = "/data/hf_cache"
	model_cache_dir = os.path.join(persistent_dir, "txagent_models")  # exported below through HF_HOME / TRANSFORMERS_CACHE
	tool_cache_dir = os.path.join(persistent_dir, "tool_cache")  # init_agent() copies new_tool.json here
	file_cache_dir = os.path.join(persistent_dir, "cache")  # created here; not referenced again in the visible code
	report_dir = os.path.join(persistent_dir, "reports")  # process_report() writes the Markdown reports here

	# Create the directories up front; exist_ok=True makes this a no-op when they already exist.
	for d in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
	    os.makedirs(d, exist_ok=True)

	# Exported before the txagent import further down.
	# NOTE(review): presumably the Hugging Face libraries read these when they are imported - confirm.
	os.environ["HF_HOME"] = model_cache_dir
	os.environ["TRANSFORMERS_CACHE"] = model_cache_dir

	# Put the "src" directory that sits next to this file first on sys.path;
	# the `from txagent.txagent import TxAgent` import that follows relies on it.
	current_dir = os.path.dirname(os.path.abspath(__file__))
	src_path = os.path.abspath(os.path.join(current_dir, "src"))
	sys.path.insert(0, src_path)

	from txagent.txagent import TxAgent

	def estimate_tokens(text: str) -> int:
	    """Return a rough token estimate for *text*.

	    The estimate is one token per four characters (rounded down) plus one,
	    so the empty string counts as a single token.
	    """
	    whole_quarters, _leftover = divmod(len(text), 4)
	    return whole_quarters + 1

	def clean_response(text: str) -> str:
	    """Strip bracketed spans, bare ``None`` tokens and stray symbols from *text*.

	    Steps, in order:

	    1. Remove every ``[...]`` span (non-greedy, may cross line breaks) and
	       every standalone word ``None``.
	    2. Remove any character that is not a word character, whitespace, or one
	       of ``# - * . , : ( )``.
	    3. Collapse runs of three or more newlines into a single blank line.
	    4. Trim leading and trailing whitespace.

	    Args:
	        text: Raw text produced by the model.

	    Returns:
	        The cleaned text; may be empty.
	    """
	    # The two alternatives must be separated by an *unescaped* pipe: an escaped
	    # one only matches the literal sequence "]|None" and removes nothing useful.
	    text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
	    text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
	    # Collapse blank lines last so that lines emptied by the step above are
	    # squeezed as well.
	    text = re.sub(r"\n{3,}", "\n\n", text)
	    return text.strip()

	def extract_text_from_excel(path: str) -> str:
	    """Flatten every sheet of an Excel workbook into newline-separated text.

	    Each data row becomes one line of the form ``[<sheet name>] a | b | c``,
	    where the cells are the string form of the row's values and missing cells
	    are rendered as empty strings. Sheets without data rows contribute no
	    lines.

	    Args:
	        path: Path of the workbook to read.

	    Returns:
	        All rows of all sheets, joined with ``"\\n"``.

	    Raises:
	        ValueError: If the workbook cannot be opened or parsed; the original
	            exception is attached as ``__cause__``.
	    """
	    lines = []
	    try:
	        # The context manager closes the workbook handle even if a sheet fails.
	        with pd.ExcelFile(path) as workbook:
	            for sheet in workbook.sheet_names:
	                raw = workbook.parse(sheet)
	                # Stringify first, then blank the cells that were missing in
	                # the raw frame: calling fillna("") after astype(str) is too
	                # late because NaN has already become the text "nan".
	                cells = raw.astype(str).mask(raw.isna(), "")
	                # itertuples yields nothing for an empty sheet, whereas
	                # iterating the result of apply(axis=1) on an empty frame
	                # would yield the column names.
	                lines.extend(
	                    f"[{sheet}] {' | '.join(row)}"
	                    for row in cells.itertuples(index=False, name=None)
	                )
	    except Exception as e:
	        raise ValueError(f"Error reading Excel file: {str(e)}") from e
	    return "\n".join(lines)

	def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
	    """Split *text* on line boundaries into chunks that fit a token budget.

	    Lines are packed greedily: a new chunk is started whenever adding the next
	    line would push the running estimate (see ``estimate_tokens``) above
	    ``max_tokens - PROMPT_OVERHEAD``. Lines are never broken, so a single line
	    whose estimate alone exceeds the budget becomes a chunk of its own.

	    Args:
	        text: Text to split.
	        max_tokens: Token budget per chunk before the prompt overhead is
	            subtracted.

	    Returns:
	        The chunks in input order. Empty or whitespace-only input yields an
	        empty list, so no blank chunk is handed to the model.
	    """
	    if not text or not text.strip():
	        return []

	    effective_limit = max_tokens - PROMPT_OVERHEAD
	    chunks, current, current_tokens = [], [], 0
	    for line in text.split("\n"):
	        tokens = estimate_tokens(line)
	        # Only flush when something has been collected; an over-long first
	        # line simply starts (and later closes) its own chunk.
	        if current and current_tokens + tokens > effective_limit:
	            chunks.append("\n".join(current))
	            current, current_tokens = [line], tokens
	        else:
	            current.append(line)
	            current_tokens += tokens
	    if current:
	        chunks.append("\n".join(current))
	    return chunks

	def build_prompt(chunk: str) -> str:
	    """Wrap one chunk of clinical notes in the analysis instructions.

	    The chunk is placed between two ``---`` separators, preceded by the list
	    of sections to summarise and followed by the answer-format instruction.
	    """
	    template = """### Unstructured Clinical Records

	Analyze the clinical notes below and summarize with:
	- Diagnostic Patterns
	- Medication Issues
	- Missed Opportunities
	- Inconsistencies
	- Follow-up Recommendations

	---

	{records}

	---
	Respond concisely in bullet points with clinical reasoning."""
	    # Only the template is parsed for placeholders, so braces inside the
	    # chunk itself are inserted verbatim.
	    return template.format(records=chunk)

	def init_agent() -> TxAgent:
	    """Create the TxAgent instance and load its model.

	    If ``new_tool.json`` is not yet present in ``tool_cache_dir`` it is copied
	    there from ``data/new_tool.json`` (resolved against the current working
	    directory). The agent is then constructed with that tool file,
	    ``init_model()`` is called on it, and the agent is returned.
	    """
	    cached_tool_file = os.path.join(tool_cache_dir, "new_tool.json")
	    tool_is_cached = os.path.exists(cached_tool_file)
	    if not tool_is_cached:
	        bundled_tool_file = os.path.abspath("data/new_tool.json")
	        shutil.copy(bundled_tool_file, cached_tool_file)

	    agent_options = {
	        "model_name": "mims-harvard/TxAgent-T1-Llama-3.1-8B",
	        "rag_model_name": "mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
	        "tool_files_dict": {"new_tool": cached_tool_file},
	        "force_finish": True,
	        "enable_checker": True,
	        "step_rag_num": 4,
	        "seed": 100,
	    }
	    tx_agent = TxAgent(**agent_options)
	    tx_agent.init_model()
	    return tx_agent

	def analyze_chunks_parallel(agent, chunks: List[str]) -> List[str]:
	    """Run the agent over every chunk on a pool of four worker threads.

	    Each chunk is wrapped with ``build_prompt`` and sent through
	    ``agent.run_gradio_chat``; the streamed pieces are concatenated and passed
	    to ``clean_response``. A prompt whose estimated size exceeds
	    ``MAX_MODEL_TOKENS``, or a chunk whose processing raises, produces a
	    message starting with "❌" instead of an analysis.

	    Returns:
	        One string per chunk, in the same order as *chunks*.
	    """

	    def _text_of(update) -> str:
	        # A streamed item may be a plain string, a list of objects carrying
	        # ``content``, or a single such object; anything else adds nothing.
	        if isinstance(update, str):
	            return update
	        if isinstance(update, list):
	            joined = ""
	            for message in update:
	                if hasattr(message, "content"):
	                    joined += message.content
	            return joined
	        if hasattr(update, "content"):
	            return update.content
	        return ""

	    def _run_one(position, chunk_text):
	        prompt = build_prompt(chunk_text)
	        label = position + 1  # chunk numbers in messages are 1-based
	        try:
	            if estimate_tokens(prompt) > MAX_MODEL_TOKENS:
	                return position, f"❌ Chunk {label} too long. Skipped."
	            stream = agent.run_gradio_chat(
	                message=prompt,
	                history=[],
	                temperature=0.2,
	                max_new_tokens=MAX_NEW_TOKENS,
	                max_token=MAX_MODEL_TOKENS,
	                call_agent=False,
	                conversation=[],
	            )
	            collected = ""
	            for update in stream:
	                collected += _text_of(update)
	            gc.collect()
	            return position, clean_response(collected)
	        except Exception as exc:
	            return position, f"❌ Error in chunk {label}: {str(exc)}"

	    # Pre-sized so results can be stored by index as workers finish out of order.
	    ordered = [None] * len(chunks)
	    with ThreadPoolExecutor(max_workers=4) as pool:
	        pending = []
	        for position, chunk_text in enumerate(chunks):
	            pending.append(pool.submit(_run_one, position, chunk_text))
	        for finished in as_completed(pending):
	            position, outcome = finished.result()
	            ordered[position] = outcome

	    return ordered

	def generate_final_summary(agent, combined: str) -> str:
	    """Ask the agent to merge the per-chunk summaries into one report.

	    *combined* is embedded in a fixed instruction prompt, the prompt is sent
	    through ``agent.run_gradio_chat``, and every streamed piece (a string, a
	    list of objects with ``content``, or one such object) is concatenated.
	    The concatenation is returned after ``clean_response``.
	    """
	    final_prompt = f"""Provide a structured medical report based on the following summaries:

	{combined}

	Respond in detailed medical bullet points."""
	    stream = agent.run_gradio_chat(
	        message=final_prompt,
	        history=[],
	        temperature=0.2,
	        max_new_tokens=MAX_NEW_TOKENS,
	        max_token=MAX_MODEL_TOKENS,
	        call_agent=False,
	        conversation=[],
	    )
	    report_text = ""
	    for update in stream:
	        if isinstance(update, str):
	            report_text += update
	            continue
	        if isinstance(update, list):
	            for message in update:
	                if hasattr(message, "content"):
	                    report_text += message.content
	            continue
	        if hasattr(update, "content"):
	            report_text += update.content
	    return clean_response(report_text)

	def process_report(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
	    """Analyse an uploaded Excel file and record the progress in *messages*.

	    The workbook is flattened to text, split into chunks, analysed chunk by
	    chunk, and the successful chunk results are merged into a final summary
	    that is written to a timestamped Markdown file in ``report_dir``.

	    Args:
	        agent: Agent handed on to the chunk analysis and final summary steps.
	        file: Uploaded file object; must be truthy and expose ``name``.
	        messages: Chat history as role/content dicts. It is extended in place
	            and the same list is returned.

	    Returns:
	        ``(messages, report_path)``. ``report_path`` is ``None`` when no
	        usable file was given, when no chunk produced a valid result, or when
	        any step raised (the error text is then appended to *messages*).
	    """

	    def post(role, content):
	        messages.append({"role": role, "content": content})

	    if not (file and hasattr(file, "name")):
	        post("assistant", "❌ Please upload a valid Excel file.")
	        return messages, None

	    post("user", f"📂 Processing file: {os.path.basename(file.name)}")
	    try:
	        sheet_text = extract_text_from_excel(file.name)
	        pieces = split_text(sheet_text)
	        post("assistant", f"🔍 Split into {len(pieces)} chunks. Analyzing...")

	        outcomes = analyze_chunks_parallel(agent, pieces)
	        # Failed or skipped chunks are reported as strings starting with "❌".
	        usable = [outcome for outcome in outcomes if not outcome.startswith("❌")]
	        if not usable:
	            post("assistant", "❌ No valid chunk outputs.")
	            return messages, None

	        summary = generate_final_summary(agent, "\n\n".join(usable))
	        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
	        report_path = os.path.join(report_dir, f"report_{stamp}.md")
	        with open(report_path, 'w', encoding='utf-8') as report_file:
	            report_file.write(f"# 🧠 Final Medical Report\n\n{summary}")

	        post("assistant", f"📊 Final Report:\n\n{summary}")
	        post("assistant", f"✅ Report saved: {os.path.basename(report_path)}")
	        return messages, report_path

	    except Exception as e:
	        post("assistant", f"❌ Error: {str(e)}")
	        return messages, None

	def create_ui(agent):
	    """Build and return the Gradio Blocks interface.

	    The layout is a heading, then a row with the chat panel in the wide
	    column and, in the narrow column, the Excel upload, the Analyze button and
	    a download component that starts hidden. Clicking Analyze runs
	    ``process_report`` with the uploaded file and the stored chat history.
	    """
	    heading_html = "<h2 style='color:#1e88e5'>🩺 Patient AI Assistant</h2><p>Upload a clinical Excel file and receive a diagnostic summary.</p>"

	    with gr.Blocks() as demo:
	        gr.Markdown(heading_html)
	        with gr.Row():
	            with gr.Column(scale=3):
	                chat_panel = gr.Chatbot(label="Assistant", height=700, type="messages")
	            with gr.Column(scale=1):
	                excel_input = gr.File(label="Upload Excel", file_types=[".xlsx"])
	                analyze_button = gr.Button("🧠 Analyze", variant="primary")
	                report_output = gr.File(label="Download Report", visible=False, interactive=False)

	        # Chat history carried between clicks.
	        history_state = gr.State(value=[])

	        def handle_analysis(file, chat):
	            messages, report_path = process_report(agent, file, chat)
	            # Show the download component only when a report was produced.
	            download_update = gr.update(visible=bool(report_path), value=report_path)
	            return messages, download_update, messages

	        analyze_button.click(
	            fn=handle_analysis,
	            inputs=[excel_input, history_state],
	            outputs=[chat_panel, report_output, history_state],
	        )

	    return demo

	if __name__ == "__main__":
	    # Script entry point: load the agent, build the UI and serve it on all
	    # interfaces at port 7860. Any startup failure is reported and the process
	    # exits with status 1.
	    try:
	        agent = init_agent()
	        ui = create_ui(agent)
	        # Serve downloads from report_dir itself rather than a hard-coded copy
	        # of its value, so the two cannot drift apart.
	        ui.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=[report_dir], share=False)
	    except Exception as err:
	        # Diagnostics belong on stderr, not stdout.
	        print(f"Startup failed: {err}", file=sys.stderr)
	        sys.exit(1)