TxAgent-Api / utils.py
Ali2206's picture
Create utils.py
5eef1a8 verified
raw
history blame
2.64 kB
import re
import hashlib
import io
import json
from datetime import datetime
from typing import Dict, List, Tuple
from bson import ObjectId
import logging
from config import logger
def clean_text_response(text: str) -> str:
    """Normalize whitespace and strip Markdown emphasis markers from a response.

    Args:
        text: Raw response text. Empty or ``None`` input yields ``""``.

    Returns:
        The text with every ``**`` and ``__`` removed, runs of blank lines
        collapsed to a single blank line, runs of spaces collapsed to one
        space, and leading/trailing whitespace stripped.
    """
    if not text:
        return ""
    # Strip the markers *before* collapsing whitespace: removing them
    # afterwards re-creates double spaces ("a ** b") and stacked blank lines.
    text = text.replace("**", "").replace("__", "")
    text = re.sub(r'\n\s*\n', '\n\n', text)
    text = re.sub(r'[ ]+', ' ', text)
    return text.strip()
def extract_section(text: str, heading: str) -> str:
    """Return the body of the section introduced by ``heading`` in ``text``.

    A section starts at ``<heading>:`` followed by a newline and runs until
    the next line that begins with an uppercase letter and contains a colon,
    or until the end of the text.

    Args:
        text: Document to search.
        heading: Section heading, matched literally and case-insensitively.

    Returns:
        The stripped section body, or ``""`` when the heading is not found
        or extraction fails (failures are logged, not raised).
    """
    try:
        # Case-insensitivity is scoped to the heading with (?i:...). Applying
        # re.IGNORECASE to the whole pattern would make the [A-Z] in the
        # terminating lookahead match lowercase letters too, so any ordinary
        # lowercase line containing a colon would cut the section short.
        pattern = rf"(?i:{re.escape(heading)}):\s*\n(.*?)(?=\n[A-Z][^\n]*:|\Z)"
        match = re.search(pattern, text, re.DOTALL)
        return match.group(1).strip() if match else ""
    except Exception as e:
        logger.error(f"Section extraction failed for heading '{heading}': {e}")
        return ""
def structure_medical_response(text: str) -> Dict:
    """Split a free-text medical analysis into its four known sections.

    ``**`` and ``__`` markers are removed from the text first. Each output
    key is then looked up under a primary heading and, if that yields
    nothing, under a fallback heading.

    Args:
        text: Analysis text. A non-string value (for example ``None``)
            produces a result whose values are all empty strings.

    Returns:
        A dict with the keys ``summary``, ``risks``, ``missed_issues`` and
        ``recommendations``; each value is the extracted section text with
        leading list markers removed, or ``""`` when nothing was found.
    """
    # Output key -> (primary heading, fallback heading).
    heading_variants = {
        "summary": (
            "Summary of Patient's Medical History",
            "Summarize the patient's medical history",
        ),
        "risks": (
            "Identify Risks or Red Flags",
            "Risks or Red Flags",
        ),
        "missed_issues": (
            "Missed Diagnoses or Treatments",
            "What the doctor might have missed",
        ),
        "recommendations": (
            "Suggest Next Clinical Steps",
            "Suggested Clinical Actions",
        ),
    }

    def extract_improved(text: str, heading: str) -> str:
        """Return the text under ``heading`` up to the next blank line."""
        escaped = re.escape(heading)
        # Tried in order; the first pattern that matches wins, even when the
        # text it captures is empty.
        patterns = [
            rf"{escaped}:\s*\n(.*?)(?=\n\s*\n|\Z)",
            rf"\*\*{escaped}\*\*:\s*\n(.*?)(?=\n\s*\n|\Z)",
            rf"{escaped}[\s\-]+(.*?)(?=\n\s*\n|\Z)",
            rf"\n{escaped}\s*\n(.*?)(?=\n\s*\n|\Z)",
        ]
        for pattern in patterns:
            match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
            if match:
                content = match.group(1).strip()
                # Drop a leading "-" or "*" list marker from every line.
                return re.sub(r'^\s*[\-\*]\s*', '', content, flags=re.MULTILINE)
        return ""

    # Best-effort: a missing or non-text response gives an empty structure
    # instead of raising AttributeError on .replace().
    if not isinstance(text, str):
        return {key: "" for key in heading_variants}

    text = text.replace('**', '').replace('__', '')
    structured = {}
    for key, (primary, fallback) in heading_variants.items():
        structured[key] = (
            extract_improved(text, primary) or extract_improved(text, fallback)
        )
    return structured
def serialize_patient(patient: dict) -> dict:
    """Return a shallow copy of ``patient`` with its ``_id`` value stringified.

    The input mapping is left untouched. When there is no ``_id`` key the
    copy is returned unchanged.
    """
    serialized = patient.copy()
    if "_id" not in serialized:
        return serialized
    serialized["_id"] = str(serialized["_id"])
    return serialized
def compute_patient_data_hash(data: dict) -> str:
    """Return the SHA-256 hex digest of ``data`` serialized as JSON.

    Keys are sorted during serialization, so two dicts with the same
    content hash identically regardless of insertion order.
    """
    canonical_bytes = json.dumps(data, sort_keys=True).encode()
    hasher = hashlib.sha256()
    hasher.update(canonical_bytes)
    return hasher.hexdigest()
def compute_file_content_hash(file_content: bytes) -> str:
    """Return the SHA-256 hex digest of the given raw file bytes."""
    hasher = hashlib.sha256()
    hasher.update(file_content)
    return hasher.hexdigest()