Ali2206 committed on
Commit
3069ccd
·
verified ·
1 Parent(s): ac9926b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -31
app.py CHANGED
@@ -19,6 +19,7 @@ import speech_recognition as sr
19
  from gtts import gTTS
20
  from pydub import AudioSegment
21
  import PyPDF2
 
22
  from txagent.txagent import TxAgent
23
  from db.mongo import get_mongo_client
24
 
@@ -27,7 +28,7 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(
27
  logger = logging.getLogger("TxAgentAPI")
28
 
29
  # App
30
- app = FastAPI(title="TxAgent API", version="2.4.0") # Updated version for file upload support
31
 
32
  app.add_middleware(
33
  CORSMiddleware,
@@ -55,10 +56,6 @@ class VoiceOutputRequest(BaseModel):
55
  slow: bool = False
56
  return_format: str = "mp3" # mp3 or base64
57
 
58
- class PatientReportAnalysisRequest(BaseModel):
59
- patient_id: str
60
- file_type: str = "text" # text, pdf, audio
61
-
62
  # Enums
63
  class RiskLevel(str, Enum):
64
  NONE = "none"
@@ -191,6 +188,10 @@ def compute_patient_data_hash(data: dict) -> str:
191
  serialized = json.dumps(data, sort_keys=True)
192
  return hashlib.sha256(serialized.encode()).hexdigest()
193
 
 
 
 
 
194
  def extract_text_from_pdf(pdf_data: bytes) -> str:
195
  """Extract text from a PDF file."""
196
  try:
@@ -203,18 +204,19 @@ def extract_text_from_pdf(pdf_data: bytes) -> str:
203
  logger.error(f"Error extracting text from PDF: {e}")
204
  raise HTTPException(status_code=400, detail="Failed to extract text from PDF")
205
 
206
- async def analyze_patient_report(patient_id: str, report_content: str, file_type: str):
207
  """Analyze a patient report and store results."""
208
  try:
209
- # Compute hash of report content
210
- report_data = {"patient_id": patient_id, "content": report_content, "file_type": file_type}
 
211
  report_hash = compute_patient_data_hash(report_data)
212
- logger.info(f"🧾 Analyzing report for patient: {patient_id}")
213
 
214
  # Check if analysis exists and hash matches
215
- existing_analysis = await analysis_collection.find_one({"patient_id": patient_id, "report_hash": report_hash})
216
  if existing_analysis:
217
- logger.info(f"✅ No changes in report data for {patient_id}, skipping analysis")
218
  return existing_analysis
219
 
220
  # Construct analysis prompt
@@ -246,7 +248,8 @@ async def analyze_patient_report(patient_id: str, report_content: str, file_type
246
 
247
  # Store analysis
248
  analysis_doc = {
249
- "patient_id": patient_id,
 
250
  "timestamp": datetime.utcnow(),
251
  "summary": structured_response,
252
  "suicide_risk": suicide_risk,
@@ -256,16 +259,16 @@ async def analyze_patient_report(patient_id: str, report_content: str, file_type
256
  }
257
 
258
  await analysis_collection.update_one(
259
- {"patient_id": patient_id, "report_hash": report_hash},
260
  {"$set": analysis_doc},
261
  upsert=True
262
  )
263
 
264
- # Create alert for high-risk cases
265
- if risk_level in [RiskLevel.MODERATE, RiskLevel.HIGH, RiskLevel.SEVERE]:
266
  await create_alert(patient_id, suicide_risk)
267
 
268
- logger.info(f"✅ Stored analysis for patient report {patient_id}")
269
  return analysis_doc
270
 
271
  except Exception as e:
@@ -314,6 +317,7 @@ async def analyze_patient(patient: dict):
314
  }
315
 
316
  analysis_doc = {
 
317
  "patient_id": patient_id,
318
  "timestamp": datetime.utcnow(),
319
  "summary": structured,
@@ -323,7 +327,7 @@ async def analyze_patient(patient: dict):
323
  }
324
 
325
  await analysis_collection.update_one(
326
- {"patient_id": patient_id},
327
  {"$set": analysis_doc},
328
  upsert=True
329
  )
@@ -401,7 +405,7 @@ async def status():
401
  return {
402
  "status": "running",
403
  "timestamp": datetime.utcnow().isoformat(),
404
- "version": "2.4.0",
405
  "features": ["chat", "voice-input", "voice-output", "patient-analysis", "report-upload"]
406
  }
407
 
@@ -420,7 +424,7 @@ async def get_patient_analysis_results(name: Optional[str] = Query(None)):
420
  analyses = await analysis_collection.find(query).sort("timestamp", -1).to_list(length=100)
421
  enriched_results = []
422
  for analysis in analyses:
423
- patient = await patients_collection.find_one({"fhir_id": analysis["patient_id"]})
424
  if patient:
425
  analysis["full_name"] = patient.get("full_name", "Unknown")
426
  analysis["_id"] = str(analysis["_id"])
@@ -540,20 +544,36 @@ async def voice_chat_endpoint(
540
 
541
  @app.post("/patient/upload-report")
542
  async def upload_patient_report(
543
- patient_id: str = Form(...),
544
- file_type: str = Form("text", description="Type of file: text, pdf, audio"),
545
  file: UploadFile = File(...)
546
  ):
547
  """Upload and analyze a patient report (text, PDF, or audio)."""
548
  try:
549
- # Validate patient ID
550
- patient = await patients_collection.find_one({"fhir_id": patient_id})
551
- if not patient:
552
- raise HTTPException(status_code=404, detail=f"Patient {patient_id} not found")
553
-
554
- # Validate file type
555
- if file_type not in ["text", "pdf", "audio"]:
556
- raise HTTPException(status_code=400, detail="Invalid file type. Supported: text, pdf, audio")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
557
 
558
  # Read and process file
559
  file_content = await file.read()
@@ -565,10 +585,10 @@ async def upload_patient_report(
565
  elif file_type == "audio":
566
  report_content = recognize_speech(file_content, language="en-US")
567
  else:
568
- raise HTTPException(status_code=400, detail="Unsupported file type")
569
 
570
  # Analyze the report
571
- analysis = await analyze_patient_report(patient_id, report_content, file_type)
572
  return JSONResponse(content=analysis)
573
 
574
  except HTTPException:
 
19
  from gtts import gTTS
20
  from pydub import AudioSegment
21
  import PyPDF2
22
+ import mimetypes
23
  from txagent.txagent import TxAgent
24
  from db.mongo import get_mongo_client
25
 
 
28
  logger = logging.getLogger("TxAgentAPI")
29
 
30
  # App
31
+ app = FastAPI(title="TxAgent API", version="2.6.0") # Updated version for optional patient_id
32
 
33
  app.add_middleware(
34
  CORSMiddleware,
 
56
  slow: bool = False
57
  return_format: str = "mp3" # mp3 or base64
58
 
 
 
 
 
59
  # Enums
60
  class RiskLevel(str, Enum):
61
  NONE = "none"
 
188
  serialized = json.dumps(data, sort_keys=True)
189
  return hashlib.sha256(serialized.encode()).hexdigest()
190
 
191
+ def compute_file_content_hash(file_content: bytes) -> str:
192
+ """Compute SHA-256 hash of file content."""
193
+ return hashlib.sha256(file_content).hexdigest()
194
+
195
  def extract_text_from_pdf(pdf_data: bytes) -> str:
196
  """Extract text from a PDF file."""
197
  try:
 
204
  logger.error(f"Error extracting text from PDF: {e}")
205
  raise HTTPException(status_code=400, detail="Failed to extract text from PDF")
206
 
207
+ async def analyze_patient_report(patient_id: Optional[str], report_content: str, file_type: str, file_content: bytes):
208
  """Analyze a patient report and store results."""
209
  try:
210
+ # Use file content hash as identifier if no patient_id is provided
211
+ identifier = patient_id if patient_id else compute_file_content_hash(file_content)
212
+ report_data = {"identifier": identifier, "content": report_content, "file_type": file_type}
213
  report_hash = compute_patient_data_hash(report_data)
214
+ logger.info(f"🧾 Analyzing report for identifier: {identifier}")
215
 
216
  # Check if analysis exists and hash matches
217
+ existing_analysis = await analysis_collection.find_one({"identifier": identifier, "report_hash": report_hash})
218
  if existing_analysis:
219
+ logger.info(f"✅ No changes in report data for {identifier}, skipping analysis")
220
  return existing_analysis
221
 
222
  # Construct analysis prompt
 
248
 
249
  # Store analysis
250
  analysis_doc = {
251
+ "identifier": identifier,
252
+ "patient_id": patient_id, # May be None
253
  "timestamp": datetime.utcnow(),
254
  "summary": structured_response,
255
  "suicide_risk": suicide_risk,
 
259
  }
260
 
261
  await analysis_collection.update_one(
262
+ {"identifier": identifier, "report_hash": report_hash},
263
  {"$set": analysis_doc},
264
  upsert=True
265
  )
266
 
267
+ # Create alert for high-risk cases only if patient_id is provided
268
+ if patient_id and risk_level in [RiskLevel.MODERATE, RiskLevel.HIGH, RiskLevel.SEVERE]:
269
  await create_alert(patient_id, suicide_risk)
270
 
271
+ logger.info(f"✅ Stored analysis for identifier {identifier}")
272
  return analysis_doc
273
 
274
  except Exception as e:
 
317
  }
318
 
319
  analysis_doc = {
320
+ "identifier": patient_id,
321
  "patient_id": patient_id,
322
  "timestamp": datetime.utcnow(),
323
  "summary": structured,
 
327
  }
328
 
329
  await analysis_collection.update_one(
330
+ {"identifier": patient_id},
331
  {"$set": analysis_doc},
332
  upsert=True
333
  )
 
405
  return {
406
  "status": "running",
407
  "timestamp": datetime.utcnow().isoformat(),
408
+ "version": "2.6.0",
409
  "features": ["chat", "voice-input", "voice-output", "patient-analysis", "report-upload"]
410
  }
411
 
 
424
  analyses = await analysis_collection.find(query).sort("timestamp", -1).to_list(length=100)
425
  enriched_results = []
426
  for analysis in analyses:
427
+ patient = await patients_collection.find_one({"fhir_id": analysis.get("patient_id")})
428
  if patient:
429
  analysis["full_name"] = patient.get("full_name", "Unknown")
430
  analysis["_id"] = str(analysis["_id"])
 
544
 
545
  @app.post("/patient/upload-report")
546
  async def upload_patient_report(
547
+ patient_id: Optional[str] = Form(None),
 
548
  file: UploadFile = File(...)
549
  ):
550
  """Upload and analyze a patient report (text, PDF, or audio)."""
551
  try:
552
+ # Validate patient_id if provided
553
+ if patient_id:
554
+ patient = await patients_collection.find_one({"fhir_id": patient_id})
555
+ if not patient:
556
+ raise HTTPException(status_code=404, detail=f"Patient {patient_id} not found")
557
+ logger.info(f"Uploading report for patient: {patient_id}")
558
+ else:
559
+ logger.info("Uploading report without patient ID")
560
+
561
+ # Determine file type from extension or MIME type
562
+ file_extension = file.filename.lower().split('.')[-1] if file.filename else ''
563
+ mime_type = file.content_type or mimetypes.guess_type(file.filename)[0] or 'application/octet-stream'
564
+ logger.info(f"Detected file extension: {file_extension}, MIME type: {mime_type}")
565
+
566
+ if file_extension in ['txt'] or mime_type.startswith('text'):
567
+ file_type = 'text'
568
+ elif file_extension == 'pdf' or mime_type == 'application/pdf':
569
+ file_type = 'pdf'
570
+ elif file_extension in ['wav', 'mp3', 'ogg', 'flac'] or mime_type.startswith('audio'):
571
+ file_type = 'audio'
572
+ else:
573
+ raise HTTPException(
574
+ status_code=400,
575
+ detail=f"Unsupported file type: {file_extension}. Supported: text (.txt), PDF (.pdf), audio (.wav, .mp3, .ogg, .flac)"
576
+ )
577
 
578
  # Read and process file
579
  file_content = await file.read()
 
585
  elif file_type == "audio":
586
  report_content = recognize_speech(file_content, language="en-US")
587
  else:
588
+ raise HTTPException(status_code=400, detail="Error processing file type")
589
 
590
  # Analyze the report
591
+ analysis = await analyze_patient_report(patient_id, report_content, file_type, file_content)
592
  return JSONResponse(content=analysis)
593
 
594
  except HTTPException: