Bonosa2 committed on
Commit
fc51947
·
verified ·
1 Parent(s): 9b82c5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +499 -646
app.py CHANGED
@@ -1,718 +1,571 @@
1
- # -*- coding: utf-8
 
 
2
 
3
- # 🏥 Gemma 3N SOAP Note Generator
4
- #-*- coding: utf-8
5
-
6
- # 🏥 Gemma 3N SOAP Note Generator
7
-
8
-
9
- # Enable widgets
10
-
11
-
12
-
13
- # Enable widgets
14
-
15
-
16
- import torch
17
- from transformers import AutoProcessor, AutoModelForImageTextToText
18
- import gradio as gr
19
- import ipywidgets as widgets
20
- from IPython.display import display, clear_output
21
- import io
22
- import base64
23
- from datetime import datetime
24
- from huggingface_hub import login
25
- import getpass
26
-
27
- # Authenticate with HuggingFace
28
- # Replace the authentication section (lines around the getpass part) with this:
29
-
30
- # Import libraries and authenticate
31
  import torch
32
- from transformers import AutoProcessor, AutoModelForImageTextToText
33
  import gradio as gr
34
- import ipywidgets as widgets
35
- from IPython.display import display, clear_output
36
  import io
37
  import base64
38
  from datetime import datetime
39
- from huggingface_hub import login
40
  import os
41
  import easyocr
42
- from PIL import Image
43
-
44
-
45
-
46
- # Authenticate with HuggingFace
47
- print("πŸ” HuggingFace Authentication Required")
48
-
49
- # Try to get token from environment variable first (for production/HF Spaces)
50
- hf_token = os.environ.get('HF_TOKEN') or os.environ.get('HUGGINGFACE_TOKEN')
51
 
52
- if hf_token:
53
- print("βœ… Found HF token in environment variables")
54
  try:
55
- login(token=hf_token)
56
- print("βœ… Successfully authenticated with HuggingFace!")
57
- except Exception as e:
58
- print(f"❌ Authentication failed: {e}")
59
- print("Please check your token and try again.")
60
- # Check GPU availability
61
- import os
62
- os.environ["CUDA_VISIBLE_DEVICES"] = "" # Hide all GPUs from PyTorch
63
-
64
- device = "cuda" if torch.cuda.is_available() else "cpu"
65
- device = "cpu"
66
- print(f"πŸ–₯️ Using device: {device}")
67
- if torch.cuda.is_available():
68
- print(f"πŸš€ GPU: {torch.cuda.get_device_name(0)}")
69
- print(f"πŸ’Ύ GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
70
- else:
71
- print("⚠️ Running on CPU - this will be slower")
72
-
73
- # Load Gemma 3N model
74
- print("πŸ“‘ Loading Gemma 3N model...")
75
- model_id = "google/gemma-3n-e2b-it"
76
-
77
- print("πŸ”§ Loading processor...")
78
- processor = AutoProcessor.from_pretrained(model_id)
79
-
80
- print("πŸ€– Loading Gemma 3N model (this may take a few minutes)...")
81
- model = AutoModelForImageTextToText.from_pretrained(
82
- model_id,
83
- torch_dtype=torch.float16 if device == "cuda" else torch.float32,
84
- low_cpu_mem_usage=True,
85
- ).to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
- print("βœ… Gemma 3N model loaded successfully!")
88
- print(f"πŸ“Š Model size: ~2.9GB")
89
- print(f"🎯 Ready for SOAP note generation!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
- # SOAP Note Generation Function
92
- def generate_soap_note(doctor_notes, include_timestamp=True):
93
- """
94
- Generate a SOAP note from unstructured doctor's notes
95
- """
96
  if not doctor_notes.strip():
97
  return "❌ Please enter some medical notes to process."
98
-
99
- prompt = f"""You are a medical AI assistant. Convert the following unstructured doctor's notes into a professional SOAP note format.
100
-
101
- Doctor's Notes:
 
 
 
 
 
102
  {doctor_notes}
103
 
104
- Please generate a structured SOAP note with the following sections:
105
- - SUBJECTIVE: Patient's reported symptoms and history
106
- - OBJECTIVE: Physical examination findings, vital signs, and test results
107
- - ASSESSMENT: Clinical diagnosis and reasoning
108
- - PLAN: Treatment plan, medications, and follow-up
109
 
110
- Format your response as a proper medical SOAP note with specific details extracted from the notes."""
 
 
111
 
112
- try:
113
- # Process input
114
- inputs = processor(text=prompt, return_tensors="pt").to(device)
115
 
116
- # Generate response
117
- print("πŸ”„ Generating SOAP note with Gemma 3N...")
 
 
 
 
 
 
 
 
 
118
  with torch.no_grad():
119
  outputs = model.generate(
120
  **inputs,
121
- max_new_tokens=512,
122
- temperature=0.3, # Lower temperature for medical precision
 
123
  do_sample=True,
124
- pad_token_id=processor.tokenizer.eos_token_id
 
 
125
  )
126
-
127
  # Decode response
128
- generated_text = processor.decode(outputs[0], skip_special_tokens=True)
129
-
130
- # Extract only the generated part (remove the prompt)
131
- soap_response = generated_text[len(prompt):].strip()
132
-
133
- # Add header if requested
 
 
 
 
 
 
134
  if include_timestamp:
135
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
136
- header = f"""πŸ“‹ SOAP NOTE - Generated by Gemma 3N
137
  πŸ• Timestamp: {timestamp}
138
- πŸ€– Model: google/gemma-3n-e2b-it
139
- πŸ”’ Processed locally on device: {device.upper()}
 
140
 
141
  {'='*60}
142
  """
143
  return header + soap_response
144
-
145
  return soap_response
146
-
147
  except Exception as e:
148
- return f"❌ Error generating SOAP note: {str(e)}"
149
-
150
- print("βœ… SOAP generation function ready!")
151
-
152
- """## πŸ“ Interactive SOAP Note Generator
153
- ### Enter medical notes below and generate professional SOAP documentation
154
- """
155
-
156
- # Create interactive widgets
157
- print("🎨 Creating interactive interface...")
158
-
159
- # Text input area
160
- notes_input = widgets.Textarea(
161
- value='',
162
- placeholder='Enter unstructured doctor notes here...\n\nExample:\nPatient John Smith, 45yo male, came in complaining of chest pain for 2 days. Pain is sharp, 7/10 intensity, worse with movement. Vital signs: BP 140/90, HR 88, Temp 98.6F...',
163
- description='Medical Notes:',
164
- layout=widgets.Layout(width='100%', height='200px')
165
- )
166
-
167
- # File upload widget
168
- file_upload = widgets.FileUpload(
169
- accept='.txt,.doc,.docx,.pdf',
170
- multiple=False,
171
- description='Or upload file:',
172
- layout=widgets.Layout(width='300px')
173
- )
174
-
175
- # Generate button
176
- generate_btn = widgets.Button(
177
- description='πŸ€– Generate SOAP Note',
178
- button_style='primary',
179
- layout=widgets.Layout(width='200px', height='40px')
180
- )
181
-
182
- # Output area
183
- output_area = widgets.HTML(
184
- value='<p style="color: #666;">πŸ“‹ Ready to generate SOAP notes! Enter medical notes above or upload a file.</p>',
185
- layout=widgets.Layout(width='100%', height='400px', overflow='auto', border='1px solid #ddd', padding='10px')
186
- )
187
-
188
- # Example buttons
189
- example1_btn = widgets.Button(description='πŸ“‹ Chest Pain Example', button_style='info', layout=widgets.Layout(width='180px'))
190
- example2_btn = widgets.Button(description='🩺 Diabetes Follow-up', button_style='info', layout=widgets.Layout(width='180px'))
191
- example3_btn = widgets.Button(description='πŸ‘Ά Pediatric Visit', button_style='info', layout=widgets.Layout(width='180px'))
192
-
193
- # Clear button
194
- clear_btn = widgets.Button(description='πŸ—‘οΈ Clear', button_style='warning', layout=widgets.Layout(width='100px'))
195
-
196
- print("βœ… Interface widgets created!")
197
-
198
- # Example medical notes
199
- examples = {
200
- 'chest_pain': """Patient John Smith, 45yo male, came in complaining of chest pain for 2 days. Pain is sharp, 7/10 intensity, worse with movement. No radiation to arms. Vital signs: BP 140/90, HR 88, Temp 98.6F, RR 16, O2 sat 98%. Physical exam shows tenderness over left chest wall, no murmurs. EKG normal sinus rhythm. Chest X-ray clear. Diagnosed with costochondritis. Prescribed ibuprofen 600mg TID and advised rest. Follow up in 1 week if symptoms persist.""",
201
-
202
- 'diabetes': """Sarah Johnson, 62yo female with Type 2 diabetes, here for routine follow-up. Says blood sugars have been running high lately, 180-220 mg/dL. Taking metformin 1000mg BID. Diet has been poor due to holiday stress. Weight increased 5 lbs since last visit. BP 150/85, BMI 32. HbA1c 8.2% (was 7.1% 3 months ago). Feet exam normal, no neuropathy. Plan to increase metformin to 1000mg TID, refer to nutritionist, recheck labs in 3 months.""",
203
-
204
- 'pediatric': """Tommy Rodriguez, 8yo male, brought by mother for fever and cough x3 days. Fever up to 102F, productive cough with yellow sputum. Decreased appetite, no vomiting or diarrhea. Vital signs: Temp 101.2F, HR 110, RR 24, BP 95/60. Exam shows bilateral crackles in lower lobes, no wheeze. Throat clear. Diagnosed with bacterial pneumonia. Prescribed amoxicillin 500mg BID x10 days. Return if fever persists >48 hours on antibiotics."""
205
- }
206
-
207
- # Event handlers
208
- def on_generate_click(b):
209
- with output_area:
210
- output_area.value = '<p style="color: #007bff;">πŸ”„ Processing with Gemma 3N... Please wait...</p>'
211
-
212
- # Get text from input or uploaded file
213
- text_to_process = notes_input.value
214
-
215
- # Check if file was uploaded
216
- if file_upload.value and len(file_upload.value) > 0:
217
- try:
218
- uploaded_file = list(file_upload.value.values())[0]
219
- file_content = uploaded_file['content'].decode('utf-8')
220
- text_to_process = file_content
221
- notes_input.value = file_content # Show in text area
222
- except Exception as e:
223
- output_area.value = f'<p style="color: #dc3545;">❌ Error reading file: {str(e)}</p>'
224
- return
225
-
226
- if not text_to_process.strip():
227
- output_area.value = '<p style="color: #dc3545;">❌ Please enter medical notes or upload a file!</p>'
228
- return
229
-
230
- # Generate SOAP note
231
- soap_note = generate_soap_note(text_to_process)
232
-
233
- # Format output as HTML
234
- formatted_output = f'<pre style="font-family: monospace; font-size: 12px; line-height: 1.4; white-space: pre-wrap;">{soap_note}</pre>'
235
- output_area.value = formatted_output
236
-
237
- def on_example1_click(b):
238
- notes_input.value = examples['chest_pain']
239
- output_area.value = '<p style="color: #28a745;">βœ… Chest pain example loaded! Click "Generate SOAP Note" to process.</p>'
240
-
241
- def on_example2_click(b):
242
- notes_input.value = examples['diabetes']
243
- output_area.value = '<p style="color: #28a745;">βœ… Diabetes follow-up example loaded! Click "Generate SOAP Note" to process.</p>'
244
-
245
- def on_example3_click(b):
246
- notes_input.value = examples['pediatric']
247
- output_area.value = '<p style="color: #28a745;">βœ… Pediatric example loaded! Click "Generate SOAP Note" to process.</p>'
248
-
249
- def on_clear_click(b):
250
- notes_input.value = ''
251
- file_upload.value = ()
252
- output_area.value = '<p style="color: #666;">πŸ“‹ Ready to generate SOAP notes! Enter medical notes above or upload a file.</p>'
253
-
254
- # Bind event handlers
255
- generate_btn.on_click(on_generate_click)
256
- example1_btn.on_click(on_example1_click)
257
- example2_btn.on_click(on_example2_click)
258
- example3_btn.on_click(on_example3_click)
259
- clear_btn.on_click(on_clear_click)
260
-
261
- print("βœ… Event handlers configured!")
262
-
263
- # Define example medical notes first
264
- example_notes_1 = """
265
- Patient: John Smith, 45-year-old male
266
- Chief Complaint: Chest pain for 2 hours
267
- History: Patient reports sudden onset of sharp chest pain while at work. Pain is 7/10 intensity, located substernal, radiating to left arm. Associated with shortness of breath and diaphoresis. No previous cardiac history. Denies nausea or vomiting.
268
- Physical Exam: VS: BP 150/90, HR 110, RR 22, O2 Sat 96% on RA. Patient appears anxious and diaphoretic. Heart: Regular rhythm, no murmurs. Lungs: Clear bilaterally. Extremities: No edema.
269
- Assessment: Acute chest pain, rule out myocardial infarction
270
- Plan: EKG, cardiac enzymes, chest X-ray, aspirin 325mg, continuous cardiac monitoring
271
- """
272
-
273
- example_notes_2 = """
274
- Patient: Sarah Johnson, 28-year-old female
275
- Chief Complaint: Severe headache and fever
276
- History: 3-day history of progressive headache, fever up to 101.5°F, photophobia, and neck stiffness. Patient reports this is the worst headache of her life. No recent travel or sick contacts. No rash noted.
277
- Physical Exam: VS: T 101.2°F, BP 130/80, HR 95, RR 18. Patient appears ill and photophobic. HEENT: Pupils equal and reactive. Neck: Stiff with positive Kernig's sign. Neurologic: Alert and oriented x3, no focal deficits.
278
- Assessment: Suspected meningitis
279
- Plan: Lumbar puncture, blood cultures, empiric antibiotics, supportive care
280
- """
281
 
282
- example_notes_3 = """
283
- Patient: Robert Davis, 62-year-old male
284
- Chief Complaint: Shortness of breath and leg swelling
285
- History: 2-week history of progressive dyspnea on exertion, orthopnea, and bilateral lower extremity edema. Patient has history of hypertension and diabetes. Reports sleeping on 3 pillows due to breathing difficulty.
286
- Physical Exam: VS: BP 140/85, HR 88, RR 24, O2 Sat 92% on RA. Heart: S3 gallop present, JVD elevated. Lungs: Bilateral rales in lower fields. Extremities: 2+ pitting edema bilaterally.
287
- Assessment: Congestive heart failure exacerbation
288
- Plan: Chest X-ray, BNP, echocardiogram, furosemide, ACE inhibitor, daily weights
289
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
- # Event handlers
292
- def on_generate_click(b):
293
- try:
294
- # Update the HTML widget directly
295
- output_area.value = '<p style="color: #007bff;">πŸ”„ Processing with Gemma 3N... Please wait...</p>'
296
-
297
- # Get input text
298
- input_text = notes_input.value.strip()
299
-
300
- # Check if file was uploaded
301
- if file_upload.value:
302
- try:
303
- # Process uploaded file
304
- uploaded_file = list(file_upload.value.values())[0]
305
- file_content = uploaded_file['content'].decode('utf-8')
306
- input_text = file_content
307
- except Exception as upload_error:
308
- output_area.value = f'<p style="color: #ff6b6b;">❌ File upload error: {str(upload_error)}</p>'
309
- return
310
-
311
- if not input_text:
312
- output_area.value = '<p style="color: #ff6b6b;">⚠️ Please enter medical notes or upload a file first!</p>'
313
- return
314
-
315
- # Check if generate_soap_note function exists
316
- if 'generate_soap_note' not in globals():
317
- output_area.value = '<p style="color: #ff6b6b;">❌ Error: generate_soap_note function not found. Please define it first.</p>'
318
- return
319
-
320
- # Generate SOAP note using Gemma
321
- soap_note = generate_soap_note(input_text)
322
-
323
- # Escape HTML in soap_note to prevent rendering issues
324
- import html
325
- escaped_soap_note = html.escape(soap_note)
326
-
327
- # Display result
328
- output_area.value = f'''
329
- <div style="background: #f8f9fa; padding: 15px; border-radius: 8px; border-left: 4px solid #28a745;">
330
- <h4 style="color: #28a745; margin-top: 0;">βœ… Generated SOAP Note:</h4>
331
- <pre style="white-space: pre-wrap; font-family: 'Courier New', monospace; background: white; padding: 15px; border-radius: 5px; border: 1px solid #ddd;">{escaped_soap_note}</pre>
332
- </div>
333
- '''
334
-
335
- except Exception as e:
336
- import traceback
337
- error_details = traceback.format_exc()
338
- output_area.value = f'''
339
- <div style="color: #ff6b6b; background: #ffe6e6; padding: 15px; border-radius: 5px;">
340
- <h4>❌ Error Details:</h4>
341
- <p><strong>Error:</strong> {str(e)}</p>
342
- <details>
343
- <summary>Click for full traceback</summary>
344
- <pre style="font-size: 12px; background: #fff; padding: 10px; border-radius: 3px; margin-top: 10px;">{error_details}</pre>
345
- </details>
346
- </div>
347
- '''
348
-
349
- def on_clear_click(b):
350
  try:
351
- notes_input.value = ""
352
- file_upload.value = ()
353
- output_area.value = '<p>πŸ“‹ Ready to generate SOAP notes! Enter medical notes above or upload a file.</p>'
 
354
  except Exception as e:
355
- output_area.value = f'<p style="color: #ff6b6b;">❌ Clear error: {str(e)}</p>'
356
-
357
- def on_example_click(example_text):
358
- def handler(b):
359
- try:
360
- notes_input.value = example_text
361
- output_area.value = '<p style="color: #28a745;">πŸ“‹ Example loaded! Click "Generate SOAP Note" to process.</p>'
362
- except Exception as e:
363
- output_area.value = f'<p style="color: #ff6b6b;">❌ Example load error: {str(e)}</p>'
364
- return handler
365
-
366
- # Connect event handlers to buttons
367
- try:
368
- generate_btn.on_click(on_generate_click)
369
- clear_btn.on_click(on_clear_click)
370
- example1_btn.on_click(on_example_click(example_notes_1))
371
- example2_btn.on_click(on_example_click(example_notes_2))
372
- example3_btn.on_click(on_example_click(example_notes_3))
373
-
374
- print("βœ… Event handlers connected successfully!")
375
- print("πŸ“‹ Example notes loaded:")
376
- print(" - Example 1: Chest pain case")
377
- print(" - Example 2: Suspected meningitis")
378
- print(" - Example 3: Heart failure")
379
-
380
- except Exception as e:
381
- print(f"❌ Error connecting event handlers: {str(e)}")
382
- import traceback
383
- traceback.print_exc()
384
-
385
- """## 🌐 Alternative: Gradio Web Interface
386
- ### Run this cell for a shareable web interface
387
- """
388
-
389
- # Install required packages for image processing and OCR
390
-
391
- import gradio as gr
392
- import torch
393
- from PIL import Image
394
- import pytesseract
395
- import cv2
396
- import numpy as np
397
- import easyocr
398
- import io
399
-
400
- # First, make sure you have the examples dictionary defined
401
- examples = {
402
- 'chest_pain': """Patient: John Smith, 45-year-old male
403
- Chief Complaint: Chest pain for 2 hours
404
- History: Patient reports sudden onset of sharp chest pain while at work. Pain is 7/10 intensity, located substernal, radiating to left arm. Associated with shortness of breath and diaphoresis. No previous cardiac history. Denies nausea or vomiting.
405
- Physical Exam: VS: BP 150/90, HR 110, RR 22, O2 Sat 96% on RA. Patient appears anxious and diaphoretic. Heart: Regular rhythm, no murmurs. Lungs: Clear bilaterally. Extremities: No edema.
406
- Assessment: Acute chest pain, rule out myocardial infarction
407
- Plan: EKG, cardiac enzymes, chest X-ray, aspirin 325mg, continuous cardiac monitoring""",
408
-
409
- 'diabetes': """Patient: Maria Garcia, 52-year-old female
410
- Chief Complaint: Increased thirst and frequent urination for 3 weeks
411
- History: Patient reports polyuria, polydipsia, and unintentional weight loss of 10 lbs over past month. Family history of diabetes. Denies fever, abdominal pain, or vision changes.
412
- Physical Exam: VS: BP 140/85, HR 88, RR 16, BMI 28. Patient appears well but slightly dehydrated. HEENT: Dry mucous membranes. Cardiovascular: Regular rate and rhythm. Extremities: No diabetic foot changes noted.
413
- Assessment: New onset diabetes mellitus, likely Type 2
414
- Plan: HbA1c, fasting glucose, comprehensive metabolic panel, diabetic education, metformin initiation""",
415
-
416
- 'pediatric': """Patient: Emma Thompson, 8-year-old female
417
- Chief Complaint: Fever and sore throat for 2 days
418
- History: Mother reports fever up to 102°F, sore throat, difficulty swallowing, and decreased appetite. No cough or runny nose. Several classmates have been sick with similar symptoms.
419
- Physical Exam: VS: T 101.8°F, HR 110, RR 20, O2 Sat 99%. Patient appears mildly ill but alert. HEENT: Throat erythematous with tonsillar exudate, anterior cervical lymphadenopathy. Heart and lungs: Normal.
420
- Assessment: Streptococcal pharyngitis (probable)
421
- Plan: Rapid strep test, throat culture, amoxicillin if positive, supportive care, return if worsening"""
422
- }
423
-
424
- # Initialize EasyOCR reader (better for handwritten text)
425
- try:
426
- ocr_reader = easyocr.Reader(['en'])
427
- print("βœ… EasyOCR initialized successfully")
428
- except:
429
- ocr_reader = None
430
- print("⚠️ EasyOCR not available, using Tesseract only")
431
-
432
- def preprocess_image_for_ocr(image):
433
- """
434
- Preprocess image to improve OCR accuracy
435
- """
436
- # Convert PIL Image to numpy array
437
- img_array = np.array(image)
438
-
439
- # Convert to grayscale if needed
440
- if len(img_array.shape) == 3:
441
- gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
442
- else:
443
- gray = img_array
444
-
445
- # Apply image preprocessing for better OCR
446
- # 1. Resize image if too small
447
- height, width = gray.shape
448
- if height < 300 or width < 300:
449
- scale_factor = max(300/height, 300/width)
450
- new_width = int(width * scale_factor)
451
- new_height = int(height * scale_factor)
452
- gray = cv2.resize(gray, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
453
-
454
- # 2. Noise removal
455
- denoised = cv2.medianBlur(gray, 3)
456
 
457
- # 3. Contrast enhancement
458
- clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
459
- enhanced = clahe.apply(denoised)
460
-
461
- # 4. Thresholding
462
- _, thresh = cv2.threshold(enhanced, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
463
-
464
- return thresh
465
-
466
- def extract_text_from_image(image):
467
- """
468
- Extract text from image using multiple OCR methods
469
- """
470
  if image is None:
471
  return "❌ No image provided"
472
-
473
  try:
474
- # Preprocess image
475
- processed_img = preprocess_image_for_ocr(image)
476
-
477
- # Method 1: Try EasyOCR (better for handwritten text)
 
 
478
  if ocr_reader is not None:
479
  try:
480
- # Convert back to PIL Image for EasyOCR
481
- pil_img = Image.fromarray(processed_img)
482
- results = ocr_reader.readtext(np.array(pil_img))
483
-
484
- # Extract text from EasyOCR results
485
- easyocr_text = ' '.join([result[1] for result in results])
486
-
487
- if len(easyocr_text.strip()) > 20: # If we got good results
488
- return clean_extracted_text(easyocr_text)
489
-
490
  except Exception as e:
491
  print(f"EasyOCR failed: {e}")
492
-
493
- # Method 2: Tesseract OCR (fallback)
494
  try:
495
- # Configure Tesseract for medical text
496
- custom_config = r'--oem 3 --psm 6'
 
 
 
497
  tesseract_text = pytesseract.image_to_string(processed_img, config=custom_config)
498
-
499
- if len(tesseract_text.strip()) > 10:
500
- return clean_extracted_text(tesseract_text)
501
-
502
  except Exception as e:
503
  print(f"Tesseract failed: {e}")
504
-
505
- return "❌ Could not extract text from image. Please try a clearer image or enter text manually."
506
-
507
  except Exception as e:
508
  return f"❌ Error processing image: {str(e)}"
509
 
510
- def clean_extracted_text(text):
511
- """
512
- Clean up extracted text
513
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
514
  # Remove excessive whitespace and empty lines
515
  lines = [line.strip() for line in text.split('\n') if line.strip()]
516
- cleaned_text = '\n'.join(lines)
517
-
518
- # Remove special characters that might interfere
519
- cleaned_text = cleaned_text.replace('|', '').replace('_', ' ')
520
-
521
- return cleaned_text.strip()
522
-
523
- def gradio_generate_soap(medical_notes, uploaded_image):
524
- """
525
- Modified Gradio interface function for SOAP generation from images
526
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
  text_to_process = medical_notes.strip() if medical_notes else ""
528
-
529
- # If image is uploaded, extract text using OCR
530
  if uploaded_image is not None:
531
  try:
532
- print("πŸ” Extracting text from uploaded image...")
533
- extracted_text = extract_text_from_image(uploaded_image)
534
-
535
- # Check if OCR was successful
536
- if extracted_text.startswith("❌"):
537
- return extracted_text
538
-
539
- # Use extracted text if manual text is empty or append to manual text
540
- if not text_to_process:
541
- text_to_process = extracted_text
542
  else:
543
- text_to_process = f"{text_to_process}\n\n--- Extracted from image ---\n{extracted_text}"
544
-
545
  except Exception as e:
546
  return f"❌ Error processing image: {str(e)}"
547
-
548
  if not text_to_process:
549
- return "❌ Please enter medical notes manually or upload a PNG/JPG image with medical text"
550
-
551
- # Check if generate_soap_note function exists
552
- if 'generate_soap_note' not in globals():
553
- return "❌ Error: generate_soap_note function not found. Please define it first."
554
-
555
  try:
556
- return generate_soap_note(text_to_process)
557
  except Exception as e:
558
  return f"❌ Error generating SOAP note: {str(e)}"
559
 
560
- # Create example images (you can replace these with actual medical note images)
561
- def create_example_image(text, filename):
562
- """
563
- Create example images from text (for demonstration)
564
- """
565
- from PIL import Image, ImageDraw, ImageFont
566
-
567
- # Create a white image
568
- img = Image.new('RGB', (800, 600), color='white')
569
- draw = ImageDraw.Draw(img)
570
-
571
- try:
572
- # Try to use a default font
573
- font = ImageFont.load_default()
574
- except:
575
- font = None
576
-
577
- # Add text to image
578
- lines = text.split('\n')
579
- y_offset = 20
580
- for line in lines[:15]: # Limit to first 15 lines
581
- draw.text((20, y_offset), line, fill='black', font=font)
582
- y_offset += 25
583
-
584
- return img
585
-
586
- # Create Gradio interface
587
- gradio_interface = gr.Interface(
588
- fn=gradio_generate_soap,
589
- inputs=[
590
- gr.Textbox(
591
- lines=6,
592
- placeholder="Enter medical notes manually (optional)...\n\nOr upload an image below and text will be extracted automatically.",
593
- label="πŸ“ Medical Notes (Manual Entry)"
594
- ),
595
- gr.Image(
596
- type="pil",
597
- label="πŸ“· Upload Medical Image (PNG/JPG only)",
598
- sources=["upload", "webcam"], # FIXED: Changed "camera" to "webcam"
599
- image_mode="RGB"
600
- )
601
- ],
602
- outputs=[
603
- gr.Textbox(
604
- lines=15,
605
- label="πŸ“‹ Generated SOAP Note",
606
- show_copy_button=True
607
- )
608
- ],
609
- title="πŸ₯ Medical Image SOAP Note Generator",
610
- description="""
611
- Transform medical images (PNG/JPG) into professional SOAP documentation using OCR + Gemma 3N model.
612
-
613
- πŸ“Έ **How to use:**
614
- 1. Upload a PNG or JPG image of medical notes (typed or handwritten)
615
- 2. Or enter text manually in the text box above
616
- 3. The system will extract text from images using OCR
617
- 4. Generate structured SOAP notes automatically
618
-
619
- πŸ’‘ **Tips for better OCR results:**
620
- - Use clear, high-resolution images
621
- - Ensure good lighting and contrast
622
- - Keep text horizontal (not tilted)
623
- - Handwritten text works best when clearly written
624
- """,
625
- examples=[
626
- [examples['chest_pain'], None],
627
- [examples['diabetes'], None],
628
- [examples['pediatric'], None]
629
- ],
630
- theme=gr.themes.Soft(),
631
- flagging_mode="never"
632
- )
633
-
634
- # Launch Gradio interface with flexible port selection
635
- print("πŸš€ Launching Medical Image SOAP Generator...")
636
 
637
- try:
638
- # Try different ports if 7860 is busy
639
- for port in [7860, 7861, 7862, 7863, 7864]:
640
- try:
641
- gradio_interface.launch(
642
- share=True, # Creates a public shareable link
643
- server_port=port,
644
- show_error=True,
645
- quiet=False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
646
  )
647
- print(f"βœ… Interface launched successfully on port {port}")
648
- break
649
- except OSError as port_error:
650
- print(f"⚠️ Port {port} is busy, trying next port...")
651
- continue
652
- else:
653
- # If all ports are busy, let Gradio choose automatically
654
- print("πŸ”„ All preferred ports busy, letting Gradio choose automatically...")
655
- gradio_interface.launch(
656
- share=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
657
  show_error=True,
658
  quiet=False
659
  )
660
-
661
- except Exception as e:
662
- print(f"❌ Error launching Gradio interface: {str(e)}")
663
- print("πŸ’‘ Alternative: Try running without share=True:")
664
- print("gradio_interface.launch(show_error=True)")
665
-
666
- print("🎯 Medical Image SOAP Generator ready!")
667
- print("πŸ“Έ Upload PNG/JPG images of medical notes for automatic text extraction and SOAP generation")
668
-
669
- """## πŸ“Š Usage Statistics & Model Info"""
670
-
671
- # Display model and system information
672
- import psutil
673
- import GPUtil
674
-
675
- def show_system_info():
676
- print("πŸ”§ SYSTEM INFORMATION")
677
- print("="*50)
678
- print(f"πŸ–₯️ Device: {device.upper()}")
679
- print(f"🧠 CPU Usage: {psutil.cpu_percent(interval=1):.1f}%")
680
- print(f"πŸ’Ύ RAM Usage: {psutil.virtual_memory().percent:.1f}%")
681
-
682
- if torch.cuda.is_available():
683
- try:
684
- gpus = GPUtil.getGPUs()
685
- if gpus:
686
- gpu = gpus[0]
687
- print(f"πŸš€ GPU: {gpu.name}")
688
- print(f"πŸ“Š GPU Usage: {gpu.load*100:.1f}%")
689
- print(f"πŸ”₯ GPU Memory: {gpu.memoryUsed}/{gpu.memoryTotal} MB ({gpu.memoryPercent:.1f}%)")
690
- print(f"🌑️ GPU Temp: {gpu.temperature}°C")
691
- except:
692
- print(f"πŸš€ GPU Memory: {torch.cuda.memory_allocated()/1e9:.1f}GB / {torch.cuda.memory_reserved()/1e9:.1f}GB")
693
-
694
- print("\nπŸ€– MODEL INFORMATION")
695
- print("="*50)
696
- print(f"πŸ“‘ Model ID: {model_id}")
697
- print(f"🎯 Model Type: Multimodal (Text, Image, Audio)")
698
- print(f"πŸ“Š Model Size: ~2.9GB")
699
- print(f"πŸ”’ Parameters: ~2.9B")
700
- print(f"🌍 Languages: 140 text + 35 multimodal")
701
- print(f"πŸ’½ Precision: {model.dtype}")
702
-
703
- print("\nβœ… Ready for SOAP note generation!")
704
-
705
- show_system_info()
706
-
707
- """---
708
- ## πŸ“‹ SOAP Note Format Reference
709
-
710
- **S - SUBJECTIVE**: Patient's reported symptoms and history
711
- **O - OBJECTIVE**: Observable clinical findings
712
- **A - ASSESSMENT**: Clinical diagnosis/impression
713
- **P - PLAN**: Treatment and follow-up plan
714
-
715
- ---
716
- *πŸ€– Powered by Google's Gemma 3N Model | πŸ”’ All processing performed locally*
717
- """
718
-
 
1
+ # -*- coding: utf-8 -*-
2
+ # 🏥 Gemma 3N SOAP Note Generator with Unsloth
3
+ # Optimized for offline medical documentation
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import torch
 
6
  import gradio as gr
 
 
7
  import io
8
  import base64
9
  from datetime import datetime
 
10
  import os
11
  import easyocr
12
+ from PIL import Image, ImageDraw, ImageFont
13
+ import cv2
14
+ import numpy as np
15
+ import psutil
 
 
 
 
 
16
 
17
+ # Import Unsloth for optimized Gemma 3n
 
18
  try:
19
+ from unsloth import FastModel
20
+ print("✅ Unsloth imported successfully")
21
+ UNSLOTH_AVAILABLE = True
22
+ except ImportError:
23
+ print("❌ Unsloth not available. Install with: pip install unsloth")
24
+ UNSLOTH_AVAILABLE = False
25
+
26
+ # Device setup
27
+ def setup_device():
28
+ device = "cuda" if torch.cuda.is_available() else "cpu"
29
+ print(f"🖥️ Using device: {device}")
30
+
31
+ if torch.cuda.is_available():
32
+ print(f"🚀 GPU: {torch.cuda.get_device_name(0)}")
33
+ print(f"💾 GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
34
+ else:
35
+ print("⚠️ Running on CPU - will be slower but works offline")
36
+
37
+ return device
38
+
39
+ # Load Unsloth Gemma 3n model
40
+ def load_unsloth_gemma_model(device):
41
+ """Load optimized Gemma 3n model using Unsloth"""
42
+
43
+ if not UNSLOTH_AVAILABLE:
44
+ print("❌ Unsloth not available. Using fallback method.")
45
+ return load_fallback_model()
46
+
47
+ try:
48
+ print("📡 Loading Unsloth-optimized Gemma 3n model...")
49
+
50
+ # Use the 4-bit quantized model for efficiency
51
+ model_name = "unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit"
52
+
53
+ print(f"🔧 Loading model: {model_name}")
54
+
55
+ # Load with Unsloth optimizations
56
+ model, tokenizer = FastModel.from_pretrained(
57
+ model_name=model_name,
58
+ dtype=None, # Auto-detect
59
+ max_seq_length=1024, # Good for medical notes
60
+ load_in_4bit=True, # 4-bit quantization for efficiency
61
+ full_finetuning=False,
62
+ )
63
+
64
+ print("βœ… Unsloth Gemma 3n model loaded successfully!")
65
+ print(f"πŸ“Š Model: {model_name}")
66
+ print(f"πŸ’Ύ Memory optimized with 4-bit quantization")
67
+ print(f"🎯 Ready for medical SOAP note generation!")
68
+
69
+ return model, tokenizer
70
+
71
+ except Exception as e:
72
+ print(f"❌ Error loading Unsloth model: {e}")
73
+ print("πŸ’‘ Trying fallback model...")
74
+ return load_fallback_model()
75
 
76
def load_fallback_model():
    """Load a small causal LM via transformers when Unsloth cannot be used.

    Returns:
        ``(model, tokenizer)`` on success, or ``(None, None)`` if anything in
        the import or loading steps raises.
    """
    try:
        from transformers import AutoTokenizer, AutoModelForCausalLM

        print("πŸ”„ Loading fallback model...")
        checkpoint = "microsoft/DialoGPT-medium"

        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        # Reuse EOS as the padding token when the checkpoint defines none.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Half precision only when CUDA is available.
        if torch.cuda.is_available():
            weights_dtype = torch.float16
        else:
            weights_dtype = torch.float32

        model = AutoModelForCausalLM.from_pretrained(
            checkpoint,
            torch_dtype=weights_dtype,
            low_cpu_mem_usage=True,
        )

        print("βœ… Fallback model loaded!")
        return model, tokenizer

    except Exception as e:
        print(f"❌ Fallback model also failed: {e}")
        return None, None
100
 
101
+ # Enhanced SOAP Note Generation with Gemma 3n
102
def generate_soap_note_gemma(doctor_notes, model=None, tokenizer=None, include_timestamp=True):
    """Generate a SOAP note from free-text notes with the loaded language model.

    Args:
        doctor_notes: Unstructured medical notes. ``None``, empty and
            whitespace-only values yield an error message instead of a note.
        model: Object exposing ``generate``; when ``None`` the template
            generator is used.
        tokenizer: Tokenizer (or processor wrapping one) matching ``model``;
            when ``None`` the template generator is used.
        include_timestamp: Prepend a header with the generation time.

    Returns:
        The SOAP note text, an error message for empty input, or the output of
        ``generate_template_soap`` when no model is available or generation
        raises.
    """
    if not doctor_notes or not doctor_notes.strip():
        return "❌ Please enter some medical notes to process."

    if model is None or tokenizer is None:
        return generate_template_soap(doctor_notes, include_timestamp)

    # The prompt ends inside the model turn so the model continues the
    # SUBJECTIVE section directly.
    # NOTE(review): the tokenizer normally adds its own BOS token, so the
    # literal <bos> here may be duplicated - confirm against the tokenizer.
    prompt = f"""<bos><start_of_turn>user
You are a medical AI assistant specialized in creating SOAP notes. Convert the following unstructured medical notes into a professional SOAP note format.

Medical Notes:
{doctor_notes}

Please create a structured SOAP note with these sections:
- SUBJECTIVE: Patient's reported symptoms, complaints, and relevant history
- OBJECTIVE: Physical examination findings, vital signs, and observable data
- ASSESSMENT: Clinical diagnosis, differential diagnosis, and medical reasoning
- PLAN: Treatment recommendations, medications, tests, and follow-up care

<end_of_turn>
<start_of_turn>model
SOAP NOTE:

SUBJECTIVE:"""

    try:
        # A processor object wraps the text tokenizer in `.tokenizer`; a plain
        # tokenizer has no such attribute and is used as-is.
        text_tokenizer = getattr(tokenizer, "tokenizer", tokenizer)

        # `text=` keyword works for plain tokenizers and avoids the prompt
        # being bound to a different first positional parameter on
        # multimodal processors (presumably `images` - verify for Gemma 3n).
        # NOTE(review): truncating to 512 tokens cuts the END of the prompt,
        # i.e. the instructions and model turn, when the notes are long.
        inputs = tokenizer(
            text=prompt,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True
        )

        # Inputs must live on the same device as the model weights.
        model_device = getattr(model, "device", None)
        if model_device is not None and hasattr(inputs, "to"):
            inputs = inputs.to(model_device)

        prompt_length = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=400,
                temperature=0.2,  # Lower temperature for medical precision
                top_p=0.9,
                do_sample=True,
                repetition_penalty=1.1,
                pad_token_id=text_tokenizer.eos_token_id,
                eos_token_id=text_tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens. Slicing the decoded string
        # by len(prompt) is unreliable because special tokens are stripped
        # from the decoded text but counted in the prompt.
        continuation = text_tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )
        soap_response = ("SUBJECTIVE:" + continuation).strip()

        soap_response = clean_soap_response(soap_response)

        if include_timestamp:
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            header = f"""πŸ“‹ SOAP NOTE - Generated by Gemma 3n
πŸ• Timestamp: {timestamp}
πŸ€– Model: Unsloth-optimized Gemma 3n (4-bit quantized)
πŸ”’ Processed locally on device
πŸ₯ Medical Documentation Assistant

{'='*60}
"""
            return header + soap_response

        return soap_response

    except Exception as e:
        # Best effort: any failure in tokenizing/generating falls back to the
        # keyword-based template so the user still gets a note.
        print(f"❌ Generation error: {e}")
        return generate_template_soap(doctor_notes, include_timestamp)
183
+
184
def clean_soap_response(response):
    """Normalize whitespace and SOAP section headers in generated text.

    Blank lines are dropped, every line is stripped, and each section header
    (SUBJECTIVE / OBJECTIVE / ASSESSMENT / PLAN, any letter case) is preceded
    by one blank line. A bare header word gets a trailing colon; a header that
    already carries a colon - with or without content after it - is kept
    unchanged.

    Args:
        response: Raw model output; ``None`` or empty yields ``""``.

    Returns:
        The cleaned text.
    """
    if not response:
        return ""

    section_names = {"SUBJECTIVE", "OBJECTIVE", "ASSESSMENT", "PLAN"}
    cleaned_lines = []

    for raw_line in response.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        # A header is the section word alone or the section word followed by
        # a colon. Prefix matching would wrongly treat ordinary sentences such
        # as "Planning follow-up" as headers and append a colon to them.
        label, colon, _ = line.partition(':')
        if label.strip().upper() in section_names:
            if not colon:
                line += ':'
            cleaned_lines.append(f"\n{line}")
        else:
            cleaned_lines.append(line)

    return '\n'.join(cleaned_lines).strip()
203
+
204
+ # Template-based SOAP generation (enhanced fallback)
205
def generate_template_soap(doctor_notes, include_timestamp=True):
    """Build a SOAP note from keyword matches, without using a model.

    Each input line is assigned to a section when it contains one of that
    section's keywords; a line may appear in more than one section.

    Args:
        doctor_notes: Unstructured notes, one observation per line.
        include_timestamp: Prepend a header with the generation time.

    Returns:
        The formatted SOAP note text.
    """
    lines = doctor_notes.split('\n')

    subjective_info = extract_section_info(lines, [
        'complains', 'reports', 'states', 'denies', 'pain', 'symptoms',
        'history', 'onset', 'duration', 'patient says', 'chief complaint'
    ])

    objective_info = extract_section_info(lines, [
        'vital signs', 'vs:', 'bp', 'hr', 'temp', 'examination', 'exam',
        'physical', 'inspection', 'palpation', 'auscultation', 'laboratory'
    ])

    assessment_info = extract_section_info(lines, [
        'diagnosis', 'impression', 'assessment', 'likely', 'possible',
        'rule out', 'differential', 'icd', 'condition'
    ])

    plan_info = extract_section_info(lines, [
        'plan', 'treatment', 'medication', 'prescribe', 'follow', 'return',
        'therapy', 'intervention', 'monitoring', 'referral'
    ])

    soap_note = build_soap_sections(subjective_info, objective_info, assessment_info, plan_info)

    if include_timestamp:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        header = f"""πŸ“‹ SOAP NOTE (Template-Enhanced)
πŸ• Timestamp: {timestamp}
πŸ”’ Processed locally - HIPAA compliant
πŸ₯ Scribbled Docs Medical Assistant

{'='*60}
"""
        return header + soap_note

    return soap_note
247
+
248
def extract_section_info(lines, keywords):
    """Return the stripped lines that mention any of the given keywords.

    Matching is case-insensitive and anchored at the start of a word, so a
    keyword matches itself and longer words it begins ('temp' matches
    "Temp" and "temperature") but not words that merely contain it ('hr'
    no longer matches "throat", 'icd' no longer matches "suicidal").

    Args:
        lines: Iterable of text lines.
        keywords: Iterable of keyword strings; empty entries are ignored.

    Returns:
        List of matching lines, stripped, in input order.
    """
    import re  # local import: `re` is not among the imports visible at the top of this file

    terms = [re.escape(keyword) for keyword in keywords if keyword]
    if not terms:
        # An empty alternation would match every line.
        return []

    pattern = re.compile(r"(?<!\w)(?:" + "|".join(terms) + r")", re.IGNORECASE)
    return [line.strip() for line in lines if pattern.search(line)]
255
+
256
def build_soap_sections(subjective, objective, assessment, plan):
    """Render the four SOAP sections as bulleted text.

    Each section shows at most a fixed number of entries (5, 5, 3 and 5
    respectively); an empty section gets a single placeholder bullet.

    Returns:
        The sections joined by blank lines, in S-O-A-P order.
    """
    # (title, entries, max entries shown, placeholder when empty)
    layout = (
        ("SUBJECTIVE", subjective, 5,
         "Patient complaints and reported symptoms as documented"),
        ("OBJECTIVE", objective, 5,
         "Physical examination findings and clinical observations as documented"),
        ("ASSESSMENT", assessment, 3,
         "Clinical assessment based on presenting symptoms and examination findings"),
        ("PLAN", plan, 5,
         "Treatment plan and follow-up care as clinically indicated"),
    )

    rendered = []
    for title, entries, limit, placeholder in layout:
        shown = entries[:limit] if entries else [placeholder]
        bullets = '\n'.join(f"β€’ {item}" for item in shown)
        rendered.append(f"{title}:\n{bullets}")

    return "\n\n".join(rendered)
284
 
285
+ # OCR Functions (same as before but optimized)
286
def initialize_ocr():
    """Create the EasyOCR reader used for note images.

    Returns:
        An English ``easyocr.Reader`` (GPU-backed when CUDA is available), or
        ``None`` if construction raises.
    """
    try:
        languages = ['en']
        reader = easyocr.Reader(languages, gpu=torch.cuda.is_available())
        print("βœ… EasyOCR initialized for handwritten medical notes")
        return reader
    except Exception as e:
        # OCR is optional: callers treat None as "reader unavailable".
        print(f"⚠️ EasyOCR initialization failed: {e}")
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
def extract_text_from_image(image, ocr_reader=None):
    """Extract text from a note image, trying EasyOCR first and then Tesseract.

    Args:
        image: Image to read; ``None`` yields an error message.
        ocr_reader: Optional reader exposing ``readtext``; skipped when ``None``.

    Returns:
        The cleaned extracted text, or a message starting with "❌" when no
        engine produced enough text or processing raised.
    """
    if image is None:
        return "❌ No image provided"

    try:
        prepared = preprocess_medical_image(image)

        # First choice: EasyOCR. Its result is accepted only when it holds
        # more than 10 non-padding characters.
        if ocr_reader is not None:
            try:
                fragments = ocr_reader.readtext(prepared, detail=0, paragraph=True)
                joined = ' '.join(fragments) if fragments else ""
                if len(joined.strip()) > 10:
                    return clean_medical_text(joined)
            except Exception as e:
                print(f"EasyOCR failed: {e}")

        # Second choice: Tesseract, imported lazily so it stays optional.
        try:
            import pytesseract

            custom_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,;:()[]{}/-+= '
            tesseract_text = pytesseract.image_to_string(prepared, config=custom_config)
            if len(tesseract_text.strip()) > 5:
                return clean_medical_text(tesseract_text)
        except Exception as e:
            print(f"Tesseract failed: {e}")

        return "❌ Could not extract text from image. Please ensure the image is clear and try again."

    except Exception as e:
        return f"❌ Error processing image: {str(e)}"
338
 
339
def preprocess_medical_image(image):
    """Prepare a note image for OCR and return it as a thresholded array.

    Steps, in order: grayscale conversion, upscaling when either side is
    under 400 px, denoising, CLAHE contrast enhancement, a morphological
    close, and adaptive Gaussian thresholding.

    Returns:
        The binarized image array, or ``np.array(image)`` unchanged if any
        step raises.
    """
    try:
        pixels = np.array(image)

        # Three-dimensional arrays are treated as RGB; anything else is used as-is.
        gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY) if len(pixels.shape) == 3 else pixels

        # Upscale so that both sides reach at least 400 px.
        height, width = gray.shape
        if height < 400 or width < 400:
            scale = max(400/height, 400/width)
            target_size = (int(width * scale), int(height * scale))
            gray = cv2.resize(gray, target_size, interpolation=cv2.INTER_CUBIC)

        denoised = cv2.fastNlMeansDenoising(gray)

        contrast = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
        enhanced = contrast.apply(denoised)

        close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,1))
        closed = cv2.morphologyEx(enhanced, cv2.MORPH_CLOSE, close_kernel)

        return cv2.adaptiveThreshold(
            closed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
        )

    except Exception as e:
        print(f"❌ Image preprocessing error: {e}")
        return np.array(image)
380
+
381
def clean_medical_text(text):
    """Tidy OCR output: drop noise lines and normalize a few abbreviations.

    Per line: '|' becomes 'l', '_' becomes a space, '~' becomes '-', and the
    standalone tokens bp / hr / rr / o2 (any letter case) are upper-cased.
    Only whole tokens are changed, so ordinary words that contain those
    letters ("throat", "chronic") are left intact. Lines of one character or
    fewer after cleaning are discarded.

    Args:
        text: Raw OCR text; ``None`` or empty yields ``""``.

    Returns:
        The cleaned lines joined with newlines.
    """
    import re  # local import: `re` is not among the imports visible at the top of this file

    if not text:
        return ""

    artifact_table = str.maketrans({'|': 'l', '_': ' ', '~': '-'})
    vitals_pattern = re.compile(r"\b(?:bp|hr|rr|o2)\b", re.IGNORECASE)

    cleaned_lines = []
    for raw_line in text.split('\n'):
        # Strip again after translation: '_' may have turned into edge spaces.
        line = raw_line.translate(artifact_table).strip()
        line = vitals_pattern.sub(lambda match: match.group(0).upper(), line)

        if len(line) > 1:  # Filter out single characters
            cleaned_lines.append(line)

    return '\n'.join(cleaned_lines)
405
+
406
+ # Enhanced Gradio Interface
407
def gradio_generate_soap(medical_notes, uploaded_image, model_data):
    """Gradio callback: merge typed notes with OCR text and build a SOAP note.

    Args:
        medical_notes: Text typed by the user (may be ``None`` or empty).
        uploaded_image: Optional image to run OCR on.
        model_data: ``(model, tokenizer)`` pair, or a falsy value for none.

    Returns:
        The generated SOAP note, or a message starting with "❌" on failure.
    """
    model, tokenizer = model_data or (None, None)
    # The OCR reader is attached to this function object as an attribute.
    ocr_reader = getattr(gradio_generate_soap, 'ocr_reader', None)

    text_to_process = medical_notes.strip() if medical_notes else ""

    if uploaded_image is not None:
        try:
            print("πŸ” Extracting text from medical image...")
            extracted_text = extract_text_from_image(uploaded_image, ocr_reader)

            # NOTE(review): an OCR failure is returned immediately, so any
            # typed notes are discarded - confirm this is intended.
            if extracted_text.startswith("❌"):
                return extracted_text

            if text_to_process:
                text_to_process = f"{text_to_process}\n\n--- Additional text from image ---\n{extracted_text}"
            else:
                text_to_process = f"--- Extracted from uploaded image ---\n{extracted_text}"

        except Exception as e:
            return f"❌ Error processing image: {str(e)}"

    if not text_to_process:
        return "❌ Please enter medical notes manually or upload an image with medical text"

    try:
        return generate_soap_note_gemma(text_to_process, model, tokenizer)
    except Exception as e:
        return f"❌ Error generating SOAP note: {str(e)}"
439
 
440
+ # Example medical notes for testing
441
# Sample notes offered as one-click examples in the UI, keyed by scenario.
_CHEST_PAIN_EXAMPLE = "\n".join((
    "Patient: John Smith, 45yo M",
    "CC: Chest pain x 2 hours",
    "HPI: Sudden onset sharp substernal chest pain 7/10, radiating to L arm. Associated SOB, diaphoresis. No N/V.",
    "PMH: HTN, no CAD",
    "VS: BP 150/90, HR 110, RR 22, O2 96% RA",
    "PE: Anxious, diaphoretic. RRR, no murmur. CTAB. No edema.",
    "A: Acute chest pain, r/o MI",
    "P: EKG, troponins, CXR, ASA 325mg, monitor",
))

_DIABETES_EXAMPLE = "\n".join((
    "Patient: Maria Garcia, 52yo F",
    "CC: Increased thirst, urination x 3 weeks",
    "HPI: Polyuria, polydipsia, 10lb weight loss. FH DM. No fever, abd pain.",
    "VS: BP 140/85, HR 88, BMI 28",
    "PE: Mild dehydration, dry MM. RRR. No diabetic foot changes.",
    "Labs: Random glucose 280, HbA1c pending",
    "A: New onset DM Type 2",
    "P: HbA1c, CMP, diabetic education, metformin, f/u 2 weeks",
))

_PEDIATRIC_EXAMPLE = "\n".join((
    "Patient: Emma Thompson, 8yo F",
    "CC: Fever, sore throat x 2 days",
    "HPI: Fever 102F, sore throat, odynophagia, decreased appetite. No cough/rhinorrhea.",
    "VS: T 101.8F, HR 110, RR 20, O2 99%",
    "PE: Alert, mildly ill. Throat erythematous w/ tonsillar exudate. Anterior cervical LAD.",
    "A: Strep pharyngitis (probable)",
    "P: Rapid strep, throat culture, amoxicillin if +, supportive care, RTC PRN",
))

medical_examples = {
    'chest_pain': _CHEST_PAIN_EXAMPLE,
    'diabetes': _DIABETES_EXAMPLE,
    'pediatric': _PEDIATRIC_EXAMPLE,
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
 
469
+ # Initialize everything
470
def initialize_app():
    """Prepare device, language model and OCR reader for the app.

    Side effect: stores the OCR reader on ``gradio_generate_soap.ocr_reader``.

    Returns:
        The ``(model, tokenizer)`` pair produced by the model loader.
    """
    print("πŸš€ Initializing Scribbled Docs SOAP Generator...")

    # Device detection feeds the model loader.
    model, tokenizer = load_unsloth_gemma_model(setup_device())

    # The Gradio callback reads its OCR reader from this function attribute.
    gradio_generate_soap.ocr_reader = initialize_ocr()

    return model, tokenizer
485
+
486
+ # Create the main Gradio interface
487
def create_interface(model, tokenizer):
    """Build the Gradio interface that turns notes or images into SOAP notes.

    Args:
        model: Loaded language model, or ``None`` to use template generation.
        tokenizer: Tokenizer matching ``model``, or ``None``.

    Returns:
        The configured ``gr.Interface``; the caller is responsible for
        launching it.
    """

    def run(notes, image):
        # Bind the loaded model pair so the UI only supplies notes and image.
        return gradio_generate_soap(notes, image, (model, tokenizer))

    interface = gr.Interface(
        fn=run,
        inputs=[
            gr.Textbox(
                lines=8,
                placeholder="Enter medical notes here...\n\nExample:\nPatient: John Doe, 45yo M\nCC: Chest pain\nVS: BP 140/90, HR 88\n...",
                label="πŸ“ Medical Notes (Manual Entry)",
                info="Enter unstructured medical notes or upload an image below"
            ),
            # gr.Image has no documented `info` argument; passing one is
            # expected to raise TypeError when the component is built, so the
            # hint text lives in the label and the description instead.
            gr.Image(
                type="pil",
                label="πŸ“· Upload Medical Image (Handwritten/Typed Notes)",
                sources=["upload", "webcam"]
            )
        ],
        outputs=[
            gr.Textbox(
                lines=20,
                label="πŸ“‹ Generated SOAP Note",
                show_copy_button=True,
                info="Professional SOAP note generated from your input"
            )
        ],
        title="πŸ₯ Scribbled Docs - Medical SOAP Note Generator",
        description="""
**Transform medical notes into professional SOAP documentation using Gemma 3n AI**

πŸ”’ **100% Offline & HIPAA Compliant** - All processing happens locally on your device
πŸ€– **Powered by Unsloth-optimized Gemma 3n** - 4-bit quantized for efficiency
πŸ“ **Supports handwritten & typed notes** - Advanced OCR for medical handwriting

**Instructions:**
1. Enter medical notes manually OR upload an image
2. Click Submit to generate a structured SOAP note
3. Copy the result for use in your medical records

**Perfect for:** Emergency medicine, family practice, internal medicine, pediatrics
""",
        examples=[
            [medical_examples['chest_pain'], None],
            [medical_examples['diabetes'], None],
            [medical_examples['pediatric'], None]
        ],
        theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="green"
        ),
        allow_flagging="never",
        analytics_enabled=False
    )

    return interface
543
+
544
+ # Main execution
545
if __name__ == "__main__":
    try:
        # Load the model and OCR reader, then build the UI around them.
        model, tokenizer = initialize_app()
        interface = create_interface(model, tokenizer)

        print("\n🎯 Scribbled Docs SOAP Generator Ready!")
        print("πŸ“± Features:")
        print(" βœ… Offline processing (HIPAA compliant)")
        print(" βœ… Unsloth-optimized Gemma 3n model")
        print(" βœ… Handwritten note OCR")
        print(" βœ… Professional SOAP formatting")
        print(" βœ… Medical terminology aware")

        # A public share link exposes the app - and any patient notes typed
        # into it - through an external tunnel, which contradicts the
        # offline/local-processing claims above. It is therefore opt-in:
        # set GRADIO_SHARE=1 (or true/yes) to enable it.
        share_publicly = os.environ.get("GRADIO_SHARE", "").strip().lower() in {"1", "true", "yes"}

        interface.launch(
            share=share_publicly,
            server_port=7860,
            show_error=True,
            quiet=False
        )

    except Exception as e:
        print(f"❌ Error launching application: {e}")
        print("πŸ’‘ Make sure you have installed: pip install unsloth gradio easyocr opencv-python")