|
import gradio as gr |
|
import os |
|
from typing import Tuple, Optional |
|
import os |
|
import shutil |
|
import sys |
|
from pathlib import Path |
|
import cv2 |
|
import gradio as gr |
|
import numpy as np |
|
import spaces |
|
|
|
import torch |
|
from PIL import Image |
|
from tqdm import tqdm |
|
import sys |
|
from pathlib import Path |
|
from huggingface_hub import login |
|
|
|
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig |
|
|
|
|
|
# Log in to the Hugging Face Hub only when an access token is supplied through
# the HF_TOKEN environment variable; without it the app continues unauthenticated.
token = os.environ.get("HF_TOKEN")
if token:
    login(token=token)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Directory containing this file; appended to sys.path so that imports are also
# resolved relative to it (presumably for the project packages imported next —
# confirm they live alongside this file).
current_dir = Path(__file__).parent
sys.path.append(os.fspath(current_dir))
|
|
|
|
|
|
|
|
|
from modeling.BaseModel import BaseModel |
|
from modeling import build_model |
|
from utilities.arguments import load_opt_from_config_files |
|
from utilities.constants import BIOMED_CLASSES |
|
from inference_utils.inference import interactive_infer_image |
|
from inference_utils.output_processing import check_mask_stats |
|
from inference_utils.processing_utils import read_rgb |
|
|
|
import spaces |
|
|
|
|
|
# HTML header for the demo page: title, a row of badge links, and a short
# description of the supported imaging modalities. Rendered through gr.HTML.
MARKDOWN = """
<div align="center" style="padding: 20px 0;">
<h1 style="font-size: 3em; margin: 0;">
ሀ<span style="color: #32CD32;">A</span>ኪ<span style="color: #FFD700;">i</span>ም
<sup style="font-size: 0.5em;">AI</sup>
</h1>

<div style="display: flex; justify-content: center; align-items: center; gap: 15px; margin: 15px 0;">
<a href="https://cyberbrainai.com/">
<img src="https://cyberbrainai.com/assets/logo.svg" alt="CyberBrain AI" style="width:40px; height:40px; vertical-align: middle;">
</a>
<a href="https://colab.research.google.com/drive/1p3Yf_6xdZPMz5RUtt_NyxrDjrbSgvTDy#scrollTo=t30NqIrCKdAI">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="ድinቅneሽ" style="vertical-align: middle;">
</a>
<a href="https://www.youtube.com/watch?v=Dv003fTyO-Y">
<img src="https://badges.aleen42.com/src/youtube.svg" alt="YouTube" style="vertical-align: middle;">
</a>
</div>
</div>
<div>
<p style="font-size: 1.4em; line-height: 1.5; margin: 15px 0; text-align: left;">
This demo integrates BiomedParse, a foundation model for joint segmentation, detection, and recognition across 9 biomedical imaging modalities.
The model supports <span style="color: #FF4500;">CT</span>, <span style="color: #4169E1;">MRI</span>, <span style="color: #32CD32;">X-Ray</span>, <span style="color: #9370DB;">Pathology</span>, <span style="color: #FFD700;">Ultrasound</span>, <span style="color: #FF69B4;">Endoscope</span>, <span style="color: #20B2AA;">Fundus</span>, <span style="color: #FF8C00;">Dermoscopy</span>, and <span style="color: #8B008B;">OCT</span>.
</p>
</div>

"""
|
|
|
# Example inputs for the demo. Each row is [task label, image URL, prompt text].
# Not referenced by the rest of the visible file.
#
# The two breast-pathology rows previously carried prompts for other anatomy
# ("optic disc, optic cup" and "Glandular structure in colon Pathology"); they
# now use pathology targets that also appear in MODALITY_PROMPTS["Pathology"].
_EXAMPLES_BASE_URL = "https://raw.githubusercontent.com/microsoft/BiomedParse/main/examples/"

IMAGE_PROCESSING_EXAMPLES = [
    ["BiomedParse Segmentation",
     _EXAMPLES_BASE_URL + "T0011.jpg",
     "Optic disc in retinal Fundus"],
    ["BiomedParse Segmentation",
     _EXAMPLES_BASE_URL + "Part_3_226_pathology_breast.png",
     "Neoplastic cells in breast Pathology"],
    ["BiomedParse Segmentation",
     _EXAMPLES_BASE_URL + "covid_1585.png",
     "COVID-19 infection in chest X-Ray"],
    ["BiomedParse Segmentation",
     _EXAMPLES_BASE_URL + "TCGA_HT_7856_19950831_8_MRI-FLAIR_brain.png",
     "Lower-grade glioma in brain MRI"],
    # NOTE(review): the file name suggests a lung CT, but whether it actually
    # shows a COVID-19 infection cannot be told from here — confirm the prompt.
    ["BiomedParse Segmentation",
     _EXAMPLES_BASE_URL + "LIDC-IDRI-0140_143_280_CT_lung.png",
     "COVID-19 infection in chest CT"],
    ["BiomedParse Segmentation",
     _EXAMPLES_BASE_URL + "144DME_as_F.jpeg",
     "Cystoid macular edema in retinal OCT"],
    ["BiomedParse Segmentation",
     _EXAMPLES_BASE_URL + "Part_1_516_pathology_breast.png",
     "neoplastic cells, inflammatory cells"],
    ["BiomedParse Segmentation",
     _EXAMPLES_BASE_URL + "ISIC_0015551.jpg",
     "Melanoma in skin Dermoscopy"],
    ["BiomedParse Segmentation",
     _EXAMPLES_BASE_URL + "C3_EndoCV2021_00462.jpg",
     "Neoplastic polyp in colon Endoscope"],
]
|
|
|
# Maps each imaging modality offered in the UI to the anatomical sites that can
# be selected for it. Insertion order is the order shown in the modality dropdown.
BIOMEDPARSE_MODES = {
    # CT
    "CT-Abdomen": ["abdomen", "liver"],
    "CT-Chest": ["lung"],
    "CT-Liver": ["liver"],
    # MRI
    "MRI-Abdomen": ["abdomen"],
    "MRI-Cardiac": ["heart"],
    "MRI-FLAIR-Brain": ["brain"],
    "MRI-T1-Gd-Brain": ["brain"],
    # Pathology
    "Pathology": [
        "bladder",
        "breast",
        "cervix",
        "colon",
        "esophagus",
        "kidney",
        "liver",
        "ovarian",
        "prostate",
        "stomach",
        "testis",
        "thyroid",
        "uterus",
    ],
    # Other modalities
    "X-Ray-Chest": ["chest"],
    "Ultrasound-Cardiac": ["heart"],
    "Endoscopy": ["colon"],
    "Fundus": ["retinal"],
    "Dermoscopy": ["skin"],
    "OCT": ["retinal"],
}
|
|
|
# Names of the inference modes recognised by on_mode_dropdown_change: every
# non-empty combination of segmentation, detection and recognition.
IMAGE_INFERENCE_MODES = [
    # single task
    "BIOMED SEGMENTATION",
    "BIOMED DETECTION",
    "BIOMED RECOGNITION",
    # two tasks
    "BIOMED SEGMENTATION + DETECTION",
    "BIOMED SEGMENTATION + RECOGNITION",
    "BIOMED DETECTION + RECOGNITION",
    # all three tasks
    "BIOMED SEGMENTATION + DETECTION + RECOGNITION",
]
|
|
|
# Example prompt targets per modality. Shown to the user as hints (see the
# "Example Prompts by Modality" accordion and update_example_prompts).
# Insertion order and the order inside each list are the display order.
MODALITY_PROMPTS = {
    "CT-Abdomen": [
        "postcava", "aorta", "right kidney", "kidney", "left kidney",
        "duodenum", "pancreas", "liver", "spleen", "stomach",
        "gallbladder", "left adrenal gland", "adrenal gland",
        "right adrenal gland", "esophagus",
    ],
    "CT-Chest": ["nodule", "COVID-19 infection", "tumor"],
    "MRI-Abdomen": [
        "aorta", "postcava", "right kidney", "duodenum", "kidney",
        "left kidney", "liver", "pancreas", "gallbladder", "stomach",
        "spleen", "left adrenal gland", "adrenal gland",
        "right adrenal gland", "esophagus",
    ],
    "MRI-Cardiac": ["left heart ventricle", "myocardium", "right heart ventricle"],
    "MRI-FLAIR-Brain": ["edema", "tumor core", "whole tumor"],
    "MRI-T1-Gd-Brain": ["enhancing tumor", "non-enhancing tumor", "tumor core"],
    "Pathology": [
        "connective tissue cells", "inflammatory cells",
        "neoplastic cells", "epithelial cells",
    ],
    "X-Ray-Chest": ["left lung", "lung", "right lung"],
    "Ultrasound-Cardiac": ["left heart atrium", "left heart ventricle"],
    "Endoscopy": ["neoplastic polyp", "polyp", "non-neoplastic polyp"],
    "Fundus": ["optic cup", "optic disc"],
    "Dermoscopy": ["lesion", "melanoma"],
    "OCT": ["edema"],
}
|
|
|
|
|
def on_mode_dropdown_change(selected_mode):
    """Return component updates for a change of the inference-mode dropdown.

    Args:
        selected_mode: The newly selected mode name, or ``None`` when nothing
            is selected.

    Returns:
        A list of four component updates, in order: modality dropdown,
        anatomical-site dropdown, and two textboxes. For a known BiomedParse
        mode the dropdowns are shown and both textboxes hidden; otherwise the
        dropdowns are hidden, the first textbox is shown, and the second
        textbox is shown only when no mode is selected.
    """
    if selected_mode in IMAGE_INFERENCE_MODES:
        return [
            gr.Dropdown(visible=True, choices=list(BIOMEDPARSE_MODES), label="Modality"),
            gr.Dropdown(visible=True, label="Anatomical Site"),
            gr.Textbox(visible=False),
            gr.Textbox(visible=False),
        ]

    return [
        gr.Dropdown(visible=False),
        gr.Dropdown(visible=False),
        gr.Textbox(visible=True),
        # Identity check: comparing to None with ``==`` can be overridden by
        # custom __eq__ implementations and is flagged by linters.
        gr.Textbox(visible=(selected_mode is None)),
    ]
|
|
|
def on_modality_change(modality):
    """Return an update for the anatomical-site dropdown after a modality change.

    Args:
        modality: Key into ``BIOMEDPARSE_MODES``; a falsy value means no
            modality is selected.

    Returns:
        A hidden dropdown when no modality is selected, otherwise a visible
        dropdown whose choices are the sites listed for ``modality``.

    Raises:
        KeyError: If ``modality`` is truthy but not a key of
            ``BIOMEDPARSE_MODES``.
    """
    if not modality:
        return gr.Dropdown(visible=False)

    site_choices = BIOMEDPARSE_MODES[modality]
    return gr.Dropdown(choices=site_choices, visible=True)
|
|
|
|
|
def initialize_model():
    """Build the BiomedParse model, load its weights and prepare text embeddings.

    The options are read from ``configs/biomedparse_inference.yaml``, the
    device option is set to CUDA when available (CPU otherwise), and the
    weights come from ``hf_hub:microsoft/BiomedParse``.

    Returns:
        The loaded model, switched to eval mode.
    """
    opt = load_opt_from_config_files(["configs/biomedparse_inference.yaml"])
    opt['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

    network = build_model(opt)
    biomedparse = BaseModel(opt, network).from_pretrained('hf_hub:microsoft/BiomedParse').eval()

    # Run the language encoder once over all class names plus "background";
    # presumably this caches the text embeddings used at inference — confirm.
    class_names = BIOMED_CLASSES + ["background"]
    with torch.no_grad():
        lang_encoder = biomedparse.model.sem_seg_head.predictor.lang_encoder
        lang_encoder.get_text_embeddings(class_names, is_eval=True)

    return biomedparse
|
|
|
def initialize_llm():
    """Load the biomedical multimodal LLM and its tokenizer.

    Loading is best-effort: any exception is printed and swallowed so the app
    can still start without the LLM.

    Returns:
        ``(model, tokenizer)`` on success, ``(None, None)`` if loading failed.
    """
    repo_id = "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1"
    model_kwargs = {
        "device_map": "auto",
        "torch_dtype": torch.float16,
        "trust_remote_code": True,
        "low_cpu_mem_usage": True,
    }

    try:
        print("Starting LLM initialization...")
        llm = AutoModel.from_pretrained(repo_id, **model_kwargs)
        print("Model loaded successfully")

        llm_tok = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
        print("Tokenizer loaded successfully")
    except Exception as e:
        print(f"Failed to initialize LLM: {str(e)}")
        return None, None

    return llm, llm_tok
|
|
|
# Load both models once at import time; process_image reads these globals.
# llm_model and llm_tokenizer are None when initialize_llm failed.
model = initialize_model()
llm_model, llm_tokenizer = initialize_llm()
|
|
|
def update_example_prompts(modality):
    """Return a short text listing the example prompts for ``modality``.

    Args:
        modality: Modality name to look up in ``MODALITY_PROMPTS``.

    Returns:
        A heading line followed by the comma-separated prompts, or an empty
        string when the modality has no entry.
    """
    try:
        prompt_names = MODALITY_PROMPTS[modality]
    except KeyError:
        return ""
    return f"Example prompts for {modality}:\n" + ", ".join(prompt_names)
|
|
|
|
|
def _split_prompts(raw_prompts):
    """Split a comma-separated prompt string into stripped, non-empty prompts."""
    return [part.strip() for part in raw_prompts.split(',') if part.strip()]


def _run_llm_analysis(image):
    """Ask the multimodal LLM to describe ``image`` and return its full reply."""
    pil_image = Image.fromarray(image)
    question = 'Give the modality, organ, analysis, abnormalities (if any), treatment (if abnormalities are present)?'
    msgs = [{'role': 'user', 'content': [pil_image, question]}]

    print("Starting LLM inference...")
    streamed_chunks = llm_model.chat(
        image=pil_image,
        msgs=msgs,
        tokenizer=llm_tokenizer,
        sampling=True,
        temperature=0.95,
        stream=True,
    )
    # Consume the stream in one pass instead of repeated string concatenation.
    llm_response = "".join(streamed_chunks)
    print(f"LLM generated response: {llm_response}")
    return llm_response


def _format_combined_analysis(analysis_results, llm_response):
    """Combine the per-prompt statistics and the LLM reply into one report."""
    divider = "=" * 50
    return (
        "\n\n" + divider + "\n"
        + "BiomedParse Analysis:\n"
        + "\n".join(analysis_results)
        + "\n\n" + divider + "\n"
        + "LLM Analysis:\n"
        + llm_response
        + "\n" + divider
    )


# NOTE(review): autocast is pinned to device_type="cuda" although
# initialize_model may select CPU — confirm this is intended for CPU-only runs.
@spaces.GPU
@torch.inference_mode()
@torch.autocast(device_type="cuda", dtype=torch.bfloat16)
def process_image(image_path, text_prompts, modality):
    """Segment an image for each prompt and produce a textual analysis.

    Args:
        image_path: Path of the uploaded image.
        text_prompts: Comma-separated prompts; blank entries (for example from
            a trailing comma) are ignored.
        modality: Imaging modality name passed to ``check_mask_stats``.

    Returns:
        ``(results, analysis)`` where ``results`` is a list with one overlay
        image per prompt (pixels whose predicted mask value exceeds 0.5 are
        set to ``[255, 0, 0]``) and ``analysis`` is the report text. When the
        LLM is available and succeeds, the report also contains its reply.
        On any error, ``(None, message)`` is returned instead of raising.
    """
    try:
        # Validation order matches the order of the inputs in the UI.
        if not image_path:
            raise ValueError("Please upload an image")
        prompts = _split_prompts(text_prompts or "")
        if not prompts:
            # Also covers input made only of commas/whitespace, which would
            # otherwise send empty prompts to the model.
            raise ValueError("Please enter prompts for analysis")
        if not modality:
            raise ValueError("Please select a modality")

        image = read_rgb(image_path)
        pred_masks = interactive_infer_image(model, Image.fromarray(image), prompts)

        results = []
        analysis_results = []
        for i, prompt in enumerate(prompts):
            mask = pred_masks[i]
            p_value = check_mask_stats(image, mask * 255, modality, prompt)
            analysis_results.append(f"P-value for '{prompt}' ({modality}): {p_value:.4f}")

            overlay_image = image.copy()
            overlay_image[mask > 0.5] = [255, 0, 0]
            results.append(overlay_image)

        # Default report; replaced below only when the LLM produced a reply.
        combined_analysis = "\n".join(analysis_results)

        if llm_model is not None and llm_tokenizer is not None:
            print("LLM model and tokenizer are available")
            try:
                llm_response = _run_llm_analysis(image)
            except Exception as e:
                # Best-effort: an LLM failure must not discard the masks.
                print(f"LLM analysis failed with error: {str(e)}")
            else:
                combined_analysis = _format_combined_analysis(analysis_results, llm_response)
        else:
            print("LLM model or tokenizer is not available")

        return results, combined_analysis

    except Exception as e:
        error_msg = f"⚠️ An error occurred: {str(e)}"
        print(f"Error details: {str(e)}", flush=True)
        return None, error_msg
|
|
|
|
|
# Gradio UI: inputs in the left column, findings in the right column, a
# collapsible list of example prompts, and the submit handler.
with gr.Blocks() as demo:
    gr.HTML(MARKDOWN)

    with gr.Row():
        # Left column: user inputs.
        with gr.Column():
            image_input = gr.Image(type="filepath", label="Input Image")
            prompts_input = gr.Textbox(
                lines=2,
                placeholder="Enter prompts separated by commas...",
                label="Prompts",
            )
            modality_choices = list(BIOMEDPARSE_MODES.keys())
            modality_dropdown = gr.Dropdown(
                choices=modality_choices,
                value=modality_choices[0],
                label="Modality",
            )
            submit_btn = gr.Button("Submit")

        # Right column: overlay images and the textual report.
        with gr.Column():
            output_gallery = gr.Gallery(label="Findings")
            pvalue_output = gr.Textbox(
                label="Results",
                interactive=False,
                show_label=True,
            )

    # NOTE(review): placed below the two-column row — confirm intended layout.
    with gr.Accordion("Example Prompts by Modality", open=False):
        for modality_name, example_prompts in MODALITY_PROMPTS.items():
            gr.Markdown(f"**{modality_name}**: {', '.join(example_prompts)}")

    submit_btn.click(
        fn=process_image,
        inputs=[image_input, prompts_input, modality_dropdown],
        outputs=[output_gallery, pvalue_output],
        api_name="process",
    )

demo.launch()