Update app.py
app.py
CHANGED
--- a/app.py
@@ -1,170 +1,211 @@
- from transformers import AutoModelForCausalLM, AutoProcessor
- from PIL import Image
- import torch
  import gradio as gr
  import requests
  import tempfile
- import …

- MODEL_STATE = {
-     "model": None,
-     "processor": None,
-     "authenticated": False
- }

- def …
-     """
-     …
-         return f"❌ Login failed: {str(e)}"

- def get_sample_data():
-     """
-     …
          response = requests.get(url, headers={"User-Agent": "MAIRA-2"}, stream=True)
-         return Image.open(response.raw)
-     …
      return {
-         "frontal": …,
-         "lateral": …,
          "indication": "Dyspnea.",
          "technique": "PA and lateral views of the chest.",
          "comparison": "None.",
          "phrase": "Pleural effusion."
      }

- def save_temp_image(img):
-     """…
      temp_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
      img.save(temp_file.name)
      return temp_file.name

  def load_sample_findings():
      sample = get_sample_data()
      return [
-         save_temp_image(sample["frontal"]),
-         save_temp_image(sample["lateral"]),
          sample["indication"],
          sample["technique"],
          sample["comparison"],
-         None,
      ]

  def load_sample_phrase():
      sample = get_sample_data()
      return [save_temp_image(sample["frontal"]), sample["phrase"]]

- def generate_report(frontal_path, lateral_path, indication, technique, comparison,
-                     prior_frontal_path, prior_lateral_path, prior_report, grounding):
-     """Generate radiology report with authentication check"""
-     if not MODEL_STATE["authenticated"]:
-         return "⚠️ Please authenticate with your Hugging Face token first!"
-
-     try:
-         current_frontal = Image.open(frontal_path) if frontal_path else None
-         current_lateral = Image.open(lateral_path) if lateral_path else None
-         prior_frontal = Image.open(prior_frontal_path) if prior_frontal_path else None
-         prior_lateral = Image.open(prior_lateral_path) if prior_lateral_path else None
-
-         if not current_frontal or not current_lateral:
-             return "❌ Missing required current study images"
-
-         prior_report = prior_report or ""
-
-         processed = MODEL_STATE["processor"].format_and_preprocess_reporting_input(
-             current_frontal=current_frontal,
-             current_lateral=current_lateral,
-             prior_frontal=prior_frontal,
-             prior_lateral=prior_lateral,
-             indication=indication,
-             technique=technique,
-             comparison=comparison,
-             prior_report=prior_report,
-             return_tensors="pt",
-             get_grounding=grounding
-         ).to("cpu")
-
-         processed = dict(processed)
-         image_size_keys = [k for k in processed.keys() if "image_sizes" in k]
-         for k in image_size_keys:
-             processed.pop(k, None)
-
-         outputs = MODEL_STATE["model"].generate(
-             **processed,
-             max_new_tokens=450 if grounding else 300,
-             use_cache=True
-         )
-
-         prompt_length = processed["input_ids"].shape[-1]
-         decoded = MODEL_STATE["processor"].decode(outputs[0][prompt_length:], skip_special_tokens=True)
-         return MODEL_STATE["processor"].convert_output_to_plaintext_or_grounded_sequence(decoded.lstrip())
-
-     except Exception as e:
-         return f"❌ Generation error: {str(e)}"
-
- def ground_phrase(frontal_path, phrase):
-     """Perform phrase grounding with authentication check"""
-     if not MODEL_STATE["authenticated"]:
-         return "⚠️ Please authenticate with your Hugging Face token first!"
-
-     try:
-         if not frontal_path:
-             return "❌ Missing frontal view image"
-
-         frontal = Image.open(frontal_path)
-         processed = MODEL_STATE["processor"].format_and_preprocess_phrase_grounding_input(
-             frontal_image=frontal,
-             phrase=phrase,
-             return_tensors="pt"
-         ).to("cpu")
-
-         # Convert to regular dict and remove image size related keys
-         processed = dict(processed)
-         image_size_keys = [k for k in processed.keys() if "image_sizes" in k]
-         for k in image_size_keys:
-             processed.pop(k, None)
-
-         outputs = MODEL_STATE["model"].generate(
-             **processed,
-             max_new_tokens=150,
-             use_cache=True
-         )
-
-         prompt_length = processed["input_ids"].shape[-1]
-         decoded = MODEL_STATE["processor"].decode(outputs[0][prompt_length:], skip_special_tokens=True)
-         return MODEL_STATE["processor"].convert_output_to_plaintext_or_grounded_sequence(decoded)
-
-     except Exception as e:
-         return f"❌ Grounding error: {str(e)}"

  with gr.Blocks(title="MAIRA-2 Medical Assistant") as demo:
-     gr.Markdown(
-         …

      with gr.Row():
          hf_token = gr.Textbox(
@@ -176,7 +217,7 @@ with gr.Blocks(title="MAIRA-2 Medical Assistant") as demo:
      login_status = gr.Textbox(label="Authentication Status", interactive=False)

      login_btn.click(
-         …,
          inputs=hf_token,
          outputs=login_status
      )
@@ -199,7 +240,6 @@ with gr.Blocks(title="MAIRA-2 Medical Assistant") as demo:

              grounding = gr.Checkbox(label="Include Grounding")
              sample_btn = gr.Button("Load Sample Data")
-
          with gr.Column():
              report_output = gr.Textbox(label="Generated Report", lines=10)
              generate_btn = gr.Button("Generate Report")
@@ -207,12 +247,12 @@ with gr.Blocks(title="MAIRA-2 Medical Assistant") as demo:
      sample_btn.click(
          load_sample_findings,
          outputs=[frontal, lateral, indication, technique, comparison,
-                  …]
      )
      generate_btn.click(
-         generate_report,
-         inputs=[frontal, lateral, indication, technique, comparison,
-                 prior_frontal, prior_lateral, prior_report, grounding],
          outputs=report_output
      )
@@ -231,8 +271,8 @@ with gr.Blocks(title="MAIRA-2 Medical Assistant") as demo:
          outputs=[pg_frontal, phrase]
      )
      pg_btn.click(
-         ground_phrase,
-         inputs=[pg_frontal, phrase],
          outputs=pg_output
      )
+++ b/app.py
@@ -1,170 +1,211 @@
  import gradio as gr
+ import torch
  import requests
  import tempfile
+ from pathlib import Path
+ from PIL import Image
+ from transformers import AutoModelForCausalLM, AutoProcessor

+ _model_cache = {}

+ def load_model_and_processor(hf_token: str):
+     """
+     Loads the MAIRA-2 model and processor from Hugging Face using the provided
+     token. The loaded objects are cached, keyed by the token.
+     """
+     if hf_token in _model_cache:
+         return _model_cache[hf_token]
+     device = torch.device("cpu")
+     model = AutoModelForCausalLM.from_pretrained(
+         "microsoft/maira-2",
+         trust_remote_code=True,
+         use_auth_token=hf_token
+     )
+     processor = AutoProcessor.from_pretrained(
+         "microsoft/maira-2",
+         trust_remote_code=True,
+         use_auth_token=hf_token
+     )
+     model.eval()
+     model.to(device)
+     _model_cache[hf_token] = (model, processor)
+     return model, processor

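Note on the loader above: recent transformers releases deprecate the `use_auth_token` argument in favor of `token`, and the per-token `_model_cache` keeps one full CPU copy of the model per distinct token string. A modernized call might look like this (hypothetical sketch, not part of the commit):

    # Hypothetical: newer transformers accept `token` instead of the
    # deprecated `use_auth_token`.
    model = AutoModelForCausalLM.from_pretrained(
        "microsoft/maira-2",
        trust_remote_code=True,
        token=hf_token,
    )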
+ def get_sample_data() -> dict:
+     """
+     Download sample chest X-ray images and associated data.
+     """
+     frontal_image_url = "https://openi.nlm.nih.gov/imgs/512/145/145/CXR145_IM-0290-1001.png"
+     lateral_image_url = "https://openi.nlm.nih.gov/imgs/512/145/145/CXR145_IM-0290-2001.png"
+
+     def download_and_open(url: str) -> Image.Image:
          response = requests.get(url, headers={"User-Agent": "MAIRA-2"}, stream=True)
+         return Image.open(response.raw).convert("RGB")
+
+     frontal = download_and_open(frontal_image_url)
+     lateral = download_and_open(lateral_image_url)
      return {
+         "frontal": frontal,
+         "lateral": lateral,
          "indication": "Dyspnea.",
          "technique": "PA and lateral views of the chest.",
          "comparison": "None.",
          "phrase": "Pleural effusion."
      }

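The nested download helper decodes PIL straight off the raw response stream. A slightly more defensive variant (an assumption, not in the commit) buffers the payload so PIL never reads from a half-consumed socket and HTTP errors surface before decoding:

    import io

    def download_and_open_buffered(url: str) -> Image.Image:
        # Buffer the whole payload, then decode; fail fast on 4xx/5xx.
        response = requests.get(url, headers={"User-Agent": "MAIRA-2"}, timeout=30)
        response.raise_for_status()
        return Image.open(io.BytesIO(response.content)).convert("RGB")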
+ def generate_report(hf_token, frontal, lateral, indication, technique, comparison, use_grounding):
+     """
+     Generates a radiology report using the MAIRA-2 model.
+     If any image/text input is missing, sample data is used.
+     """
+     try:
+         model, processor = load_model_and_processor(hf_token)
+     except Exception as e:
+         return f"Error loading model: {str(e)}"
+     device = torch.device("cpu")
+     sample = get_sample_data()
+     if frontal is None:
+         frontal = sample["frontal"]
+     if lateral is None:
+         lateral = sample["lateral"]
+     if not indication:
+         indication = sample["indication"]
+     if not technique:
+         technique = sample["technique"]
+     if not comparison:
+         comparison = sample["comparison"]
+
+     processed_inputs = processor.format_and_preprocess_reporting_input(
+         current_frontal=frontal,
+         current_lateral=lateral,
+         prior_frontal=None,  # No prior study is used in this demo.
+         indication=indication,
+         technique=technique,
+         comparison=comparison,
+         prior_report=None,
+         return_tensors="pt",
+         get_grounding=use_grounding,
+     )
+     processed_inputs = {k: v.to(device) for k, v in processed_inputs.items()}
+     max_tokens = 450 if use_grounding else 300
+     with torch.no_grad():
+         output_decoding = model.generate(
+             **processed_inputs,
+             max_new_tokens=max_tokens,
+             use_cache=True,
+         )
+     prompt_length = processed_inputs["input_ids"].shape[-1]
+     decoded_text = processor.decode(output_decoding[0][prompt_length:], skip_special_tokens=True)
+     decoded_text = decoded_text.lstrip()  # Remove any leading whitespace
+     prediction = processor.convert_output_to_plaintext_or_grounded_sequence(decoded_text)
+     return prediction
+
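Since every missing input above falls back to the sample study, the function can be smoke-tested end to end with nothing but a token; note also that the `.to(device)` comprehension assumes every processor output is a tensor. An illustrative call (placeholder token, not in the commit):

    # Empty inputs trigger the sample-data fallback; the return value is either
    # the report text or an error string.
    report = generate_report(
        hf_token="hf_xxx",  # hypothetical placeholder
        frontal=None, lateral=None,
        indication="", technique="", comparison="",
        use_grounding=False,
    )
    print(report)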
+ def run_phrase_grounding(hf_token, frontal, phrase):
+     """
+     Runs phrase grounding using the MAIRA-2 model.
+     If the image or phrase is missing, sample data is used.
+     """
+     try:
+         model, processor = load_model_and_processor(hf_token)
+     except Exception as e:
+         return f"Error loading model: {str(e)}"
+     device = torch.device("cpu")
+     sample = get_sample_data()
+     if frontal is None:
+         frontal = sample["frontal"]
+     if not phrase:
+         phrase = sample["phrase"]
+     processed_inputs = processor.format_and_preprocess_phrase_grounding_input(
+         frontal_image=frontal,
+         phrase=phrase,
+         return_tensors="pt",
+     )
+     processed_inputs = {k: v.to(device) for k, v in processed_inputs.items()}
+     with torch.no_grad():
+         output_decoding = model.generate(
+             **processed_inputs,
+             max_new_tokens=150,
+             use_cache=True,
+         )
+     prompt_length = processed_inputs["input_ids"].shape[-1]
+     decoded_text = processor.decode(output_decoding[0][prompt_length:], skip_special_tokens=True)
+     prediction = processor.convert_output_to_plaintext_or_grounded_sequence(decoded_text)
+     return prediction
+
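With grounding enabled, `convert_output_to_plaintext_or_grounded_sequence` may return a structured sequence rather than a plain string, which the Textbox output then renders with Python's default repr. A display helper along these lines (a sketch under the assumption that grounded output arrives as phrase/boxes pairs) would make that explicit:

    def to_display_text(prediction) -> str:
        # Assumption: plain reports are str; grounded output is an iterable of
        # (phrase, boxes) pairs.
        if isinstance(prediction, str):
            return prediction
        return "\n".join(f"{phrase} -> {boxes}" for phrase, boxes in prediction)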
+ def login_ui(hf_token):
+     """Authenticate the user by loading the model."""
+     try:
+         load_model_and_processor(hf_token)
+         return "🔓 Login successful! You can now use the model."
+     except Exception as e:
+         return f"❌ Login failed: {str(e)}"
+
+ def generate_report_ui(hf_token, frontal_path, lateral_path, indication, technique, comparison,
+                        prior_frontal_path, prior_lateral_path, prior_report, grounding):
+     """
+     Wrapper for generate_report that accepts file paths (from the UI) for the images.
+     Prior study fields are ignored.
+     """
+     try:
+         frontal = Image.open(frontal_path) if frontal_path else None
+         lateral = Image.open(lateral_path) if lateral_path else None
+     except Exception as e:
+         return f"❌ Error loading images: {str(e)}"
+     return generate_report(hf_token, frontal, lateral, indication, technique, comparison, grounding)
+
+ def run_phrase_grounding_ui(hf_token, frontal_path, phrase):
+     """
+     Wrapper for run_phrase_grounding that accepts a file path for the frontal image.
+     """
+     try:
+         frontal = Image.open(frontal_path) if frontal_path else None
+     except Exception as e:
+         return f"❌ Error loading image: {str(e)}"
+     return run_phrase_grounding(hf_token, frontal, phrase)
+
+ def save_temp_image(img: Image.Image) -> str:
+     """Save a PIL image to a temporary file and return the file path."""
      temp_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
      img.save(temp_file.name)
      return temp_file.name

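`save_temp_image` writes with delete=False, so files accumulate for the lifetime of the process; `Path` is imported above but never used, and it could anchor a cleanup-friendly variant (hypothetical sketch, not in the commit):

    # Hypothetical: collect sample images in one directory that can be removed
    # wholesale on shutdown.
    _SAMPLE_DIR = Path(tempfile.mkdtemp(prefix="maira2_samples_"))

    def save_temp_image_to_dir(img: Image.Image, name: str) -> str:
        path = _SAMPLE_DIR / f"{name}.png"
        img.save(path)
        return str(path)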
  def load_sample_findings():
+     """
+     Loads sample data for the report generation tab.
+     Returns file paths for the current study images, sample text fields, and
+     dummy values for the prior study.
+     """
      sample = get_sample_data()
      return [
+         save_temp_image(sample["frontal"]),  # frontal image file path
+         save_temp_image(sample["lateral"]),  # lateral image file path
          sample["indication"],
          sample["technique"],
          sample["comparison"],
+         None,   # prior frontal (not used)
+         None,   # prior lateral (not used)
+         None,   # prior report (not used)
+         False   # reset the grounding checkbox
      ]

  def load_sample_phrase():
+     """
+     Loads sample data for the phrase grounding tab.
+     Returns the file path for the frontal image and a sample phrase.
+     """
      sample = get_sample_data()
      return [save_temp_image(sample["frontal"]), sample["phrase"]]


  with gr.Blocks(title="MAIRA-2 Medical Assistant") as demo:
+     gr.Markdown(
+         """
+         # MAIRA-2 Medical Assistant
+         **Authentication required** - You need a Hugging Face account and access token to use this model.
+         1. Get your access token from [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
+         2. Request model access at [https://huggingface.co/microsoft/maira-2](https://huggingface.co/microsoft/maira-2)
+         3. Paste your token below to begin
+         """
+     )

      with gr.Row():
          hf_token = gr.Textbox(
@@ -176,7 +217,7 @@ with gr.Blocks(title="MAIRA-2 Medical Assistant") as demo:
      login_status = gr.Textbox(label="Authentication Status", interactive=False)

      login_btn.click(
+         login_ui,
          inputs=hf_token,
          outputs=login_status
      )
@@ -199,7 +240,6 @@ with gr.Blocks(title="MAIRA-2 Medical Assistant") as demo:

              grounding = gr.Checkbox(label="Include Grounding")
              sample_btn = gr.Button("Load Sample Data")
          with gr.Column():
              report_output = gr.Textbox(label="Generated Report", lines=10)
              generate_btn = gr.Button("Generate Report")
@@ -207,12 +247,12 @@ with gr.Blocks(title="MAIRA-2 Medical Assistant") as demo:
      sample_btn.click(
          load_sample_findings,
          outputs=[frontal, lateral, indication, technique, comparison,
+                  prior_frontal, prior_lateral, prior_report, grounding]
      )
      generate_btn.click(
+         generate_report_ui,
+         inputs=[hf_token, frontal, lateral, indication, technique, comparison,
+                 prior_frontal, prior_lateral, prior_report, grounding],
          outputs=report_output
      )
@@ -231,8 +271,8 @@ with gr.Blocks(title="MAIRA-2 Medical Assistant") as demo:
          outputs=[pg_frontal, phrase]
      )
      pg_btn.click(
+         run_phrase_grounding_ui,
+         inputs=[hf_token, pg_frontal, phrase],
          outputs=pg_output
      )

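The last hunk ends inside the Blocks context, so the launch call presumably lives in an unchanged tail of app.py not shown here; a Gradio Space entrypoint conventionally finishes with:

    if __name__ == "__main__":
        demo.launch()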