Update app.py
app.py CHANGED

@@ -4,6 +4,7 @@ import torch
 import gradio as gr
 import requests
 import tempfile
+import os
 
 MODEL_STATE = {
     "model": None,
@@ -75,20 +76,22 @@ def load_sample_phrase():
     return [save_temp_image(sample["frontal"]), sample["phrase"]]
 
 def generate_report(frontal_path, lateral_path, indication, technique, comparison,
-
+                    prior_frontal_path, prior_lateral_path, prior_report, grounding):
     """Generate radiology report with authentication check"""
     if not MODEL_STATE["authenticated"]:
         return "⚠️ Please authenticate with your Hugging Face token first!"
 
-    if not frontal_path or not lateral_path:
-        return "❌ Please upload both the frontal and lateral images for the current study."
-
     try:
-        current_frontal = Image.open(frontal_path)
-        current_lateral = Image.open(lateral_path)
+        current_frontal = Image.open(frontal_path) if frontal_path else None
+        current_lateral = Image.open(lateral_path) if lateral_path else None
         prior_frontal = Image.open(prior_frontal_path) if prior_frontal_path else None
         prior_lateral = Image.open(prior_lateral_path) if prior_lateral_path else None
 
+        if not current_frontal or not current_lateral:
+            return "❌ Missing required current study images"
+
+        prior_report = prior_report or ""
+
         processed = MODEL_STATE["processor"].format_and_preprocess_reporting_input(
             current_frontal=current_frontal,
             current_lateral=current_lateral,
@@ -97,20 +100,20 @@ def generate_report(frontal_path, lateral_path, indication, technique, compariso
             indication=indication,
             technique=technique,
             comparison=comparison,
-            prior_report=prior_report
+            prior_report=prior_report,
             return_tensors="pt",
             get_grounding=grounding
         ).to("cpu")
 
-        processed.
+        processed_inputs = {k: v for k, v in processed.items() if k != 'image_sizes'}
 
         outputs = MODEL_STATE["model"].generate(
-            **
+            **processed_inputs,
             max_new_tokens=450 if grounding else 300,
             use_cache=True
         )
 
-        prompt_length =
+        prompt_length = processed_inputs["input_ids"].shape[-1]
         decoded = MODEL_STATE["processor"].decode(outputs[0][prompt_length:], skip_special_tokens=True)
         return MODEL_STATE["processor"].convert_output_to_plaintext_or_grounded_sequence(decoded.lstrip())
 
@@ -122,28 +125,26 @@ def ground_phrase(frontal_path, phrase):
     if not MODEL_STATE["authenticated"]:
         return "⚠️ Please authenticate with your Hugging Face token first!"
 
-    # Check that the required image is provided.
-    if not frontal_path:
-        return "❌ Please upload the frontal image for phrase grounding."
-
     try:
+        if not frontal_path:
+            return "❌ Missing frontal view image"
+
         frontal = Image.open(frontal_path)
         processed = MODEL_STATE["processor"].format_and_preprocess_phrase_grounding_input(
             frontal_image=frontal,
             phrase=phrase,
             return_tensors="pt"
         ).to("cpu")
-
-
-        processed.pop("image_sizes", None)
+
+        processed_inputs = {k: v for k, v in processed.items() if k != 'image_sizes'}
 
         outputs = MODEL_STATE["model"].generate(
-            **
+            **processed_inputs,
             max_new_tokens=150,
             use_cache=True
         )
 
-        prompt_length =
+        prompt_length = processed_inputs["input_ids"].shape[-1]
         decoded = MODEL_STATE["processor"].decode(outputs[0][prompt_length:], skip_special_tokens=True)
         return MODEL_STATE["processor"].convert_output_to_plaintext_or_grounded_sequence(decoded)
 
@@ -199,12 +200,12 @@ with gr.Blocks(title="MAIRA-2 Medical Assistant") as demo:
     sample_btn.click(
         load_sample_findings,
         outputs=[frontal, lateral, indication, technique, comparison,
-
+                 prior_frontal, prior_lateral, prior_report, grounding]
     )
     generate_btn.click(
         generate_report,
         inputs=[frontal, lateral, indication, technique, comparison,
-
+                prior_frontal, prior_lateral, prior_report, grounding],
         outputs=report_output
     )
 
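The removed fragments (`processed.`, `**`, `prompt_length =`) show the previous revision was left mid-edit; this commit completes each one with the same two-step pattern in both handlers: filter `image_sizes` out of the processor output before unpacking it into `generate()`, then use the prompt length to decode only the newly generated tokens. A minimal sketch of that pattern, with `model` and `processor` standing in for `MODEL_STATE["model"]` and `MODEL_STATE["processor"]` (the `run_generation` helper is illustrative, not part of app.py):

def run_generation(model, processor, processed, max_new_tokens=300):
    # Drop keys generate() will not accept; "image_sizes" appears to be
    # returned by the processor for box post-processing, not for generation.
    processed_inputs = {k: v for k, v in processed.items() if k != "image_sizes"}

    outputs = model.generate(
        **processed_inputs,
        max_new_tokens=max_new_tokens,
        use_cache=True,
    )

    # A decoder-only generate() returns the prompt followed by the
    # continuation; slice at the prompt length so only the newly generated
    # tokens are decoded.
    prompt_length = processed_inputs["input_ids"].shape[-1]
    return processor.decode(outputs[0][prompt_length:], skip_special_tokens=True)

Stripping `image_sizes` rather than forwarding everything matters because `transformers` validates the keyword arguments passed to `generate()` and raises on keys the model does not use.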
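The other recurring fix completes the component lists wired into the `click()` events; the truncated lists in the previous revision were not even valid Python. Gradio forwards `inputs` to the handler positionally, in list order, so the nine components must line up one-to-one with `generate_report`'s nine parameters. A tiny self-contained illustration of that contract (the components and handler here are hypothetical, not the app's):

import gradio as gr

def greet(name, punctuation):
    return f"Hello, {name}{punctuation}"

with gr.Blocks() as demo:
    name_box = gr.Textbox(label="Name")
    punct_box = gr.Textbox(label="Punctuation", value="!")
    out_box = gr.Textbox(label="Greeting")
    btn = gr.Button("Greet")
    # inputs are forwarded to greet() positionally, in list order
    btn.click(greet, inputs=[name_box, punct_box], outputs=out_box)

demo.launch()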