import gradio as gr
from io import BytesIO

from PIL import Image
import skimage.io

import clipGPT
import vitGPT
import ViTCoAtt
from build_vocab import Vocabulary


def render_image(image_path_or_url):
    # Read an image from a local path or URL, downscale it, and return JPEG bytes
    img = Image.fromarray(skimage.io.imread(image_path_or_url))
    img = img.resize((80, 80))  # Adjust size as needed
    buf = BytesIO()
    img.save(buf, format='JPEG')
    return buf.getvalue()


# Caption generation functions
def generate_caption_clipgpt(image):
    caption = clipGPT.generate_caption_clipgpt(image)
    return caption


def generate_caption_vitgpt(image):
    caption = vitGPT.generate_caption(image)
    return caption


def generate_caption_vitCoAtt(image):
    caption = ViTCoAtt.CaptionSampler.main(image)
    return caption


with gr.Blocks() as demo:
    gr.HTML(
        "You can generate captions by uploading an X-Ray "
        "and selecting a model of your choice below"
    )

    with gr.Row():
        # Image upload component, passed to predict() via the button click handler
        image = gr.Image(label="Upload X-Ray", type="pil")
        model_choice = gr.Radio(
            ["CLIP-GPT2", "ViT-GPT2", "ViT-CoAttention"],
            label="Select Model",
        )

    generate_button = gr.Button("Generate Caption")
    caption = gr.Textbox(label="Generated Caption")

    def predict(img, model_name):
        # Dispatch to the captioning backend selected in the radio group
        if model_name == "CLIP-GPT2":
            return generate_caption_clipgpt(img)
        elif model_name == "ViT-GPT2":
            return generate_caption_vitgpt(img)
        elif model_name == "ViT-CoAttention":
            return generate_caption_vitCoAtt(img)
        else:
            return "Caption generation for this model is not yet implemented."

    # Event handlers
    generate_button.click(predict, [image, model_choice], caption)  # Trigger prediction on button click

demo.launch()