Spaces:
Runtime error
Runtime error
added git-base-coco model
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import AutoProcessor, BlipForConditionalGeneration
|
3 |
|
4 |
# from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, Blip2ForConditionalGeneration, VisionEncoderDecoderModel
|
5 |
import torch
|
@@ -11,8 +11,8 @@ torch.hub.download_url_to_file('http://images.cocodataset.org/val2017/0000000397
|
|
11 |
torch.hub.download_url_to_file('https://huggingface.co/datasets/nielsr/textcaps-sample/resolve/main/stop_sign.png', 'stop_sign.png')
|
12 |
torch.hub.download_url_to_file('https://cdn.openai.com/dall-e-2/demos/text2im/astronaut/horse/photo/0.jpg', 'astronaut.jpg')
|
13 |
|
14 |
-
|
15 |
-
|
16 |
|
17 |
# git_processor_large_coco = AutoProcessor.from_pretrained("microsoft/git-large-coco")
|
18 |
# git_model_large_coco = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
|
@@ -76,7 +76,7 @@ def generate_caption_coca(model, transform, image):
|
|
76 |
|
77 |
|
78 |
def generate_captions(image):
|
79 |
-
|
80 |
|
81 |
# caption_git_large_coco = generate_caption(git_processor_large_coco, git_model_large_coco, image)
|
82 |
|
@@ -101,7 +101,7 @@ def generate_captions(image):
|
|
101 |
|
102 |
examples = [["cats.jpg"], ["stop_sign.png"], ["astronaut.jpg"]]
|
103 |
# outputs = [gr.outputs.Textbox(label="Caption generated by GIT-large fine-tuned on COCO"), gr.outputs.Textbox(label="Caption generated by GIT-large fine-tuned on TextCaps"), gr.outputs.Textbox(label="Caption generated by BLIP-large"), gr.outputs.Textbox(label="Caption generated by CoCa"), gr.outputs.Textbox(label="Caption generated by BLIP-2 OPT 6.7b")]
|
104 |
-
outputs = [gr.outputs.Textbox(label="Caption generated by BLIP-base")
|
105 |
|
106 |
title = "Interactive demo: comparing image captioning models"
|
107 |
description = "Gradio Demo to compare GIT, BLIP, CoCa, and BLIP-2, 4 state-of-the-art vision+language models. To use it, simply upload your image and click 'submit', or click one of the examples to load them. Read more at the links below."
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import AutoProcessor, BlipForConditionalGeneration, AutoModelForCausalLM
|
3 |
|
4 |
# from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, Blip2ForConditionalGeneration, VisionEncoderDecoderModel
|
5 |
import torch
|
|
|
11 |
torch.hub.download_url_to_file('https://huggingface.co/datasets/nielsr/textcaps-sample/resolve/main/stop_sign.png', 'stop_sign.png')
|
12 |
torch.hub.download_url_to_file('https://cdn.openai.com/dall-e-2/demos/text2im/astronaut/horse/photo/0.jpg', 'astronaut.jpg')
|
13 |
|
14 |
+
git_processor_base = AutoProcessor.from_pretrained("microsoft/git-base-coco")
|
15 |
+
git_model_base = AutoModelForCausalLM.from_pretrained("microsoft/git-base-coco")
|
16 |
|
17 |
# git_processor_large_coco = AutoProcessor.from_pretrained("microsoft/git-large-coco")
|
18 |
# git_model_large_coco = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
|
|
|
76 |
|
77 |
|
78 |
def generate_captions(image):
|
79 |
+
caption_git_base = generate_caption(git_processor_base, git_model_base, image)
|
80 |
|
81 |
# caption_git_large_coco = generate_caption(git_processor_large_coco, git_model_large_coco, image)
|
82 |
|
|
|
101 |
|
102 |
examples = [["cats.jpg"], ["stop_sign.png"], ["astronaut.jpg"]]
|
103 |
# outputs = [gr.outputs.Textbox(label="Caption generated by GIT-large fine-tuned on COCO"), gr.outputs.Textbox(label="Caption generated by GIT-large fine-tuned on TextCaps"), gr.outputs.Textbox(label="Caption generated by BLIP-large"), gr.outputs.Textbox(label="Caption generated by CoCa"), gr.outputs.Textbox(label="Caption generated by BLIP-2 OPT 6.7b")]
|
104 |
+
outputs = [gr.outputs.Textbox(label="Caption generated by GIT-base fine-tuned on COCO"), gr.outputs.Textbox(label="Caption generated by BLIP-base")]
|
105 |
|
106 |
title = "Interactive demo: comparing image captioning models"
|
107 |
description = "Gradio Demo to compare GIT, BLIP, CoCa, and BLIP-2, 4 state-of-the-art vision+language models. To use it, simply upload your image and click 'submit', or click one of the examples to load them. Read more at the links below."
|