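# NOTE: the following lines are residue from the Hugging Face file viewer,
# kept as comments so they are not parsed as Python code:
#   sourabhbargi11's picture
#   Update app.py
#   7e847dc verified
#   raw
#   history blame
#   2.49 kB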
import streamlit as st
from PIL import Image
from transformers import GPT2TokenizerFast, ViTImageProcessor, VisionEncoderDecoderModel,RobertaTokenizerFast
import requests
from PIL import Image
from transformers import GPT2TokenizerFast, ViTImageProcessor, VisionEncoderDecoderModel
#import torch
#from transformers import BlipProcessor, BlipForConditionalGeneration
def set_page_config():
    """Apply the Streamlit page settings: title, camera icon, wide layout."""
    page_options = {
        'page_title': 'Caption an Image',
        'page_icon': ':camera:',
        'layout': 'wide',
    }
    st.set_page_config(**page_options)
def initialize_model():
    """Load the captioning model together with its tokenizer and image processor.

    The model weights come from ``sourabhbargi11/caption4``; the tokenizer and
    the image processor come from ``nlpconnect/vit-gpt2-image-captioning``.

    Returns:
        A 4-tuple ``(image_processor, model, tokenizer, device)`` where
        ``device`` is the string ``'cuda'`` or ``'cpu'`` and ``model`` has
        already been moved to that device.
    """
    # Imported locally: the module-level ``import torch`` is commented out, so
    # without this line the device probe below raises NameError.
    import torch

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Load a fine-tuned image captioning model and the corresponding
    # tokenizer and image processor.
    model = VisionEncoderDecoderModel.from_pretrained("sourabhbargi11/caption4").to(device)
    tokenizer = GPT2TokenizerFast.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    image_processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    return image_processor, model, tokenizer, device
def upload_image():
    """Show the sidebar file uploader (jpg/jpeg/png) and return its value."""
    prompt = "Upload an image (we aren't storing anything)"
    accepted_types = ["jpg", "jpeg", "png"]
    return st.sidebar.file_uploader(prompt, type=accepted_types)
def resize_image(image, max_width):
    """Shrink *image* to at most *max_width* pixels wide, keeping its aspect ratio.

    Args:
        image: An object exposing ``size`` (a ``(width, height)`` pair),
            ``mode``, ``resize`` and ``convert`` -- in this app a PIL image.
        max_width: Upper bound for the returned image's width, in pixels.

    Returns:
        The image, resized if it was wider than ``max_width`` and converted
        to ``"RGB"`` if its mode was ``"L"``. An image that needs neither
        change is returned as-is.
    """
    width, height = image.size
    if width > max_width:
        # Scale the height by the same factor as the width. The previous code
        # referenced an undefined ``height`` and raised NameError on every call.
        height = max(1, round(height * max_width / width))
        image = image.resize((max_width, height))
    if image.mode == "L":
        # Single-channel images are promoted to three channels
        # (presumably because the captioning model expects RGB -- TODO confirm).
        image = image.convert("RGB")
    return image
def generate_caption(processor, model, device, image):
    """Return the caption for *image*.

    Currently a stub: none of the arguments are used and a fixed placeholder
    string is returned. The disabled inference path is kept below for reference.
    """
    # Disabled inference path:
    #inputs = processor(image, return_tensors='pt').to(device)
    #out = model.generate(**inputs, max_new_tokens=20)
    #caption = processor.decode(out[0], skip_special_tokens=True)
    return "im here "
def main():
    """Render the page: upload an image, preview it, and show a caption on request."""
    set_page_config()
    st.header("Caption an Image :camera:")

    uploaded_image = upload_image()
    if uploaded_image is None:
        # Nothing uploaded yet, so there is nothing to preview or caption.
        return

    image = resize_image(Image.open(uploaded_image), max_width=300)
    st.image(image, caption='Your image')

    with st.sidebar:
        st.divider()
        if not st.sidebar.button('Generate Caption'):
            return
        with st.spinner('Generating caption...'):
            # Model inference is disabled; a placeholder caption is shown.
            # NOTE(review): the disabled call unpacks three values, while
            # initialize_model returns four -- adjust before re-enabling.
            #processor, model, device = initialize_model()
            #caption = generate_caption(processor, model, device, image)
            caption = "im here man"
            st.header("Caption:")
            st.markdown(f'**{caption}**')
# Script entry point: build the page when the file is executed directly.
if __name__ == "__main__":
    main()

# Footer: a horizontal rule followed by a short note about the model.
st.markdown(
    """
---
You are looking at Finetuned image Caption model """
)