# Legacy version using the high-level pipeline API (kept for reference):
#
# import streamlit as st
# from transformers import pipeline
#
# # Initialize the image captioning pipeline
# captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
#
# # Streamlit app title
# st.title("Image to Text Captioning")
#
# # Input for image URL
# image_url = st.text_input("Enter the URL of the image:")
#
# # If an image URL is provided
# if image_url:
#     try:
#         # Display the image
#         st.image(image_url, caption="Provided Image", use_column_width=True)
#         # Generate the caption
#         caption = captioner(image_url)
#         # Display the caption
#         st.write("**Generated Caption:**")
#         st.write(caption[0]['generated_text'])
#     except Exception as e:
#         st.error(f"An error occurred: {e}")
#
# # To run this app, save this code to a file (e.g., `app.py`) and run `streamlit run app.py` in your terminal.
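
# Active implementation below: BLIP (processor + conditional-generation model)
# is loaded directly so the app can produce both a prompted ("conditional") and
# an unprompted ("unconditional") caption, and a T5 tag-generation model turns
# the captions into tags.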
import streamlit as st
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
@st.cache_resource
def load_models():
    # BLIP processor + model for image captioning
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    # T5 model fine-tuned to generate tags from free text
    tokenizer = AutoTokenizer.from_pretrained("fabiochiu/t5-base-tag-generation")
    model2 = AutoModelForSeq2SeqLM.from_pretrained("fabiochiu/t5-base-tag-generation")
    return processor, model, tokenizer, model2

processor, model, tokenizer, model2 = load_models()
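
# @st.cache_resource keeps the loaded models in memory across Streamlit reruns,
# so the (potentially large) downloads from the Hugging Face Hub happen only
# once per process rather than on every button click.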
def get_image_caption_and_tags(img_url):
    raw_image = Image.open(requests.get(img_url, stream=True, timeout=30).raw).convert('RGB')

    # Conditional image captioning: BLIP completes the given text prompt.
    prompt = "a photography of"
    inputs = processor(raw_image, prompt, return_tensors="pt")
    out = model.generate(**inputs)
    conditional_caption = processor.decode(out[0], skip_special_tokens=True)

    # Unconditional image captioning: no prompt is supplied.
    inputs = processor(raw_image, return_tensors="pt")
    out = model.generate(**inputs)
    unconditional_caption = processor.decode(out[0], skip_special_tokens=True)

    # Generate tags from the two captions (not from the bare prompt, which
    # would always yield the same tags regardless of the image).
    alltexts = conditional_caption + ". " + unconditional_caption
    inputs = tokenizer([alltexts], max_length=512, truncation=True, return_tensors="pt")
    output = model2.generate(**inputs, num_beams=8, do_sample=True, min_length=10, max_length=64)
    decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    tags = list(set(decoded_output.strip().split(", ")))

    return raw_image, conditional_caption, unconditional_caption, tags
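
# A minimal sketch of exercising the helper outside Streamlit; the URL is a
# placeholder, not a real asset:
#
#   image, cond_caption, uncond_caption, tags = get_image_caption_and_tags(
#       "https://example.com/photo.jpg")
#   print(cond_caption, uncond_caption, tags)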

st.title('Image Captioning and Tag Generation')

img_url = st.text_input("Enter Image URL:")

if st.button("Generate Captions and Tags"):
    with st.spinner('Processing...'):
        try:
            image, cond_caption, uncond_caption, tags = get_image_caption_and_tags(img_url)
            st.image(image, caption='Input Image', use_column_width=True)
            st.subheader("Conditional Caption:")
            st.write(cond_caption)
            st.subheader("Unconditional Caption:")
            st.write(uncond_caption)
            st.subheader("Generated Tags:")
            st.write(", ".join(tags))
        except Exception as e:
            st.error(f"An error occurred: {e}")
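
# To run this app: save it as `app.py` and execute `streamlit run app.py`.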