Spaces:

Segizu
/

imagedescription

Sleeping

Segizu committed on Feb 11

Commit

5ef1757

1 Parent(s): f5df7cc

Image2caption simple

Files changed (4) hide show

__pycache__/utils.cpython-39.pyc ADDED Viewed

Binary file (1.4 kB). View file

app.py ADDED Viewed

+# app.py
+import streamlit as st
+from utils import ImageCaptioningModel
+import tempfile
+# Initialize the BLIP Image Captioning model
+captioning_model = ImageCaptioningModel()
+# Streamlit UI
+st.title("🖼️ Image Captioning with BLIP")
+st.write("Upload an image and the model will generate a description.")
+# Upload Image
+uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+if uploaded_file is not None:
+    # Display uploaded image
+    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
+    # Save file temporarily
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
+        temp_file.write(uploaded_file.getbuffer())
+        temp_file_path = temp_file.name
+    # Generate caption
+    with st.spinner("Generating caption..."):
+        caption = captioning_model.generate_caption(temp_file_path)
+    # Show caption result
+    st.success("Generated Caption:")
+    st.write(f"**{caption}**")

requirements.txt ADDED Viewed

+torch
+transformers
+Pillow
+streamlit

utils.py ADDED Viewed

+# utils.py
+from transformers import BlipProcessor, BlipForConditionalGeneration
+from PIL import Image
+import torch
+class ImageCaptioningModel:
+    def __init__(self, model_name="Salesforce/blip-image-captioning-base"):
+        """
+        Initialize BLIP Image Captioning model.
+        """
+        self.processor = BlipProcessor.from_pretrained(model_name)
+        self.model = BlipForConditionalGeneration.from_pretrained(model_name)
+        self.model.eval()
+    def generate_caption(self, image_path):
+        """
+        Generate a caption for the given image.
+        :param image_path: Path to the input image
+        :return: Generated caption (string)
+        """
+        image = Image.open(image_path).convert("RGB")
+        inputs = self.processor(images=image, return_tensors="pt")
+        with torch.no_grad():
+            output = self.model.generate(**inputs)
+        caption = self.processor.tokenizer.decode(output[0], skip_special_tokens=True)
+        return caption