Spaces:

ThinkAI-Morocco
/

artigen

Build error

File size: 1,385 Bytes

e67b575
 
6903ffe
e67b575
6903ffe
e67b575
 
 
6903ffe
e67b575
 
 
6903ffe
e67b575
 
 
 
 
 
 
6903ffe
e67b575
 
 
 
6903ffe
e67b575
 
 
6903ffe
e67b575
 
6903ffe
e67b575

import torch
from transformers import AutoProcessor, AutoModel, VisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer
from PIL import Image
import streamlit as st

# Load the model, feature extractor and tokenizer once per server process.
# Streamlit re-executes this script on every user interaction, so without
# caching the checkpoint and model.pth would be reloaded on each rerun.
@st.cache_resource
def _load_captioning_components():
    """Build the captioning model and its pre-/post-processing components.

    The base architecture, feature extractor and tokenizer all come from the
    same pretrained checkpoint; the model weights are then overwritten with
    the state dictionary stored in the local file ``model.pth``.

    Returns:
        A ``(model, feature_extractor, tokenizer)`` tuple.
    """
    checkpoint = "nlpconnect/vit-gpt2-image-captioning"

    captioner = VisionEncoderDecoderModel.from_pretrained(checkpoint)
    # NOTE(review): torch.load unpickles the file, so model.pth must come
    # from a trusted source. Tensors are mapped to CPU so a checkpoint saved
    # on a GPU machine still loads here.
    state_dict = torch.load("model.pth", map_location=torch.device('cpu'))
    captioner.load_state_dict(state_dict)

    extractor = ViTFeatureExtractor.from_pretrained(checkpoint)
    caption_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return captioner, extractor, caption_tokenizer


# Module-level names kept so the rest of the file can keep using them.
model, feature_extractor, tokenizer = _load_captioning_components()

# Function to generate a caption for an image
@st.cache_data
def generate_caption(image):
    """Return a text caption for ``image``.

    The result is memoized with ``st.cache_data`` (the Streamlit cache meant
    for computed values), so repeated calls with the same image reuse the
    stored caption instead of running generation again.

    Args:
        image: The image to caption. The app passes a PIL image; any other
            input type is handed to the feature extractor unchanged.

    Returns:
        The decoded caption string for the single input image.
    """
    # Uploaded PNGs are often RGBA, palette or grayscale. The checkpoint's
    # published usage example converts every image to RGB before feature
    # extraction, so do the same here instead of failing on such uploads.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")

    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
    # Beam search with 5 beams, capped at 100 tokens.
    output_ids = model.generate(pixel_values, max_length=100, num_beams=5, early_stopping=True)
    # A single image was passed in, so the batch holds exactly one caption.
    caption = tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
    return caption

# Streamlit app
def main():
    """Render the page: a title, an image uploader and the generated caption.

    When no file has been uploaded yet, only the title and the uploader are
    shown. Once a file is present it is opened with PIL, displayed, and its
    caption (from ``generate_caption``) is written below it.
    """
    st.title("Image Captioning")

    upload = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if upload is None:
        # Nothing to caption until the user provides a file.
        return

    picture = Image.open(upload)
    st.image(picture, caption="Uploaded Image", use_column_width=True)

    text = generate_caption(picture)
    st.write(f"Caption: {text}")


if __name__ == "__main__":
    main()