File size: 3,699 Bytes
7c56b7b
a93e14b
7c56b7b
11f5df3
 
 
 
 
 
 
af35fa6
 
 
 
7c56b7b
788e1b9
a93e14b
11f5df3
af35fa6
a93e14b
 
7c56b7b
788e1b9
a93e14b
788e1b9
a93e14b
79ec99d
a93e14b
 
 
79ec99d
7c56b7b
a93e14b
0760a62
a93e14b
 
 
788e1b9
a93e14b
 
 
788e1b9
11f5df3
 
 
 
a93e14b
 
788e1b9
a93e14b
7c56b7b
788e1b9
a93e14b
47ad849
a93e14b
 
 
 
 
 
 
7c56b7b
788e1b9
a93e14b
788e1b9
 
a93e14b
 
3dc8d07
a93e14b
 
 
 
 
788e1b9
a93e14b
 
3dc8d07
788e1b9
fb260dd
 
788e1b9
a93e14b
 
 
 
 
788e1b9
7c56b7b
a93e14b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import streamlit as st
import base64
import os
from PIL import Image
from io import BytesIO

# Function to compress and resize the image before base64 encoding
def compress_and_resize_image(image, max_size=(1024, 1024), quality=85):
    """Shrink an image and re-encode it as JPEG in memory.

    Args:
        image: Anything ``PIL.Image.open`` accepts (path or binary file-like
            object).
        max_size: ``(width, height)`` bounding box passed to
            ``Image.thumbnail``; the aspect ratio is kept.
        quality: JPEG quality setting passed to ``Image.save``.

    Returns:
        A ``BytesIO`` holding the JPEG bytes, positioned at offset 0.
    """
    img = Image.open(image)
    img.thumbnail(max_size)  # In-place resize that keeps the aspect ratio

    # The JPEG writer only handles L, RGB and CMYK. PNG uploads are commonly
    # RGBA or palette ("P") images, which would otherwise fail with
    # "OSError: cannot write mode RGBA as JPEG".
    if img.mode not in ("RGB", "L", "CMYK"):
        img = img.convert("RGB")

    byte_io = BytesIO()
    img.save(byte_io, format="JPEG", quality=quality)
    byte_io.seek(0)  # Rewind so callers can read() the buffer from the start
    return byte_io

# Function to convert uploaded image to base64
def convert_image_to_base64(image):
    """Return *image* as base64 text.

    The image is first passed through ``compress_and_resize_image``; the
    bytes read from the buffer it returns are base64-encoded and decoded to
    a UTF-8 ``str``.
    """
    jpeg_buffer = compress_and_resize_image(image)
    raw_bytes = jpeg_buffer.read()
    return str(base64.b64encode(raw_bytes), "utf-8")

# Function to generate caption using Nebius API
def generate_caption(encoded_image):
    """Ask the Nebius chat-completions API to describe a base64-encoded image.

    The request is sent with the standard library (``urllib``): this file
    does not import ``requests``, so a ``requests.post`` call here would
    fail with ``NameError``.

    Args:
        encoded_image: Base64 text of the image bytes, without any
            ``data:`` URI prefix.

    Returns:
        The ``content`` of the first choice's message, the string
        ``"No caption generated."`` when the response carries no content, or
        ``None`` after a problem has been reported through ``st.error``.
    """
    import json
    import urllib.error
    import urllib.request

    API_URL = "https://api.studio.nebius.ai/v1/chat/completions"
    API_KEY = os.environ.get("NEBIUS_API_KEY")

    # Fail early with a clear message instead of sending "Bearer None".
    if not API_KEY:
        st.error("NEBIUS_API_KEY environment variable is not set.")
        return None

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }

    # Base64 of the PNG signature always starts with "iVBORw0KGgo"; anything
    # else is labelled JPEG, which is what this file's compression step emits.
    mime_type = "image/png" if encoded_image.startswith("iVBORw0KGgo") else "image/jpeg"

    payload = {
        "model": "Qwen/Qwen2-VL-72B-Instruct",
        "messages": [
            {
                "role": "system",
                "content": """You are an image to prompt converter. Your work is to observe each and every detail of the image and craft a detailed prompt under 75 words in this format: [image content/subject, description of action, state, and mood], [art form, style], [artist/photographer reference if needed], [additional settings such as camera and lens settings, lighting, colors, effects, texture, background, rendering]."""
            },
            {
                "role": "user",
                # The image must travel as an ``image_url`` content part; a
                # data URI placed in a plain text message is read by the
                # model as a long string of characters, not as an image.
                "content": [
                    {"type": "text", "text": "Write a caption for this image"},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{encoded_image}"}
                    }
                ]
            }
        ],
        "temperature": 0
    }

    request = urllib.request.Request(
        API_URL,
        data=json.dumps(payload).encode("utf-8"),
        headers=headers,
        method="POST",
    )

    try:
        # A timeout keeps the Streamlit spinner from hanging forever.
        with urllib.request.urlopen(request, timeout=120) as response:
            status = response.status
            body = response.read().decode("utf-8", errors="replace")
    except urllib.error.HTTPError as err:
        # urllib raises for 4xx/5xx; report it like any other API error.
        detail = err.read().decode("utf-8", errors="replace")
        st.error(f"API Error {err.code}: {detail}")
        return None
    except OSError as err:
        # Covers URLError (DNS, refused connection) and socket timeouts.
        st.error(f"Could not reach the captioning API: {err}")
        return None

    if status != 200:
        st.error(f"API Error {status}: {body}")
        return None

    try:
        result = json.loads(body)
    except ValueError:
        st.error("The captioning API returned a response that is not valid JSON.")
        return None

    # ``or`` also covers an empty/None "choices" value, which would make a
    # bare ``[0]`` raise IndexError/TypeError.
    choices = result.get("choices") or [{}]
    message = choices[0].get("message") or {}
    return message.get("content", "No caption generated.")

# Streamlit app layout
def main():
    """Render the Streamlit page: upload an image, then caption it on demand.

    Nothing past the uploader is drawn until a file is present, and the
    caption request only runs on the script pass where the button was
    clicked.
    """
    st.set_page_config(
        page_title="Image Caption Generator",
        layout="centered",
        initial_sidebar_state="collapsed",
    )
    st.title("🖼️ Image to Caption Generator")

    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if not uploaded_file:
        return

    # Preview of what the user uploaded
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    if not st.button("Generate Caption"):
        return

    with st.spinner("Generating caption..."):
        encoded_image = convert_image_to_base64(uploaded_file)

        # Debug aid: shows how large the encoded payload is
        st.write(f"Encoded image length: {len(encoded_image)} characters")

        caption = generate_caption(encoded_image)
        if not caption:
            return

        st.subheader("Generated Caption:")
        st.text_area("", caption, height=100, key="caption_area")
        st.success("Caption generated successfully!")

if __name__ == "__main__":
    main()