File size: 3,003 Bytes
7c56b7b
 
a93e14b
7c56b7b
 
a93e14b
 
 
 
 
7c56b7b
a93e14b
 
 
 
79ec99d
a93e14b
 
 
79ec99d
7c56b7b
a93e14b
47ad849
a93e14b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47ad849
a93e14b
7c56b7b
a93e14b
47ad849
a93e14b
 
 
 
 
 
 
7c56b7b
a93e14b
 
 
 
 
47ad849
a93e14b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47ad849
7c56b7b
a93e14b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import streamlit as st
import requests
import base64
import os

# Function to convert image to base64
def convert_image_to_base64(image):
    """Return the full contents of a binary file-like object as base64 text.

    Args:
        image: Object exposing ``read()`` that returns bytes (for example an
            uploaded file or ``io.BytesIO``).

    Returns:
        The base64 encoding of the bytes, decoded to a UTF-8 ``str``.
    """
    # Rewind first: if something already consumed the stream, a plain read()
    # would return b"" and silently produce an empty encoding.
    seek = getattr(image, "seek", None)
    if callable(seek):
        try:
            seek(0)
        except (OSError, ValueError):
            # Non-seekable stream: fall back to reading from the current
            # position, which is what the function did before.
            pass

    image_bytes = image.read()
    return base64.b64encode(image_bytes).decode("utf-8")

# Function to generate a caption using Nebius API
def generate_caption(encoded_image):
    """Request a prompt-style caption for a base64-encoded image from Nebius.

    The image is sent as an ``image_url`` content part of the user message
    (a data URL), which is the shape OpenAI-compatible chat-completions
    endpoints use for vision input.

    Args:
        encoded_image: Base64 text of the raw image bytes, without any
            ``data:`` URL prefix.

    Returns:
        The caption text; ``"No caption generated."`` when the response
        carries no message content; or ``None`` after a failure has been
        reported to the page with ``st.error``.
    """
    API_URL = "https://api.studio.nebius.ai/v1/chat/completions"
    API_KEY = os.environ.get("NEBIUS_API_KEY")
    if not API_KEY:
        # Without this guard the request would be sent as "Bearer None".
        st.error("NEBIUS_API_KEY environment variable is not set.")
        return None

    # JPEG data starts with bytes FF D8 FF, which base64-encodes to "/9j/".
    # Anything else keeps the previous default of PNG.
    if str(encoded_image).startswith("/9j/"):
        mime_type = "image/jpeg"
    else:
        mime_type = "image/png"

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": "Qwen/Qwen2-VL-7B-Instruct",
        "messages": [
            {
                "role": "system",
                "content": """You are an image to prompt converter. Your work is to observe each and every detail of the image and craft a detailed prompt under 75 words in this format: [image content/subject, description of action, state, and mood], [art form, style], [artist/photographer reference if needed], [additional settings such as camera and lens settings, lighting, colors, effects, texture, background, rendering]."""
            },
            {
                "role": "user",
                # The image must travel inside the message content; a
                # top-level "image" key is not part of the chat schema.
                "content": [
                    {
                        "type": "text",
                        "text": "write a detailed caption for this image"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{encoded_image}"
                        }
                    }
                ]
            }
        ],
        "temperature": 0
    }

    try:
        # A timeout keeps the UI from hanging forever on a stalled connection.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    except requests.RequestException as exc:
        st.error(f"Request failed: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"API Error {response.status_code}: {response.text}")
        return None

    try:
        result = response.json()
    except ValueError:
        st.error("API returned a response that is not valid JSON.")
        return None

    # `or` fallbacks also cover an empty "choices" list and null fields,
    # which would otherwise raise IndexError / AttributeError.
    choices = result.get("choices") or [{}]
    message = choices[0].get("message") or {}
    return message.get("content", "No caption generated.")

# Streamlit app
def main():
    """Render the upload page and show the generated caption for the image.

    The caption is kept in ``st.session_state`` so it survives the reruns
    Streamlit performs on every widget interaction, and it is tied to the
    upload it was generated for so a new file never shows an old caption.
    """
    st.set_page_config(page_title="Image to Caption Converter", layout="centered", initial_sidebar_state="collapsed")

    st.title("🖼️ Image to Caption Converter")
    st.markdown("Upload an image and let the AI generate a detailed caption for it.")

    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if not uploaded_file:
        return

    # Display the uploaded image
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    # Identifies the current upload so a stored caption can be matched to it.
    source_id = (uploaded_file.name, uploaded_file.size)

    # Convert image to base64 and get caption
    if st.button("Generate Caption"):
        with st.spinner("Generating caption..."):
            encoded_image = convert_image_to_base64(uploaded_file)
            caption = generate_caption(encoded_image)

        # Store the result (even a failed/empty one) so a stale caption from
        # an earlier attempt is not shown next to a fresh error message.
        st.session_state["caption"] = caption
        st.session_state["caption_source"] = source_id

    caption = st.session_state.get("caption")
    if caption and st.session_state.get("caption_source") == source_id:
        st.subheader("Generated Caption:")
        # st.code renders its own copy-to-clipboard control; a plain
        # st.button cannot reach the browser clipboard and would also make
        # the caption vanish on the rerun it triggers.
        st.code(caption, language=None, wrap_lines=True)

# Script entry point: build the UI when this file is executed directly.
if __name__ == "__main__":
    main()