File size: 3,790 Bytes
9a78687
d5a06f6
 
 
11f5df3
d5a06f6
a93e14b
d5a06f6
a93e14b
 
7c56b7b
d5a06f6
a93e14b
788e1b9
a93e14b
79ec99d
a93e14b
 
 
79ec99d
7c56b7b
a93e14b
d5a06f6
a93e14b
 
 
788e1b9
a93e14b
 
 
d5a06f6
a93e14b
 
d5a06f6
 
 
 
 
 
 
a93e14b
7c56b7b
a93e14b
d5a06f6
a93e14b
 
 
 
 
 
 
7c56b7b
d5a06f6
a93e14b
d5a06f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a93e14b
d5a06f6
3dc8d07
d5a06f6
a93e14b
 
 
 
d5a06f6
a93e14b
 
 
 
 
 
 
 
d5a06f6
 
 
 
 
7c56b7b
a93e14b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import streamlit as st
import requests
import base64
import os

# Function to convert image to base64
def convert_image_to_base64(image):
    """Return the full contents of a binary file-like object as base64 text.

    Args:
        image: Binary file-like object exposing ``read()``; in this file it
            is the object returned by ``st.file_uploader``.

    Returns:
        The base64 encoding of the object's bytes, decoded to ``str`` as UTF-8.
    """
    # Rewind first: if the stream was already consumed, read() would return
    # b"" and the caller would silently send an empty image.
    try:
        image.seek(0)
    except (AttributeError, OSError):
        # Non-seekable source: read from the current position, as before.
        pass
    return base64.b64encode(image.read()).decode("utf-8")

# Function to generate a caption using Nebius API
def generate_caption(encoded_image, mime_type="image/png"):
    """Request a prompt-style caption for an image from the Nebius chat API.

    The image is sent as a base64 data URL inside the user message's content
    parts, next to the text instruction, so the model actually receives it.

    Args:
        encoded_image: Base64-encoded image bytes as a ``str``.
        mime_type: MIME type written into the data URL. Defaults to
            ``"image/png"``, the value that was previously hard-coded.

    Returns:
        The caption text; ``"No caption generated."`` when the response has
        no usable choice; or ``None`` after a failure has been reported to
        the page through ``st.error``.
    """
    API_URL = "https://api.studio.nebius.ai/v1/chat/completions"
    API_KEY = os.environ.get("NEBIUS_API_KEY")

    # Fail early with a clear message instead of sending "Bearer None".
    if not API_KEY:
        st.error("NEBIUS_API_KEY environment variable is not set.")
        return None

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": "llava-hf/llava-1.5-7b-hf",
        "messages": [
            {
                "role": "system",
                "content": """You are an image to prompt converter. Your work is to observe each and every detail of the image and craft a detailed prompt under 75 words in this format: [image content/subject, description of action, state, and mood], [art form, style], [artist/photographer reference if needed], [additional settings such as camera and lens settings, lighting, colors, effects, texture, background, rendering]."""
            },
            {
                "role": "user",
                # Multimodal content parts: the image must travel inside the
                # message; a top-level "image" key is not part of the
                # chat-completions request format.
                "content": [
                    {
                        "type": "text",
                        "text": "write a detailed caption for this image"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{encoded_image}"
                        }
                    }
                ]
            }
        ],
        "temperature": 0.7
    }

    try:
        # A timeout keeps the UI from hanging forever on a stalled connection.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    except requests.RequestException as exc:
        st.error(f"Request to the caption API failed: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"API Error {response.status_code}: {response.text}")
        return None

    try:
        result = response.json()
    except ValueError:
        st.error("API returned a response that is not valid JSON.")
        return None

    # `or` also covers an empty "choices" list / null "message", which the
    # plain .get() defaults would let through and crash on.
    choices = result.get("choices") or [{}]
    message = choices[0].get("message") or {}
    return message.get("content", "No caption generated.")

# Streamlit app
def main():
    """Render the Streamlit page: upload an image, generate and show a caption.

    The generated caption is kept in ``st.session_state`` so it survives the
    script rerun that every button click triggers; without that, clicking the
    second button would make the caption (and the button itself) disappear.
    """
    st.set_page_config(page_title="Image to Caption Converter", layout="centered", initial_sidebar_state="collapsed")
    
    # Gradient background style
    st.markdown("""
        <style>
            body {
                background: linear-gradient(135deg, #1e3c72, #2a5298);
                color: white;
                font-family: 'Arial', sans-serif;
            }
            .uploaded-image {
                max-width: 100%;
                border: 2px solid #ffffff;
                border-radius: 10px;
            }
            .copy-button {
                background-color: #ff8800;
                color: white;
                border: none;
                border-radius: 5px;
                padding: 10px 15px;
                cursor: pointer;
            }
            .copy-button:hover {
                background-color: #cc6b00;
            }
        </style>
    """, unsafe_allow_html=True)

    st.title("🖼️ Image to Caption Converter")

    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if not uploaded_file:
        return

    # Drop a caption that belongs to a previously uploaded file.
    file_signature = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("caption_source") != file_signature:
        st.session_state["caption_source"] = file_signature
        st.session_state.pop("generated_caption", None)

    # Display the uploaded image
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    # Convert image to base64 and get caption
    if st.button("Generate Caption"):
        with st.spinner("Generating caption..."):
            encoded_image = convert_image_to_base64(uploaded_file)
            st.session_state["generated_caption"] = generate_caption(encoded_image)

    # Rendered outside the button branch: st.button is True only for the
    # single run right after the click, so anything nested under it vanishes
    # on the next interaction.
    caption = st.session_state.get("generated_caption")
    if caption:
        st.subheader("Generated Caption:")
        # Non-empty label (hidden) avoids Streamlit's empty-label warning; no
        # widget key, so a newly generated caption is not masked by old state.
        st.text_area("Generated caption", caption, height=100, label_visibility="collapsed")

        # Copy button
        if st.button("Copy to Clipboard"):
            st.code(caption, language="text")
            # Server-side Python cannot write to the browser clipboard, so
            # point the user at the code block's own copy control instead of
            # claiming the copy already happened.
            st.info("Use the copy icon on the block above to copy the caption.")

# Call main() only when this file is executed as the main module, not when
# it is imported from another module.
if __name__ == "__main__":
    main()