File size: 3,790 Bytes
7c56b7b
 
a93e14b
7c56b7b
 
a93e14b
 
 
 
 
7c56b7b
a93e14b
 
 
 
79ec99d
a93e14b
 
 
79ec99d
7c56b7b
a93e14b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c56b7b
a93e14b
 
 
 
 
 
 
 
 
7c56b7b
a93e14b
 
 
7c56b7b
a93e14b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c56b7b
a93e14b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import streamlit as st
import requests
import base64
import os

# Function to convert image to base64
def convert_image_to_base64(image):
    """Return the full contents of a binary file-like object as base64 text.

    Args:
        image: A binary file-like object exposing ``read()`` (for example
            the object returned by ``st.file_uploader``).

    Returns:
        The base64 encoding of the stream's bytes, as a ``str``.
    """
    # The stream may already have been read (e.g. to display a preview).
    # Rewind when possible so the whole image is encoded rather than
    # whatever happens to remain after the current position.
    seekable = getattr(image, "seekable", None)
    if callable(seekable) and seekable():
        image.seek(0)

    return base64.b64encode(image.read()).decode("utf-8")

# Function to generate a caption using Nebius API
def _detect_image_mime(encoded_image):
    """Guess the MIME type of a base64-encoded image from its first characters."""
    # "/9j/" is the base64 form of the JPEG magic bytes FF D8 FF.
    if encoded_image.startswith("/9j/"):
        return "image/jpeg"
    # Anything else keeps the PNG label this function has always sent.
    return "image/png"


def generate_caption(encoded_image):
    """Request a caption for a base64-encoded image from the Nebius chat API.

    The API key is read from the ``NEBIUS_API_KEY`` environment variable.
    Any failure (missing key, network error, non-200 status, unparsable
    body) is reported through ``st.error`` and results in ``None``.

    Args:
        encoded_image: The image bytes encoded as a base64 string.

    Returns:
        The caption text from the first choice of the response,
        ``"No caption generated."`` when the response holds no content,
        or ``None`` when the request failed.
    """
    API_URL = "https://api.studio.nebius.ai/v1/chat/completions"
    API_KEY = os.environ.get("NEBIUS_API_KEY")

    # Without a key the request would be sent as "Bearer None" and fail
    # with an opaque authorization error; report the real cause instead.
    if not API_KEY:
        st.error("NEBIUS_API_KEY environment variable is not set.")
        return None

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }

    image_url = f"data:{_detect_image_mime(encoded_image)};base64,{encoded_image}"

    payload = {
        "model": "llava-hf/llava-1.5-7b-hf",
        "messages": [
            {
                "role": "system",
                "content": """You are an image to prompt converter. Your work is to observe each and every detail of the image and craft a detailed prompt under 75 words in this format: [image content/subject, description of action, state, and mood], [art form, style], [artist/photographer reference if needed], [additional settings such as camera and lens settings, lighting, colors, effects, texture, background, rendering]."""
            },
            {
                "role": "user",
                # The image must travel as a content part of the user
                # message; a separate top-level "image" field is not part
                # of the chat-completions message format.
                "content": [
                    {
                        "type": "text",
                        "text": "write a detailed caption for this image"
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": image_url}
                    }
                ]
            }
        ],
        "temperature": 0.7
    }

    try:
        # A timeout keeps the app from hanging forever on a stalled connection.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    except requests.RequestException as exc:
        st.error(f"Request to caption API failed: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"API Error {response.status_code}: {response.text}")
        return None

    try:
        result = response.json()
    except ValueError:
        st.error("API returned a response that is not valid JSON.")
        return None

    # `choices` may be missing *or* an empty list; indexing [0] on the
    # latter would raise IndexError.
    choices = result.get("choices") or [{}]
    message = choices[0].get("message") or {}
    return message.get("content") or "No caption generated."

# Streamlit app
def main():
    """Render the Streamlit page: upload an image, generate and show a caption.

    The generated caption is kept in ``st.session_state`` so it survives
    the script reruns Streamlit performs on every widget interaction.
    """
    st.set_page_config(page_title="Image to Caption Converter", layout="centered", initial_sidebar_state="collapsed")
    
    # Gradient background style
    st.markdown("""
        <style>
            body {
                background: linear-gradient(135deg, #1e3c72, #2a5298);
                color: white;
                font-family: 'Arial', sans-serif;
            }
            .uploaded-image {
                max-width: 100%;
                border: 2px solid #ffffff;
                border-radius: 10px;
            }
            .copy-button {
                background-color: #ff8800;
                color: white;
                border: none;
                border-radius: 5px;
                padding: 10px 15px;
                cursor: pointer;
            }
            .copy-button:hover {
                background-color: #cc6b00;
            }
        </style>
    """, unsafe_allow_html=True)

    st.title("🖼️ Image to Caption Converter")

    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if not uploaded_file:
        # Nothing uploaded (or the upload was removed): drop any old caption.
        st.session_state.pop("caption", None)
        st.session_state.pop("caption_source", None)
        return

    # Display the uploaded image
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    # Discard a caption that belongs to a previously uploaded file.
    upload_id = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("caption_source") != upload_id:
        st.session_state.pop("caption", None)

    # Convert image to base64 and get caption
    if st.button("Generate Caption"):
        with st.spinner("Generating caption..."):
            encoded_image = convert_image_to_base64(uploaded_file)
            caption = generate_caption(encoded_image)

        if caption:
            st.session_state["caption"] = caption
            st.session_state["caption_source"] = upload_id
        else:
            st.session_state.pop("caption", None)

    # Render from session state rather than inside the button branch:
    # st.button is True only for the single rerun right after the click,
    # so anything nested under it vanishes on the next interaction.
    caption = st.session_state.get("caption")
    if caption:
        st.subheader("Generated Caption:")
        st.text_area(
            "Generated caption",
            caption,
            height=100,
            label_visibility="collapsed",
        )
        # st.code renders its own copy-to-clipboard control, so no
        # separate button (which cannot reach the browser clipboard
        # from Python anyway) is needed.
        st.caption("Use the copy icon on the block below to copy the caption.")
        st.code(caption, language="text")

if __name__ == "__main__":
    main()