File size: 3,466 Bytes
7c56b7b
 
a93e14b
7c56b7b
 
a93e14b
 
 
 
 
7c56b7b
a93e14b
 
 
 
79ec99d
a93e14b
 
 
79ec99d
7c56b7b
fb260dd
 
 
a93e14b
47ad849
a93e14b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47ad849
a93e14b
7c56b7b
fb260dd
a93e14b
47ad849
a93e14b
 
 
 
 
 
 
7c56b7b
a93e14b
 
 
 
 
47ad849
a93e14b
 
fb260dd
a93e14b
 
 
 
 
 
 
 
fb260dd
 
 
 
a93e14b
 
 
 
 
 
fb260dd
47ad849
7c56b7b
a93e14b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import streamlit as st
import requests
import base64
import os

# Function to convert image to base64
def convert_image_to_base64(image):
    """Return the full contents of a binary file-like object as base64 text.

    Args:
        image: Object exposing ``read()`` that returns bytes (for example an
            uploaded file or ``io.BytesIO``).

    Returns:
        The base64 encoding of the bytes, decoded to a UTF-8 ``str``.
    """
    # Rewind first: if something already read from this stream, a bare
    # ``read()`` would return only the remainder (often nothing at all).
    seekable = getattr(image, "seekable", None)
    if callable(seekable) and seekable():
        image.seek(0)

    return base64.b64encode(image.read()).decode("utf-8")

# Function to generate a caption using Nebius API
def generate_caption(encoded_image):
    """Ask the Nebius chat-completions API to describe a base64-encoded image.

    The image is sent as an ``image_url`` part of the user message (as a
    ``data:`` URL) next to the text instruction, which is where
    OpenAI-compatible chat endpoints look for image input.

    Args:
        encoded_image: Base64 text of the image bytes, without any
            ``data:`` prefix.

    Returns:
        The caption text of the first choice, ``"No caption generated."``
        when the response carries no message content, or ``None`` after a
        failure has been reported through ``st.error``.
    """
    API_URL = "https://api.studio.nebius.ai/v1/chat/completions"
    API_KEY = os.environ.get("NEBIUS_API_KEY")

    if not API_KEY:
        # Fail early with a clear message instead of sending "Bearer None".
        st.error("NEBIUS_API_KEY environment variable is not set.")
        return None

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }

    # Print base64 to debug (make sure it's different for each image upload)
    print(f"Base64 encoded image: {encoded_image[:100]}...")  # Print only first 100 characters for brevity

    # JPEG data starts with bytes FF D8 FF, which base64-encode to "/9j/".
    # Everything else keeps the PNG label this function used previously.
    mime_type = "image/jpeg" if encoded_image.startswith("/9j/") else "image/png"

    payload = {
        "model": "Qwen/Qwen2-VL-7B-Instruct",
        "messages": [
            {
                "role": "system",
                "content": """You are an image to prompt converter. Your work is to observe each and every detail of the image and craft a detailed prompt under 75 words in this format: [image content/subject, description of action, state, and mood], [art form, style], [artist/photographer reference if needed], [additional settings such as camera and lens settings, lighting, colors, effects, texture, background, rendering]."""
            },
            {
                "role": "user",
                # The image must travel inside the message content; a
                # top-level "image" key is not part of the chat schema.
                "content": [
                    {
                        "type": "text",
                        "text": "write a detailed caption for this image"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{encoded_image}"
                        }
                    }
                ]
            }
        ],
        "temperature": 0
    }

    # Send request to API; the timeout keeps the UI from hanging forever.
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    except requests.RequestException as exc:
        st.error(f"Request to captioning API failed: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"API Error {response.status_code}: {response.text}")
        return None

    try:
        result = response.json()
    except ValueError:
        st.error("API returned a response that is not valid JSON.")
        return None

    # Tolerate a missing or empty "choices" list instead of raising IndexError.
    choices = result.get("choices") or [{}]
    message = choices[0].get("message") or {}
    return message.get("content") or "No caption generated."

# Streamlit app
def main():
    """Render the Streamlit page: upload an image, request a caption, show it.

    The generated caption is stored in ``st.session_state`` together with an
    identifier of the upload it was produced for, so it stays on screen
    across the script reruns that Streamlit triggers on widget interaction.
    """
    st.set_page_config(page_title="Image to Caption Converter", layout="centered", initial_sidebar_state="collapsed")

    st.title("🖼️ Image to Caption Converter")
    st.markdown("Upload an image and let the AI generate a detailed caption for it.")

    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

    if not uploaded_file:
        return

    # Display the uploaded image
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    # Name and size mark which upload a stored caption belongs to, so a
    # caption for a previous image is not shown under a new one.
    upload_id = (uploaded_file.name, uploaded_file.size)

    if st.button("Generate Caption"):
        with st.spinner("Generating caption..."):
            # Rewind in case an earlier reader moved the stream position.
            uploaded_file.seek(0)
            encoded_image = convert_image_to_base64(uploaded_file)

            # Debugging: print out the base64 string length to verify if it's changing
            st.write(f"Encoded image length: {len(encoded_image)} characters")

            caption = generate_caption(encoded_image)

        # Stored even when None so a failed attempt clears the old caption.
        st.session_state["generated_caption"] = caption
        st.session_state["generated_caption_for"] = upload_id

    caption = st.session_state.get("generated_caption")
    if not caption or st.session_state.get("generated_caption_for") != upload_id:
        return

    st.subheader("Generated Caption:")

    # Drive the keyed widget through session state so it always shows the
    # latest caption; the label is non-empty but visually collapsed.
    st.session_state["caption_area"] = caption
    st.text_area("Generated caption", height=100, key="caption_area", label_visibility="collapsed")

    # NOTE: this only stashes the caption in session state under "caption";
    # writing to the browser clipboard would need a client-side component.
    st.button("Copy to Clipboard", on_click=lambda: st.session_state.update({"caption": caption}))

# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()