Spaces:
Running
Running
File size: 3,460 Bytes
7c56b7b a93e14b 3dc8d07 7c56b7b 3dc8d07 7c56b7b a93e14b 7c56b7b a93e14b 79ec99d a93e14b 79ec99d 7c56b7b 3dc8d07 fb260dd a93e14b 0760a62 a93e14b 3dc8d07 a93e14b 47ad849 a93e14b 7c56b7b fb260dd a93e14b 47ad849 a93e14b 7c56b7b a93e14b 47ad849 a93e14b 3dc8d07 a93e14b 3dc8d07 fb260dd a93e14b fb260dd 47ad849 7c56b7b a93e14b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import streamlit as st
import base64
import requests
import os
import random
import time
# Function to convert image to base64
def convert_image_to_base64(image):
    """Return the full contents of a binary file-like object as base64 text.

    Args:
        image: An object with a ``read()`` method returning bytes (for
            example an uploaded file or ``io.BytesIO``).

    Returns:
        The base64 encoding of the stream's bytes, decoded to a ``str``.
    """
    # Rewind first: if the stream was already read (by a previous call or by
    # another consumer), read() would otherwise return b"" and the result
    # would silently be an empty string.
    seek = getattr(image, "seek", None)
    if callable(seek):
        try:
            seek(0)
        except (OSError, ValueError):
            # Non-seekable or closed-for-seeking streams: read from the
            # current position, as before.
            pass
    image_bytes = image.read()
    return base64.b64encode(image_bytes).decode("utf-8")
# Function to generate a caption using Nebius API
def _guess_image_mime(encoded_image):
    """Guess an image MIME type from the first characters of its base64 text."""
    # The magic bytes of each format always encode to the same base64 prefix:
    # JPEG (FF D8 FF) -> "/9j/", PNG (89 50 4E 47 0D 0A 1A 0A) -> "iVBORw0KGgo".
    signatures = (
        ("/9j/", "image/jpeg"),
        ("iVBORw0KGgo", "image/png"),
    )
    for prefix, mime_type in signatures:
        if encoded_image.startswith(prefix):
            return mime_type
    # Fall back to the type the data URL previously always declared.
    return "image/png"


def generate_caption(encoded_image):
    """Request a caption for a base64-encoded image from the Nebius chat API.

    The image is sent as an ``image_url`` content part of the user message,
    using a ``data:`` URL built from ``encoded_image``.

    Args:
        encoded_image: Base64 text of the image bytes (no ``data:`` prefix).

    Returns:
        The caption text from the first choice of the response, or ``None``
        when the request could not be completed. Every failure is reported to
        the page through ``st.error``.
    """
    API_URL = "https://api.studio.nebius.ai/v1/chat/completions"
    API_KEY = os.environ.get("NEBIUS_API_KEY")
    if not API_KEY:
        # Without this check the request would be sent as "Bearer None".
        st.error("NEBIUS_API_KEY is not set; cannot contact the caption API.")
        return None

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    system_prompt = """You are an image to prompt converter. Your work is to observe each and every detail of the image and craft a detailed prompt under 75 words in this format: [image content/subject, description of action, state, and mood], [art form, style], [artist/photographer reference if needed], [additional settings such as camera and lens settings, lighting, colors, effects, texture, background, rendering]."""
    # The data URL must contain only the base64 payload; any suffix (such as a
    # "?123456" cache-buster) becomes part of the payload and corrupts it.
    image_url = f"data:{_guess_image_mime(encoded_image)};base64,{encoded_image}"
    payload = {
        "model": "Qwen/Qwen2-VL-72B-Instruct",
        "messages": [
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                # The image travels inside the message as a content part; a
                # top-level "image" key is not part of the chat schema.
                "content": [
                    {"type": "text", "text": "write a detailed caption for this image"},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            },
        ],
        "temperature": 0,
    }

    try:
        # A timeout keeps the app from hanging forever on a stalled connection.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    except requests.RequestException as exc:
        st.error(f"Could not reach the caption API: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"API Error {response.status_code}: {response.text}")
        return None

    try:
        result = response.json()
    except ValueError:
        st.error("The caption API returned a response that is not valid JSON.")
        return None
    if not isinstance(result, dict):
        st.error("The caption API returned an unexpected response shape.")
        return None

    # `or` guards against an empty or null "choices"/"message" value, which
    # would otherwise raise IndexError/AttributeError.
    choices = result.get("choices") or [{}]
    first_choice = choices[0] or {}
    message = first_choice.get("message") or {}
    return message.get("content", "No caption generated.")
# Streamlit app
def main():
    """Render the Streamlit page: upload an image, caption it, show the result.

    The generated caption is kept in ``st.session_state`` so it stays on the
    page across Streamlit reruns, and it is shown only for the file it was
    generated from.
    """
    st.set_page_config(page_title="Image to Caption Converter", layout="centered", initial_sidebar_state="collapsed")
    st.title("🖼️ Image to Caption Converter")
    st.markdown("Upload an image and let the AI generate a detailed caption for it.")
    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if not uploaded_file:
        return

    # Display the uploaded image
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    # Identifies which upload a stored caption belongs to, so a caption for a
    # previous image is not shown next to a new one.
    file_key = (uploaded_file.name, uploaded_file.size)

    if st.button("Generate Caption"):
        with st.spinner("Generating caption..."):
            # Rewind in case the stream was consumed while being displayed.
            uploaded_file.seek(0)
            encoded_image = convert_image_to_base64(uploaded_file)
            caption = generate_caption(encoded_image)
        # Stored outside the button branch's lifetime: st.button is True only
        # for the single rerun triggered by the click.
        st.session_state["caption"] = caption
        st.session_state["caption_source"] = file_key

    caption = st.session_state.get("caption")
    if caption and st.session_state.get("caption_source") == file_key:
        st.subheader("Generated Caption:")
        # st.code renders its own copy-to-clipboard control, replacing a
        # nested button whose click made the caption disappear on rerun and
        # never copied anything.
        st.code(caption, language=None, wrap_lines=True)
        st.caption("Use the copy icon on the caption box to copy it to the clipboard.")
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
|