"""Streamlit app that captions an uploaded image via the Nebius chat API."""

import base64
import os
import random  # noqa: F401  (unused here; kept so existing imports are not removed)
import time  # noqa: F401  (unused here; kept so existing imports are not removed)
from typing import BinaryIO, Optional

import requests
import streamlit as st

API_URL = "https://api.studio.nebius.ai/v1/chat/completions"
MODEL_NAME = "Qwen/Qwen2-VL-72B-Instruct"
REQUEST_TIMEOUT_SECONDS = 60


def convert_image_to_base64(image: BinaryIO) -> str:
    """Return the full contents of a binary file-like object as base64 text.

    Args:
        image: Binary file-like object exposing ``read()`` (for example the
            object returned by ``st.file_uploader``).

    Returns:
        The base64 encoding of the stream's bytes, as a ``str``.
    """
    # Rewind when possible: if the stream was already read once, read() would
    # otherwise return b"" and an empty image would be sent to the API.
    seekable = getattr(image, "seekable", None)
    if callable(seekable) and seekable():
        image.seek(0)
    return base64.b64encode(image.read()).decode("utf-8")


def generate_caption(
    encoded_image: str,
    mime_type: str = "image/png",
    timeout: float = REQUEST_TIMEOUT_SECONDS,
) -> Optional[str]:
    """Ask the Nebius chat-completions endpoint to caption an image.

    Failures are reported to the user with ``st.error`` rather than raised.

    Args:
        encoded_image: Base64-encoded image bytes (no ``data:`` prefix).
        mime_type: MIME type placed in the data URL, e.g. ``"image/jpeg"``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The caption text, or ``None`` if the API key is missing, the request
        fails, or the API answers with a non-200 status or a non-JSON body.
    """
    api_key = os.environ.get("NEBIUS_API_KEY")
    if not api_key:
        # Without this check the request would be sent as "Bearer None".
        st.error("NEBIUS_API_KEY environment variable is not set.")
        return None

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # The image has to travel inside the user message as an "image_url"
    # content part; an extra top-level "image" key is not part of the
    # chat-completions schema. The data URL must contain only the base64
    # payload: anything appended to it becomes part of the image data.
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {
                "role": "system",
                "content": "describe this image in great detail",
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "write a detailed caption for this image",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{encoded_image}",
                        },
                    },
                ],
            },
        ],
        "temperature": 1,
    }

    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=timeout
        )
    except requests.RequestException as exc:
        st.error(f"Request to the caption API failed: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"API Error {response.status_code}: {response.text}")
        return None

    try:
        result = response.json()
    except ValueError:
        st.error("The caption API returned a response that is not valid JSON.")
        return None

    # "choices" may be missing or an empty list; fall back to an empty choice
    # instead of raising IndexError.
    choices = result.get("choices") or [{}]
    message = choices[0].get("message") or {}
    return message.get("content") or "No caption generated."


def _remember_caption(caption: str) -> None:
    """Store the caption in session state (callback of the copy button)."""
    st.session_state["caption"] = caption


def main() -> None:
    """Render the page: upload an image, request a caption, display it."""
    st.set_page_config(
        page_title="Image to Caption Converter",
        layout="centered",
        initial_sidebar_state="collapsed",
    )
    st.title("🖼️ Image to Caption Converter")
    st.markdown(
        "Upload an image and let the AI generate a detailed caption for it."
    )

    uploaded_file = st.file_uploader(
        "Upload an image", type=["jpg", "jpeg", "png"]
    )
    if not uploaded_file:
        return

    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    # Identifies which upload a stored caption belongs to, so a caption made
    # for one image is never displayed under a different one.
    file_key = (uploaded_file.name, uploaded_file.size)

    if st.button("Generate Caption"):
        with st.spinner("Generating caption..."):
            encoded_image = convert_image_to_base64(uploaded_file)
            # Debug aid: the length should change when the image changes.
            st.write(f"Encoded image length: {len(encoded_image)} characters")
            mime_type = getattr(uploaded_file, "type", None) or "image/png"
            new_caption = generate_caption(encoded_image, mime_type)

        if new_caption:
            st.session_state["caption"] = new_caption
            st.session_state["caption_source"] = file_key
        else:
            st.session_state.pop("caption", None)
            st.session_state.pop("caption_source", None)

    # st.button is True only during the rerun triggered by its own click, so
    # the caption is rendered from session state; otherwise any later
    # interaction (such as the button below) would make it vanish.
    caption = st.session_state.get("caption")
    if caption and st.session_state.get("caption_source") == file_key:
        st.subheader("Generated Caption:")
        st.text_area(
            "Generated caption",
            caption,
            height=100,
            label_visibility="collapsed",
        )
        # NOTE(review): this does not copy anything to the clipboard; as in
        # the original code it only stores the caption in session state.
        st.button(
            "Copy to Clipboard", on_click=_remember_caption, args=(caption,)
        )


if __name__ == "__main__":
    main()