Spaces:
Running
Running
File size: 3,631 Bytes
9a78687 d5a06f6 6e2e0c5 11f5df3 6e2e0c5 a93e14b 6e2e0c5 a93e14b 7c56b7b 6e2e0c5 a93e14b 788e1b9 a93e14b 79ec99d a93e14b 79ec99d 7c56b7b a93e14b 6e2e0c5 a93e14b 788e1b9 a93e14b 6e2e0c5 a93e14b 6e2e0c5 a93e14b 7c56b7b 6e2e0c5 a93e14b 6e2e0c5 a93e14b 7c56b7b 6e2e0c5 a93e14b 6e2e0c5 3dc8d07 d5a06f6 6e2e0c5 a93e14b 6e2e0c5 a93e14b 6e2e0c5 a93e14b 6e2e0c5 7c56b7b a93e14b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import streamlit as st
import base64
import os
import requests
from PIL import Image
from io import BytesIO
# Function to compress and resize the image before base64 encoding
def compress_and_resize_image(image, max_size=(1024, 1024), quality=85):
    """Shrink an image to fit ``max_size`` and re-encode it as JPEG in memory.

    Args:
        image: Anything ``PIL.Image.open`` accepts (a path or a binary
            file-like object such as a Streamlit upload).
        max_size: ``(width, height)`` bounding box handed to
            ``Image.thumbnail``; the aspect ratio is preserved.
        quality: JPEG quality setting handed to ``Image.save``.

    Returns:
        An *open* ``BytesIO`` rewound to offset 0 that holds the JPEG bytes.

    Raises:
        OSError: If the input cannot be opened or decoded as an image
            (raised by Pillow).
    """
    img = Image.open(image)

    # JPEG has no alpha channel or palette, so saving e.g. an RGBA or P mode
    # PNG directly fails. Convert before resizing so the resampling filter
    # works on real colour values rather than palette indices.
    if img.mode not in ("RGB", "L"):
        img = img.convert("RGB")

    img.thumbnail(max_size)  # In-place downscale; keeps aspect ratio.

    # The buffer must outlive this function, so it is deliberately NOT
    # created with a ``with`` statement: leaving a ``with BytesIO()`` block
    # closes the buffer and the caller's ``.read()`` would raise ValueError.
    byte_io = BytesIO()
    img.save(byte_io, format="JPEG", quality=quality)
    byte_io.seek(0)
    return byte_io
# Function to convert uploaded image to base64
def convert_image_to_base64(image):
    """Return the base64 text of the compressed version of ``image``.

    The image is first passed through ``compress_and_resize_image``; the
    bytes read from the stream it returns are base64-encoded and decoded
    to a ``str`` using UTF-8.
    """
    return base64.b64encode(
        compress_and_resize_image(image).read()
    ).decode("utf-8")
# Function to generate caption using Nebius API
def generate_caption(encoded_image):
    """Ask the Nebius chat-completions API to describe a base64-encoded image.

    Args:
        encoded_image: Base64 text of the image bytes, without any
            ``data:`` prefix. The data URL is labelled ``image/jpeg``
            because ``compress_and_resize_image`` in this file always
            encodes JPEG.

    Returns:
        The caption text from the first choice of the response, the
        fallback string ``"No caption generated."`` when the response has
        no usable content, or ``None`` when the request could not be
        completed (the reason is shown to the user with ``st.error``).
    """
    API_URL = "https://api.studio.nebius.ai/v1/chat/completions"
    API_KEY = os.environ.get("NEBIUS_API_KEY")
    if not API_KEY:
        # Without this guard the request would be sent as "Bearer None".
        st.error("NEBIUS_API_KEY is not set; cannot call the caption API.")
        return None

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "Qwen/Qwen2-VL-72B-Instruct",
        "messages": [
            {
                "role": "system",
                "content": """You are an image to prompt converter. Your work is to observe each and every detail of the image and craft a detailed prompt under 75 words in this format: [image content/subject, description of action, state, and mood], [art form, style], [artist/photographer reference if needed], [additional settings such as camera and lens settings, lighting, colors, effects, texture, background, rendering].""",
            },
            {
                "role": "user",
                # The image has to travel as an ``image_url`` content part;
                # a data URL placed in a plain-text message is just a long
                # string of characters to the model, not a picture.
                "content": [
                    {"type": "text", "text": "Write a caption for this image"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{encoded_image}",
                        },
                    },
                ],
            },
        ],
        "temperature": 0,
    }

    # A timeout keeps the app from hanging forever on a stalled connection.
    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=60
        )
    except requests.RequestException as exc:
        st.error(f"Request to the caption API failed: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"API Error {response.status_code}: {response.text}")
        return None

    try:
        result = response.json()
    except ValueError:
        st.error("The caption API returned a response that is not valid JSON.")
        return None

    # ``choices`` may be missing or an empty list; fall back to an empty
    # entry so the lookup below yields the default text instead of raising.
    choices = result.get("choices") or [{}]
    return choices[0].get("message", {}).get("content", "No caption generated.")
# Streamlit app layout
def main():
    """Render the Streamlit page: upload an image, then caption it on demand.

    Flow: configure the page, show a file uploader, preview the uploaded
    image, and when the "Generate Caption" button is pressed encode the
    image with ``convert_image_to_base64`` and send it to
    ``generate_caption``. A non-empty caption is displayed in a text area.
    """
    st.set_page_config(
        page_title="Image Caption Generator",
        layout="centered",
        initial_sidebar_state="collapsed",
    )
    st.title("🖼️ Image to Caption Generator")

    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if not uploaded_file:
        return

    # Show the user what was uploaded before doing any work on it.
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    if not st.button("Generate Caption"):
        return

    with st.spinner("Generating caption..."):
        try:
            encoded_image = convert_image_to_base64(uploaded_file)
        except OSError as exc:
            # Pillow reports unreadable or unsupported image data as OSError;
            # show it as a message instead of crashing the page.
            st.error(f"Could not process the uploaded image: {exc}")
            return

        # Debugging aid: shows how large the request payload will be.
        st.write(f"Encoded image length: {len(encoded_image)} characters")

        caption = generate_caption(encoded_image)

    # ``generate_caption`` returns None after reporting its own error.
    if caption:
        st.subheader("Generated Caption:")
        # Streamlit discourages empty widget labels; give the widget a real
        # label and hide it, since the subheader already serves as the title.
        st.text_area(
            "Generated caption",
            caption,
            height=100,
            key="caption_area",
            label_visibility="collapsed",
        )
        st.success("Caption generated successfully!")
# Build the page only when this file is run as the entry script, so that
# importing the module does not start the app as a side effect.
if __name__ == "__main__":
    main()
|