import base64
import os
from mimetypes import guess_type

from openai import AzureOpenAI

def local_image_to_data_url(image_path):
    """Read a local image file and return it as a base64 data URL."""
    mime_type, _ = guess_type(image_path)

    # guess_type does not recognise every format (e.g. .webp on older
    # Python versions), so fall back to an explicit extension check before
    # giving up and using the generic binary type.
    if mime_type is None or mime_type == 'application/octet-stream':
        if image_path.lower().endswith('.webp'):
            mime_type = 'image/webp'
        else:
            mime_type = 'application/octet-stream'

    with open(image_path, "rb") as image_file:
        base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')

    return f"data:{mime_type};base64,{base64_encoded_data}"

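# For illustration, the helper returns URLs of this shape ("sample.webp" is
# a hypothetical file name, not part of the dataset):
#
#   local_image_to_data_url("sample.webp")
#   # -> "data:image/webp;base64,<base64-encoded file bytes>"
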
# Directory with the source images. Captions are written next to the images
# as .txt files, so list only image files here; otherwise a re-run would try
# to caption the caption files themselves. Adjust the extension list to match
# the dataset.
images_path = "/eph/nvme0/azureml/cr/j/8569d5e3aa08485780b67a53d671e109/exe/wd/1_2M_Dataset"

IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.webp', '.gif', '.bmp')
imgs_list = [file for file in os.listdir(images_path)
             if file.lower().endswith(IMAGE_EXTENSIONS)]

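# Skip images that were already captioned by an earlier run, so an
# interrupted job can be resumed. This assumes an existing .txt file means
# its caption is complete; delete the .txt files to force re-captioning.
imgs_list = [img for img in imgs_list
             if not os.path.exists(
                 os.path.join(images_path, os.path.splitext(img)[0] + ".txt"))]
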
api_base = "https://allam-swn-gpt-01.openai.azure.com/"
# Do not hard-code the key in source; read it from the environment instead
# (export AZURE_OPENAI_API_KEY before running).
api_key = os.environ["AZURE_OPENAI_API_KEY"]
deployment_name = "gpt-4o-900ptu"
api_version = "2024-02-15-preview"

client = AzureOpenAI(
    api_key=api_key,
    api_version=api_version,
    base_url=f"{api_base}openai/deployments/{deployment_name}",
)

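# Captioning a dataset of this size will eventually hit transient 429/5xx
# responses. A minimal retry sketch with exponential backoff; the schedule
# and retry count are assumptions, not tuned values. The loop below could
# call create_with_retry(...) in place of client.chat.completions.create(...).
import time

from openai import APIError


def create_with_retry(max_retries=5, **kwargs):
    """Call the chat completions API, retrying with exponential backoff.

    APIError is the base class of the SDK's rate-limit and server errors,
    so catching it covers both.
    """
    for attempt in range(max_retries):
        try:
            return client.chat.completions.create(**kwargs)
        except APIError:
            if attempt == max_retries - 1:
                raise
            time.sleep(2 ** attempt)  # back off 1s, 2s, 4s, ...
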
for img_name in imgs_list:
    img_path = os.path.join(images_path, img_name)

    # The caption file shares the image's base name; splitext handles file
    # names that contain extra dots correctly.
    txt_file_name = os.path.splitext(img_name)[0] + ".txt"
    txt_path = os.path.join(images_path, txt_file_name)

    data_url = local_image_to_data_url(img_path)

    response = client.chat.completions.create(
        model=deployment_name,
        messages=[
            {"role": "system", "content": "You are an image captioning assistant."},
            {"role": "user", "content": [
                {
                    "type": "text",
                    "text": """You are my captioning model. I will give you a bunch of images with their main subject, \
and I want you to write a detailed caption based on what you see in the images alone. Take these considerations into account when writing the caption: \
order the terms in the caption and separate them with commas. The order of the words in the caption directly corresponds to their weight when generating the final image, \
so the main subject should always be at the start of the prompt. If we want to add more details, \
do it in a "narrative style", using commas to help separate the terms for the FLUX model to read. The tag of this image is: 1/2M cup"""
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": data_url
                    }
                }
            ]}
        ],
        max_tokens=2000
    )

    # Write the caption next to the image; UTF-8 in case the model returns
    # non-ASCII characters.
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(response.choices[0].message.content)
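
# A sequential pass over ~1.2M images will be slow. One way to raise
# throughput (sketch only; max_workers is an assumption and should be sized
# to the deployment's rate limits) is to caption several images concurrently:
#
#   from concurrent.futures import ThreadPoolExecutor
#
#   def caption_one(img_name):
#       ...  # the body of the loop above, one image per call
#
#   with ThreadPoolExecutor(max_workers=8) as pool:
#       list(pool.map(caption_one, imgs_list))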