"""Gradio front end for editing images with the Gemini image-generation API."""

import asyncio
import base64
import io
import json
import os

import gradio as gr
from google import genai
from google.genai import types
from PIL import Image

# Every harm category is set to BLOCK_NONE for the edit request; the table is
# invariant, so it is built once instead of on every call.
_SAFETY_SETTINGS = {
    types.HarmCategory.HARM_CATEGORY_HARASSMENT: types.HarmBlockThreshold.BLOCK_NONE,
    types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: types.HarmBlockThreshold.BLOCK_NONE,
    types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: types.HarmBlockThreshold.BLOCK_NONE,
    types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: types.HarmBlockThreshold.BLOCK_NONE,
}


def pil_to_bytes(img, format="PNG"):
    """Serialize a PIL image into encoded bytes.

    Args:
        img: The PIL image to encode.
        format: Image format name handed to ``Image.save`` (default ``"PNG"``).

    Returns:
        The encoded image as ``bytes``.
    """
    img_byte_arr = io.BytesIO()
    img.save(img_byte_arr, format=format)
    return img_byte_arr.getvalue()


async def load_image_base64(img):
    """Return the PNG-encoded bytes of ``img``.

    Despite the name, the result is raw encoded bytes, not a base64 string.

    Args:
        img: A PIL image. String inputs (URL or file path) are rejected.

    Returns:
        The PNG-encoded image as ``bytes``.

    Raises:
        ValueError: If ``img`` is a string; URL/path loading is not implemented.
    """
    if isinstance(img, str):
        raise ValueError("URL loading not implemented in this version")
    return pil_to_bytes(img)


async def generate_image_gemini(prompt, image, api_key, temperature=0.4):
    """Ask Gemini to edit ``image`` according to ``prompt``.

    Args:
        prompt: Free-text description of the desired edit.
        image: The PIL image to edit.
        api_key: Gemini API key used to build the client.
        temperature: Sampling temperature forwarded to the model.

    Returns:
        The first image found in the response as a PIL image, or ``None`` when
        the response carries no image or any error occurs (the error is
        printed rather than raised).
    """
    try:
        client = genai.Client(api_key=api_key)
        source_bytes = await load_image_base64(image)

        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_bytes(
                        data=source_bytes,
                        mime_type="image/png",
                    )
                ],
            ),
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=f"Edit this image: {prompt}"),
                ],
            ),
        ]

        response = await client.aio.models.generate_content(
            model="gemini-2.0-flash-exp",
            contents=contents,
            config=types.GenerateContentConfig(
                temperature=temperature,
                safety_settings=[
                    types.SafetySetting(category=category, threshold=threshold)
                    for category, threshold in _SAFETY_SETTINGS.items()
                ],
                response_modalities=['Text', 'Image'],
            ),
        )

        # A response may come back without candidates or without content;
        # treat that as "no image" explicitly instead of relying on an
        # AttributeError/IndexError falling through to the handler below.
        candidates = response.candidates or []
        if not candidates or candidates[0].content is None:
            return None

        for part in candidates[0].content.parts or []:
            inline = part.inline_data
            if inline is not None and inline.data:
                # Only the first returned image is used.
                return Image.open(io.BytesIO(inline.data))
        return None
    except Exception as e:
        # Best-effort boundary: callers interpret None as "edit failed".
        print(f"Google GenAI client failed with error: {e}")
        return None


def process_image_edit(image, prompt, api_key, image_history, temperature):
    """Run one edit request and update the undo history.

    Args:
        image: The PIL image to edit, or ``None`` if nothing was uploaded.
        prompt: Description of the edit.
        api_key: Gemini API key.
        image_history: List of images that preceded each successful edit
            (``None`` is treated as an empty history).
        temperature: Sampling temperature for the model.

    Returns:
        A ``(output_image, image_history, status_message)`` tuple. On failure
        the unmodified input image is returned as the output.
    """
    if image is None or not prompt or not api_key:
        return None, image_history, "Please provide an image, prompt, and API key"

    if image_history is None:
        image_history = []

    # NOTE(review): asyncio.run requires that no event loop is already running
    # in the calling thread - confirm this holds for how the handler is invoked.
    try:
        edited_image = asyncio.run(
            generate_image_gemini(prompt, image, api_key, temperature)
        )
    except Exception as e:
        return image, image_history, f"Error: {str(e)}"

    if edited_image is None:
        return image, image_history, "Failed to edit image. Please try again."

    # Record the pre-edit image only once the edit has succeeded, so a failed
    # request does not leave a no-op entry in the undo history.
    image_history.append(image)
    return edited_image, image_history, "Image edited successfully"


def undo_edit(image_history):
    """Revert to the image that preceded the most recent edit.

    The history holds the input image of each edit, so the last entry is the
    image to restore; it is removed from the history and returned.

    Args:
        image_history: List of pre-edit images, oldest first (may be ``None``
            or empty).

    Returns:
        A ``(output_image, image_history, status_message)`` tuple. When there
        is nothing to undo, the image is ``None`` and the history is empty.
    """
    if not image_history:
        return None, [], "No previous version available"

    previous_image = image_history.pop()
    return previous_image, image_history, "Reverted to previous image"


def continue_editing(output_image):
    """Feed the current output image back in as the next input image.

    Args:
        output_image: The image currently shown as the edit result, or ``None``.

    Returns:
        A ``(input_image, status_message)`` tuple.
    """
    if output_image is None:
        return None, "No edited image available to continue editing"
    return output_image, "Ready to continue editing the current image"


def create_ui():
    """Build the Gradio Blocks app and wire up its event handlers.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks(title="Gemini Image Editor") as app:
        gr.Markdown("# Gemini Image Editor")
        gr.Markdown("Upload an image, enter a description of the edit you want, and let Gemini do the rest!")

        # Undo history is kept in Gradio state.
        image_history = gr.State([])

        with gr.Row():
            with gr.Column():
                input_image = gr.Image(type="pil", label="Upload Image")
                prompt = gr.Textbox(
                    label="Edit Description",
                    placeholder="Describe the edit you want...",
                )
                api_key = gr.Textbox(
                    label="Gemini API Key",
                    placeholder="Enter your Gemini API key",
                    type="password",
                )

                with gr.Accordion("Advanced Settings", open=False):
                    temperature = gr.Slider(
                        minimum=0.0,
                        maximum=2.0,
                        value=1,
                        step=0.05,
                        label="Temperature",
                        info="Controls randomness in generation (0 = deterministic, 1 = creative, 2 = extreme)",
                    )

                with gr.Row():
                    edit_btn = gr.Button("Edit Image")
                    undo_btn = gr.Button("Undo Last Edit")
                    continue_btn = gr.Button("Continue Editing")

            with gr.Column():
                output_image = gr.Image(type="pil", label="Edited Image")
                status = gr.Textbox(label="Status", interactive=False)

        edit_btn.click(
            fn=process_image_edit,
            inputs=[input_image, prompt, api_key, image_history, temperature],
            outputs=[output_image, image_history, status],
        )

        undo_btn.click(
            fn=undo_edit,
            inputs=[image_history],
            outputs=[output_image, image_history, status],
        )

        continue_btn.click(
            fn=continue_editing,
            inputs=[output_image],
            outputs=[input_image, status],
        )

    return app


if __name__ == "__main__":
    app = create_ui()
    app.launch()