Spaces:

saq1b
/

gemini-image-editing

Running

App Files Files Community

saq1b committed on Mar 18

Commit

3566679

verified ·

1 Parent(s): 6849ccd

Upload 2 files

Browse files

Files changed (2) hide show

app.py +193 -0
requirements.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,193 @@

+import gradio as gr
+import base64
+import io
+from PIL import Image
+import json
+import os
+import asyncio
+from google import genai
+from google.genai import types
def pil_to_bytes(img, format="PNG"):
    """Serialize an image object to raw bytes in the given file format.

    Args:
        img: Object exposing a PIL-style ``save(fp, format=...)`` method.
        format: Encoder name handed to ``img.save``; defaults to ``"PNG"``.

    Returns:
        The encoded image as a ``bytes`` object.
    """
    with io.BytesIO() as buffer:
        img.save(buffer, format=format)
        # Copy the payload out before the buffer is closed by the ``with``.
        return buffer.getvalue()
def save_api_key(api_key):
    """Return ``api_key`` unchanged.

    This is a plain pass-through: nothing is written to browser storage or
    anywhere else by this function.
    """
    return api_key
async def load_image_base64(img):
    """Return the encoded bytes of an in-memory image.

    Despite the name, the result is the raw ``bytes`` produced by
    ``pil_to_bytes`` (PNG by default), not a base64 string.

    Args:
        img: An image object accepted by ``pil_to_bytes``.  Strings (URLs or
            file paths) are not supported.

    Raises:
        ValueError: If ``img`` is a ``str``.
    """
    if not isinstance(img, str):
        # Anything that is not a string is treated as an image object.
        return pil_to_bytes(img)
    raise ValueError("URL loading not implemented in this version")
async def generate_image_gemini(prompt, image, api_key):
    """Ask Gemini to edit ``image`` as described by ``prompt``.

    The image is sent as PNG bytes followed by the text
    ``"Edit this image: <prompt>"`` to the ``gemini-2.0-flash-exp`` model,
    requesting both text and image output.

    Args:
        prompt: Free-text description of the desired edit.
        image: Image object accepted by ``load_image_base64``.
        api_key: Gemini API key used to construct the client.

    Returns:
        A PIL image opened from the first inline image in the response, or
        ``None`` when the response carries no image or any step fails.  The
        reason is printed; no exception escapes this function.
    """
    # Every listed harm category has its blocking threshold set to BLOCK_NONE.
    SAFETY_SETTINGS = {
        types.HarmCategory.HARM_CATEGORY_HARASSMENT: types.HarmBlockThreshold.BLOCK_NONE,
        types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: types.HarmBlockThreshold.BLOCK_NONE,
        types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: types.HarmBlockThreshold.BLOCK_NONE,
        types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: types.HarmBlockThreshold.BLOCK_NONE,
    }
    try:
        client = genai.Client(api_key=api_key)
        source_bytes = await load_image_base64(image)
        edit_prompt = f"Edit this image: {prompt}"
        # Two user turns: the image first, then the edit instruction.
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_bytes(
                        data=source_bytes,
                        mime_type="image/png",
                    )
                ],
            ),
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=edit_prompt),
                ],
            ),
        ]
        response = await client.aio.models.generate_content(
            model="gemini-2.0-flash-exp",
            contents=contents,
            config=types.GenerateContentConfig(
                safety_settings=[
                    types.SafetySetting(category=category, threshold=threshold)
                    for category, threshold in SAFETY_SETTINGS.items()
                ],
                response_modalities=['Text', 'Image'],
            ),
        )
        # A response may come back without candidates, content or parts;
        # treat each of those as "no image" rather than letting an
        # IndexError/AttributeError masquerade as a client failure.
        candidates = response.candidates or []
        content = candidates[0].content if candidates else None
        parts = (content.parts if content is not None else None) or []
        for part in parts:
            inline = part.inline_data
            if inline is not None and inline.data:
                # Only the first returned image is used.
                return Image.open(io.BytesIO(inline.data))
        print("Gemini response did not contain any image data")
        return None
    except Exception as e:
        # Best-effort boundary: report the failure and signal it with None.
        print(f"Google GenAI client failed with error: {e}")
        return None
def process_image_edit(image, prompt, api_key, image_history):
    """Run one edit request and keep the version history up to date.

    The history is maintained so that its last entry is always the image
    returned for display, which is what ``undo_edit`` assumes when it drops
    the last entry to go back one step.

    Args:
        image: The image to edit (``None`` when nothing was uploaded).
        prompt: Description of the edit; blank or whitespace-only is rejected.
        api_key: Gemini API key; blank or whitespace-only is rejected.
        image_history: List of earlier images, or ``None``.

    Returns:
        A ``(image, history, status_message)`` tuple.  On a validation
        failure the image is ``None`` and the history is returned untouched.
    """
    prompt = (prompt or "").strip()
    api_key = (api_key or "").strip()
    if image is None or not prompt or not api_key:
        return None, image_history, "Please provide an image, prompt, and API key"

    # Work on a copy so the caller's list is never mutated in place.
    history = list(image_history or [])
    # Record the source image first so a failed edit still leaves the
    # displayed image (the source) as the last history entry.
    history.append(image)

    try:
        edited_image = asyncio.run(generate_image_gemini(prompt, image, api_key))
    except Exception as e:
        return image, history, f"Error: {str(e)}"

    if edited_image:
        # Record the result as well; without this the edit itself can never
        # be undone because it was never part of the history.
        history.append(edited_image)
        return edited_image, history, "Image edited successfully"
    return image, history, "Failed to edit image. Please try again."
def undo_edit(image_history):
    """Step back one entry in the image history.

    The last history entry is treated as the image currently displayed.

    Args:
        image_history: List of images, oldest first, or ``None``.

    Returns:
        A ``(image, history, status_message)`` tuple.  When at least two
        entries exist, the last one is dropped and the new last entry is
        returned.  Otherwise nothing is discarded: the single remaining
        image (or ``None`` for an empty history) is returned together with
        the unchanged history.
    """
    # Copy so the caller's list is left untouched.
    history = list(image_history or [])
    if len(history) > 1:
        history.pop()
        return history[-1], history, "Reverted to previous image"
    # Nothing to undo: keep showing what we have instead of blanking the
    # display and throwing away the last remaining version.
    current = history[-1] if history else None
    return current, history, "No previous version available"
def create_ui():
    """Build the Gradio Blocks interface for the Gemini image editor.

    Layout: the left column holds the upload, the edit description, the API
    key field, the "save key" checkbox and the Edit/Undo buttons; the right
    column holds the edited image and a read-only status box.  A per-session
    ``gr.State`` list carries the image history between button clicks.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(title="Gemini Image Editor") as app:
        gr.Markdown("# Gemini Image Editor")
        gr.Markdown("Upload an image, enter a description of the edit you want, and let Gemini do the rest!")
        # Store image history in state
        image_history = gr.State([])
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(type="pil", label="Upload Image")
                prompt = gr.Textbox(label="Edit Description", placeholder="Describe the edit you want...")
                # elem_id values give the browser script below stable hooks;
                # without them its selectors match nothing.
                api_key = gr.Textbox(
                    label="Gemini API Key",
                    placeholder="Enter your Gemini API key",
                    type="password",
                    elem_id="api_key",
                )
                # Read only by the browser script, never by Python handlers.
                gr.Checkbox(label="Save API key in browser", value=True, elem_id="save_key")
                with gr.Row():
                    edit_btn = gr.Button("Edit Image")
                    undo_btn = gr.Button("Undo Last Edit")
            with gr.Column():
                output_image = gr.Image(type="pil", label="Edited Image")
                status = gr.Textbox(label="Status", interactive=False)
        # Set up event handlers
        edit_btn.click(
            fn=process_image_edit,
            inputs=[input_image, prompt, api_key, image_history],
            outputs=[output_image, image_history, status],
        )
        undo_btn.click(
            fn=undo_edit,
            inputs=[image_history],
            outputs=[output_image, image_history, status],
        )
        # Browser-side persistence of the API key.
        # NOTE: localStorage keeps the key in plain text and any script
        # running on this origin can read it; it is only written while the
        # checkbox is ticked.
        # The keyword is ``js`` (Gradio 4 renamed it from ``_js``).
        app.load(None, None, None, js="""
        function() {
            // elem_id lands on the component wrapper, so look inside it.
            const keyInput = document.querySelector('#api_key input, #api_key textarea');
            const saveBox = document.querySelector('#save_key input[type="checkbox"]');
            if (!keyInput) {
                return;
            }
            // Try to load saved API key from localStorage
            const savedKey = localStorage.getItem('gemini_api_key');
            if (savedKey) {
                keyInput.value = savedKey;
                // Let Gradio see the restored value, not just the DOM.
                keyInput.dispatchEvent(new Event('input', { bubbles: true }));
            }
            // Add event listener to save API key
            keyInput.addEventListener('change', function(e) {
                if (saveBox && saveBox.checked) {
                    localStorage.setItem('gemini_api_key', e.target.value);
                }
            });
        }
        """)
    return app
# Build the interface and start the server only when run as a script.
if __name__ == "__main__":
    app = create_ui()
    app.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

gradio
google-genai
Pillow
# asyncio is part of the Python standard library; the PyPI package with that
# name is an obsolete backport and must not be installed.