|
import os |
|
import tempfile |
|
from PIL import Image |
|
import gradio as gr |
|
import logging |
|
import re |
|
import io |
|
from io import BytesIO |
|
|
|
from google import genai |
|
from google.genai import types |
|
|
|
|
|
from dotenv import load_dotenv |
|
# Load variables from a local .env file into the process environment before
# anything below reads os.environ (the API key is looked up at request time).
load_dotenv()

# One shared log layout for the whole app: timestamp, level, message.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
|
|
|
def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode.

    The file is opened with mode ``"wb"``, so an existing file at that path
    is truncated and replaced.

    Args:
        file_name: Path of the file to write.
        data: Bytes-like payload to store.
    """
    with open(file_name, mode="wb") as out_file:
        out_file.write(data)
|
|
|
# Hangul compatibility jamo (consonants U+3131-U+314E and vowels U+314F-U+3163)
# plus precomposed Hangul syllables (U+AC00-U+D7A3). Written with \u escapes so
# the pattern stays intact even if this file is re-encoded incorrectly.
_HANGUL_PATTERN = re.compile(r'[\u3131-\u3163\uAC00-\uD7A3]')


def preprocess_prompt(prompt, image1, image2, image3):
    """Validate a user prompt and append the image-generation instruction.

    Only Hangul characters are rejected; other non-English text is not
    detected by this check even though the error message says "English only".

    Args:
        prompt: User-supplied prompt text.
        image1: Unused; kept so existing callers that pass the images still work.
        image2: Unused; see ``image1``.
        image3: Unused; see ``image1``.

    Returns:
        The prompt with surrounding whitespace removed, followed by
        ``" Please generate the image."``.

    Raises:
        ValueError: If the prompt contains any Hangul character.
    """
    if _HANGUL_PATTERN.search(prompt):
        raise ValueError("Error: Prompt must be in English only.")

    return prompt.strip() + " Please generate the image."
|
|
|
def generate_with_images(prompt, images):
    """Call the Gemini image-generation model with a prompt and optional images.

    The request contents are the prompt followed by every non-``None`` entry
    of ``images``. The API key is read from the ``GEMINI_API_KEY`` environment
    variable on each call.

    Args:
        prompt: Text instruction placed first in the request contents.
        images: Iterable of images to attach after the prompt; ``None``
            entries are skipped.

    Returns:
        A ``(image, message)`` tuple. ``image`` is the decoded PIL image
        (converted to RGB when the model returned RGBA) or ``None`` when no
        image could be produced. ``message`` is a status string that includes
        any text the model returned. Exceptions are logged and reported
        through ``message`` rather than raised.
    """
    try:
        api_key = os.environ.get("GEMINI_API_KEY")
        if not api_key:
            return None, "API key is not set. Please check your environment variables."

        client = genai.Client(api_key=api_key)

        # Log text restored from mis-decoded Korean ("request start - prompt").
        logger.info("Gemini API 요청 시작 - 프롬프트: %s", prompt)

        # The prompt goes first, then each supplied image in order.
        contents = [prompt]
        for idx, img in enumerate(images, 1):
            if img is not None:
                contents.append(img)
                logger.info("Image #%s added.", idx)

        response = client.models.generate_content(
            model="gemini-2.0-flash-exp-image-generation",
            contents=contents,
            config=types.GenerateContentConfig(
                response_modalities=['Text', 'Image'],
                temperature=1,
                top_p=0.95,
                top_k=40,
                max_output_tokens=8192,
            ),
        )

        # A response may carry no candidates or no content; treat that as
        # "no image" instead of failing on candidates[0].
        candidates = getattr(response, "candidates", None)
        content = candidates[0].content if candidates else None
        parts = getattr(content, "parts", None) or []

        text_chunks = []
        image_bytes = None
        for part in parts:
            if getattr(part, "text", None):
                text_chunks.append(part.text)
                logger.info("Response text: %s", part.text)
            elif getattr(part, "inline_data", None):
                # If several image parts are returned, the last one wins.
                image_bytes = part.inline_data.data
                logger.info("Image extracted successfully from response.")
        result_text = "".join(text_chunks)

        if not image_bytes:
            return None, f"API did not generate an image. Response text: {result_text}"

        # Decode in memory: no temporary file is left behind on disk.
        result_img = Image.open(BytesIO(image_bytes))
        result_img.load()
        if result_img.mode == "RGBA":
            result_img = result_img.convert("RGB")

        return result_img, f"Image generated successfully. {result_text}"

    except Exception as e:
        # UI boundary: report the failure as a status message instead of raising.
        logger.exception("Error during image generation:")
        return None, f"Error occurred: {str(e)}"
|
|
|
def process_images_with_prompt(image1, image2, image3, prompt):
    """Process up to three images together with a prompt.

    When the prompt is empty or whitespace-only, a default prompt is chosen
    from the number of images supplied. Otherwise the prompt is passed through
    ``preprocess_prompt``. The API call proceeds even when no image is given.

    Args:
        image1: First image, or ``None``.
        image2: Second image, or ``None``.
        image3: Third image, or ``None``.
        prompt: User prompt; may be ``None`` or blank.

    Returns:
        The ``(image, message)`` tuple from ``generate_with_images``, or
        ``(None, "Error occurred: ...")`` if anything raises.
    """
    # Indexed by the number of supplied images (0-3): (prompt, log message).
    fallback_by_count = (
        (
            "Please generate an image based on the description.",
            "Auto prompt generated for no image input.",
        ),
        (
            "Please creatively transform this image into a more vivid and artistic version.",
            "Auto prompt generated for a single image.",
        ),
        (
            "Please seamlessly blend these two images, integrating their elements harmoniously into a single image.",
            "Auto prompt generated for two images.",
        ),
        (
            "Please creatively composite these three images, incorporating their key elements into a natural and coherent scene.",
            "Auto prompt generated for three images.",
        ),
    )

    try:
        supplied = [
            candidate
            for candidate in (image1, image2, image3)
            if candidate is not None
        ]

        if prompt and prompt.strip():
            prompt = preprocess_prompt(prompt, image1, image2, image3)
        else:
            prompt, log_note = fallback_by_count[len(supplied)]
            logger.info(log_note)

        return generate_with_images(prompt, supplied)

    except Exception as e:
        logger.exception("Error during image processing:")
        return None, f"Error occurred: {str(e)}"
|
|
|
|
|
# Gradio UI: inputs (three images, a prompt, a button) on the left and
# outputs (image, status, effective prompt) on the right.
with gr.Blocks() as demo:
    gr.HTML(
        """
        <div style="text-align: center; margin-bottom: 1rem;">
        <h1>Simple Image Generator</h1>
        <p>Upload an image (or leave empty) and click generate to create an image based on the English prompt.</p>
        </div>
        """
    )

    with gr.Row():
        with gr.Column():
            with gr.Row():
                image1_input = gr.Image(type="pil", label="Image 1", image_mode="RGB")
                image2_input = gr.Image(type="pil", label="Image 2", image_mode="RGB")
                image3_input = gr.Image(type="pil", label="Image 3", image_mode="RGB")

            prompt_input = gr.Textbox(
                lines=3,
                placeholder="Enter the prompt in English.",
                label="Prompt (Required: English only)"
            )

            submit_btn = gr.Button("Generate Image", variant="primary")

        with gr.Column():
            output_image = gr.Image(label="Generated Image")
            output_text = gr.Textbox(label="Status Message")
            prompt_display = gr.Textbox(label="Used Prompt", visible=True)

    def process_and_show_prompt(image1, image2, image3, prompt):
        """Run generation and report the prompt that was effectively used.

        Args:
            image1: First uploaded image, or ``None``.
            image2: Second uploaded image, or ``None``.
            image3: Third uploaded image, or ``None``.
            prompt: Text from the prompt box; may be ``None`` or blank.

        Returns:
            An ``(image, status, used_prompt)`` tuple matching the three
            output components wired to the button. On failure ``image`` is
            ``None`` and ``used_prompt`` is the prompt as typed.
        """
        valid_images = [img for img in (image1, image2, image3) if img is not None]

        try:
            # Reject Hangul up front so the user gets the plain error message.
            # The ranges cover compatibility jamo (U+3131-U+3163) and
            # precomposed syllables (U+AC00-U+D7A3); \u escapes keep the
            # pattern intact regardless of how this file is encoded.
            if prompt and re.search(r'[\u3131-\u3163\uAC00-\uD7A3]', prompt):
                return None, "Error: Prompt must be in English only.", prompt

            # Work out the prompt to display. The fallback texts are the same
            # ones process_images_with_prompt selects for a blank prompt.
            if not prompt or not prompt.strip():
                image_count = len(valid_images)
                if image_count == 0:
                    auto_prompt = "Please generate an image based on the description."
                elif image_count == 1:
                    auto_prompt = "Please creatively transform this image into a more vivid and artistic version."
                elif image_count == 2:
                    auto_prompt = "Please seamlessly blend these two images, integrating their elements harmoniously into a single image."
                else:
                    auto_prompt = "Please creatively composite these three images, incorporating their key elements into a natural and coherent scene."
            else:
                auto_prompt = preprocess_prompt(prompt, image1, image2, image3)

            # The original prompt is passed on; the callee derives the
            # effective prompt again by the same rules.
            result_img, status = process_images_with_prompt(image1, image2, image3, prompt)

            return result_img, status, auto_prompt
        except Exception as e:
            logger.exception("Error during processing:")
            return None, f"Error occurred: {str(e)}", prompt

    submit_btn.click(
        fn=process_and_show_prompt,
        inputs=[image1_input, image2_input, image3_input, prompt_input],
        outputs=[output_image, output_text, prompt_display],
    )

    gr.Markdown(
        """
        ### Instructions:

        1. **Auto Generation**: You can leave the image upload empty and the system will generate an image based solely on the prompt.
        2. **Prompt Requirement**: Enter the prompt in English only.
        3. **Image Reference**: The app supports up to three image inputs.
        """
    )
|
|
|
|
|
# Start the web UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # NOTE(review): share=True presumably requests a public Gradio share link
    # in addition to the local server; confirm this exposure is intended.
    demo.launch(share=True)
|
|