# test-100 / app.py
# Kims12's picture
# Update app.py
# 1d396b2 verified
# raw
# history blame
# 9.43 kB
import os
import tempfile
from PIL import Image
import gradio as gr
import logging
import re
import io
from io import BytesIO
from google import genai
from google.genai import types
# Load environment variables
from dotenv import load_dotenv
load_dotenv()
# Logging setup: INFO level, each record formatted as "time - level - message".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)
def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode.

    Any existing file at that path is overwritten.

    Args:
        file_name: Path of the file to create or replace.
        data: Bytes-like payload to store.
    """
    with open(file_name, mode="wb") as out_file:
        out_file.write(data)
def preprocess_prompt(prompt, image1, image2, image3):
    """Validate a user prompt and append the image-generation instruction.

    The prompt is rejected when it contains Hangul (compatibility consonant
    jamo U+3131-U+314E or syllables U+AC00-U+D7A3). Otherwise surrounding
    whitespace is stripped and " Please generate the image." is appended.

    Args:
        prompt: Prompt text entered by the user.
        image1: Unused; kept so existing callers keep working.
        image2: Unused; kept so existing callers keep working.
        image3: Unused; kept so existing callers keep working.

    Returns:
        The stripped prompt followed by " Please generate the image.".

    Raises:
        ValueError: If the prompt contains Hangul characters.
    """
    # The Hangul ranges are written as \u escapes so the pattern stays valid
    # no matter which text encoding the file is saved or decoded with; a
    # mis-decoded literal here turns into an invalid character range.
    if re.search(r'[\u3131-\u314e\uac00-\ud7a3]', prompt):
        raise ValueError("Error: Prompt must be in English only.")
    return prompt.strip() + " Please generate the image."
def generate_with_images(prompt, images):
    """Call the Gemini image-generation model with a prompt and optional images.

    Args:
        prompt: Text instruction; sent as the first content item.
        images: Iterable of image objects sent after the prompt. ``None``
            entries are skipped, so an empty iterable sends the text alone.

    Returns:
        A ``(image, message)`` tuple. On success ``image`` is a PIL image
        (an RGBA result is converted to RGB) and ``message`` includes any
        text the model returned. On failure ``image`` is ``None`` and
        ``message`` explains why: missing API key, no image in the response,
        or the text of the exception that was raised.
    """
    try:
        # The API key is read from the environment on every call.
        api_key = os.environ.get("GEMINI_API_KEY")
        if not api_key:
            return None, "API key is not set. Please check your environment variables."

        client = genai.Client(api_key=api_key)
        logger.info(f"Gemini API 요청 시작 - 프롬프트: {prompt}")

        # The text prompt goes first; images follow in the order given.
        contents = [prompt]
        for idx, img in enumerate(images, 1):
            if img is not None:
                contents.append(img)
                logger.info(f"Image #{idx} added.")

        # Both modalities are requested so the model can return text and an image.
        response = client.models.generate_content(
            model="gemini-2.0-flash-exp-image-generation",
            contents=contents,
            config=types.GenerateContentConfig(
                response_modalities=['Text', 'Image'],
                temperature=1,
                top_p=0.95,
                top_k=40,
                max_output_tokens=8192,
            ),
        )

        # A blocked or empty response may carry no candidates, content, or
        # parts; treat that as "no image" instead of failing on an index or
        # attribute error.
        candidates = response.candidates or []
        content = candidates[0].content if candidates else None
        parts = (content.parts if content is not None else None) or []

        text_chunks = []
        image_bytes = None
        for part in parts:
            text = getattr(part, 'text', None)
            inline_data = getattr(part, 'inline_data', None)
            if text:
                text_chunks.append(text)
                logger.info(f"Response text: {text}")
            elif inline_data:
                # If several image parts are returned, the last one wins.
                image_bytes = inline_data.data
                logger.info("Image extracted successfully from response.")
        result_text = "".join(text_chunks)

        if image_bytes is None:
            return None, f"API did not generate an image. Response text: {result_text}"

        # Decode from memory so no temporary file is left behind on disk, and
        # load eagerly so a corrupt payload is reported by the handler below.
        result_img = Image.open(BytesIO(image_bytes))
        result_img.load()
        if result_img.mode == "RGBA":
            result_img = result_img.convert("RGB")
        return result_img, f"Image generated successfully. {result_text}"
    except Exception as e:
        logger.exception("Error during image generation:")
        return None, f"Error occurred: {str(e)}"
def process_images_with_prompt(image1, image2, image3, prompt):
    """Run image generation for up to three optional images and a prompt.

    A blank prompt is replaced by a built-in English prompt chosen by the
    number of uploaded images; a non-blank prompt is passed through
    ``preprocess_prompt``. Generation proceeds even when no image is given.

    Args:
        image1: First image, or ``None`` when not provided.
        image2: Second image, or ``None`` when not provided.
        image3: Third image, or ``None`` when not provided.
        prompt: User prompt; may be ``None`` or blank.

    Returns:
        The ``(image, status_message)`` pair from ``generate_with_images``,
        or ``(None, "Error occurred: ...")`` if anything raises.
    """
    try:
        # Keep only the image slots that were actually filled.
        uploaded = [candidate for candidate in (image1, image2, image3) if candidate is not None]

        prompt_is_blank = not (prompt and prompt.strip())
        if prompt_is_blank:
            # (prompt, log message) pairs indexed by the number of uploads;
            # the last entry covers three images.
            fallbacks = (
                (
                    "Please generate an image based on the description.",
                    "Auto prompt generated for no image input.",
                ),
                (
                    "Please creatively transform this image into a more vivid and artistic version.",
                    "Auto prompt generated for a single image.",
                ),
                (
                    "Please seamlessly blend these two images, integrating their elements harmoniously into a single image.",
                    "Auto prompt generated for two images.",
                ),
                (
                    "Please creatively composite these three images, incorporating their key elements into a natural and coherent scene.",
                    "Auto prompt generated for three images.",
                ),
            )
            prompt, log_message = fallbacks[min(len(uploaded), 3)]
            logger.info(log_message)
        else:
            prompt = preprocess_prompt(prompt, image1, image2, image3)

        return generate_with_images(prompt, uploaded)
    except Exception as e:
        logger.exception("Error during image processing:")
        return None, f"Error occurred: {str(e)}"
# Gradio interface (feature-selection, feature-apply, and custom-text widgets removed)
with gr.Blocks() as demo:
    gr.HTML(
        """
        <div style="text-align: center; margin-bottom: 1rem;">
        <h1>Simple Image Generator</h1>
        <p>Upload an image (or leave empty) and click generate to create an image based on the English prompt.</p>
        </div>
        """
    )
    with gr.Row():
        with gr.Column():
            # Three image inputs (the app also runs with no image at all)
            with gr.Row():
                image1_input = gr.Image(type="pil", label="Image 1", image_mode="RGB")
                image2_input = gr.Image(type="pil", label="Image 2", image_mode="RGB")
                image3_input = gr.Image(type="pil", label="Image 3", image_mode="RGB")
            # Prompt input (English only)
            prompt_input = gr.Textbox(
                lines=3,
                placeholder="Enter the prompt in English.",
                label="Prompt (Required: English only)"
            )
            # Generate button
            submit_btn = gr.Button("Generate Image", variant="primary")
        with gr.Column():
            # Result output
            output_image = gr.Image(label="Generated Image")
            output_text = gr.Textbox(label="Status Message")
            prompt_display = gr.Textbox(label="Used Prompt", visible=True)

    # Click handler for the generate button
    def process_and_show_prompt(image1, image2, image3, prompt):
        """Generate an image and report which prompt was used.

        Args:
            image1: First uploaded image, or ``None``.
            image2: Second uploaded image, or ``None``.
            image3: Third uploaded image, or ``None``.
            prompt: Text from the prompt box; may be ``None`` or blank.

        Returns:
            ``(image, status_message, prompt_text)`` for the three output
            widgets. ``image`` is ``None`` when validation or generation
            fails; on those paths ``prompt_text`` is the raw input.
        """
        # Count the uploaded images
        images = [image1, image2, image3]
        valid_images = [img for img in images if img is not None]
        try:
            # Reject prompts containing Hangul. The ranges are written as \u
            # escapes so the pattern stays valid whatever encoding the file
            # is saved or decoded with.
            if prompt and re.search(r'[\u3131-\u314e\uac00-\ud7a3]', prompt):
                return None, "Error: Prompt must be in English only.", prompt

            if not prompt or not prompt.strip():
                # No prompt given: show the built-in English prompt that
                # matches the number of uploads (last entry covers three).
                default_prompts = (
                    "Please generate an image based on the description.",
                    "Please creatively transform this image into a more vivid and artistic version.",
                    "Please seamlessly blend these two images, integrating their elements harmoniously into a single image.",
                    "Please creatively composite these three images, incorporating their key elements into a natural and coherent scene.",
                )
                auto_prompt = default_prompts[min(len(valid_images), 3)]
            else:
                auto_prompt = preprocess_prompt(prompt, image1, image2, image3)

            # The raw prompt is passed on; process_images_with_prompt applies
            # the same defaulting/preprocessing before calling the API.
            result_img, status = process_images_with_prompt(image1, image2, image3, prompt)
            return result_img, status, auto_prompt
        except Exception as e:
            logger.exception("Error during processing:")
            return None, f"Error occurred: {str(e)}", prompt

    submit_btn.click(
        fn=process_and_show_prompt,
        inputs=[image1_input, image2_input, image3_input, prompt_input],
        outputs=[output_image, output_text, prompt_display],
    )
    gr.Markdown(
        """
        ### Instructions:
        1. **Auto Generation**: You can leave the image upload empty and the system will generate an image based solely on the prompt.
        2. **Prompt Requirement**: Enter the prompt in English only.
        3. **Image Reference**: The app supports up to three image inputs.
        """
    )
# Run the application when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=True)