|
import os |
|
import tempfile |
|
from PIL import Image |
|
import gradio as gr |
|
import logging |
|
import re |
|
import io |
|
from io import BytesIO |
|
|
|
from google import genai |
|
from google.genai import types |
|
|
|
|
|
from dotenv import load_dotenv |
|
# Load variables from a local .env file (if any) into the process environment
# before anything reads it; GEMINI_API_KEY is looked up via os.environ later.
load_dotenv()
|
|
|
|
|
# Configure root logging once at import time: INFO and above, each record
# prefixed with its timestamp and level name.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)
|
|
|
def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode, replacing any existing file.

    Args:
        file_name: Path of the file to create or overwrite.
        data: Bytes-like payload to write.
    """
    with open(file_name, mode="wb") as handle:
        handle.write(data)
|
|
|
def _expand_image_refs(text, images):
    """Replace "#1"/"#2"/"#3" in ``text`` with Korean ordinal image labels.

    A label gets the "(없음)" ("none") suffix when the matching entry of
    ``images`` is None, so the model is told that the image is missing.
    """
    labels = ("첫 번째 이미지", "두 번째 이미지", "세 번째 이미지")
    for number, (label, image) in enumerate(zip(labels, images), 1):
        if image is None:
            label += "(없음)"
        text = text.replace(f"#{number}", label)
    return text


def preprocess_prompt(prompt, image1, image2, image3):
    """Turn a user prompt into the instruction text sent to the model.

    If the prompt contains one of the numbered feature commands
    ("1. 이미지 변경" ... "7. 이미지 합성(스타일적용)") or the red-tone command,
    it is replaced by a canned instruction for that feature. Otherwise the
    prompt is kept, with "#1"/"#2"/"#3" expanded to ordinal image labels.
    " 이미지를 생성해주세요." is always appended.

    Args:
        prompt: Non-empty user prompt.
        image1: First image, or None when it was not supplied.
        image2: Second image, or None when it was not supplied.
        image3: Third image, or None when it was not supplied.

    Returns:
        The rewritten prompt string.
    """
    images = (image1, image2, image3)

    # Commands are matched against the raw text. The previous version expanded
    # "#N" first, so the '#1...' regexes and the "#3" checks could never match.
    raw = prompt
    uses_third = "#3" in raw

    if "1. 이미지 변경" in raw:
        desc_match = re.search(r'#1을 "(.*?)"으로 바꿔라', raw)
        if desc_match:
            # The captured description may itself reference other images.
            description = _expand_image_refs(desc_match.group(1), images)
            prompt = f"첫 번째 이미지를 {description}으로 변경해주세요. 원본 이미지의 주요 내용은 유지하되 새로운 스타일과 분위기로 재해석해주세요."
        else:
            prompt = "첫 번째 이미지를 창의적으로 변형해주세요. 더 생생하고 예술적인 버전으로 만들어주세요."

    elif "2. 글자지우기" in raw:
        text_match = re.search(r'#1에서 "(.*?)"를 지워라', raw)
        if text_match:
            text_to_remove = text_match.group(1)
            prompt = f"첫 번째 이미지에서 '{text_to_remove}' 텍스트를 찾아 자연스럽게 제거해주세요. 텍스트가 있던 부분은 배경과 조화롭게 채워주세요."
        else:
            prompt = "첫 번째 이미지에서 모든 텍스트를 찾아 자연스럽게 제거해주세요. 깔끔한 이미지로 만들어주세요."

    elif "3. 얼굴바꾸기" in raw:
        prompt = "첫 번째 이미지의 인물 얼굴을 두 번째 이미지의 얼굴로 자연스럽게 교체해주세요. 얼굴의 표정과 특징은 두 번째 이미지를 따르되, 나머지 부분은 첫 번째 이미지를 유지해주세요."

    elif "4. 옷바꾸기" in raw:
        if uses_third:
            prompt = "첫 번째 이미지의 인물 의상을 두 번째 또는 세 번째 이미지의 의상으로 자연스럽게 교체해주세요. 의상의 스타일과 색상은 참조 이미지를 따르되, 신체 비율과 포즈는 첫 번째 이미지를 유지해주세요."
        else:
            prompt = "첫 번째 이미지의 인물 의상을 두 번째 이미지의 의상으로 자연스럽게 교체해주세요. 의상의 스타일과 색상은 두 번째 이미지를 따르되, 신체 비율과 포즈는 첫 번째 이미지를 유지해주세요."

    elif "5. 배경바꾸기" in raw:
        prompt = "첫 번째 이미지의 배경을 두 번째 이미지의 배경으로 자연스럽게 교체해주세요. 첫 번째 이미지의 주요 피사체는 유지하고, 두 번째 이미지의 배경과 조화롭게 합성해주세요."

    elif "6. 이미지 합성(상품포함)" in raw:
        if uses_third:
            prompt = "첫 번째 이미지와 두 번째, 세 번째 이미지를 자연스럽게 합성해주세요. 모든 이미지의 주요 요소를 포함하고, 특히 상품이 잘 보이도록 조화롭게 통합해주세요."
        else:
            prompt = "첫 번째 이미지와 두 번째 이미지를 자연스럽게 합성해주세요. 두 이미지의 주요 요소를 포함하고, 특히 상품이 잘 보이도록 조화롭게 통합해주세요."

    elif "7. 이미지 합성(스타일적용)" in raw:
        prompt = "첫 번째 이미지의 내용을 두 번째 이미지의 스타일로 변환해주세요. 첫 번째 이미지의 주요 피사체와 구도는 유지하되, 두 번째 이미지의 예술적 스타일, 색상, 질감을 적용해주세요."

    elif "을 붉은색으로 바꿔라" in raw or "를 붉은색으로 바꿔라" in raw:
        prompt = "첫 번째 이미지를 붉은색 톤으로 변경해주세요. 전체적인 색상을 붉은 계열로 조정하고 자연스러운 느낌을 유지해주세요."

    else:
        # Free-form prompt: only expand the image references.
        prompt = _expand_image_refs(raw, images)

    # Always end with an explicit request to produce an image.
    prompt += " 이미지를 생성해주세요."

    return prompt
|
|
|
def generate_with_images(prompt, images): |
|
""" |
|
๊ณต์ ๋ฌธ์์ ๊ธฐ๋ฐํ ์ฌ๋ฐ๋ฅธ API ํธ์ถ ๋ฐฉ์ ๊ตฌํ |
|
""" |
|
try: |
|
|
|
api_key = os.environ.get("GEMINI_API_KEY") |
|
if not api_key: |
|
return None, "API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค. ํ๊ฒฝ๋ณ์๋ฅผ ํ์ธํด์ฃผ์ธ์." |
|
|
|
|
|
client = genai.Client(api_key=api_key) |
|
|
|
logger.info(f"Gemini API ์์ฒญ ์์ - ํ๋กฌํํธ: {prompt}") |
|
|
|
|
|
contents = [] |
|
|
|
|
|
contents.append(prompt) |
|
|
|
|
|
for idx, img in enumerate(images, 1): |
|
if img is not None: |
|
contents.append(img) |
|
logger.info(f"์ด๋ฏธ์ง #{idx} ์ถ๊ฐ๋จ") |
|
|
|
|
|
response = client.models.generate_content( |
|
model="gemini-2.0-flash-exp-image-generation", |
|
contents=contents, |
|
config=types.GenerateContentConfig( |
|
response_modalities=['Text', 'Image'], |
|
temperature=1, |
|
top_p=0.95, |
|
top_k=40, |
|
max_output_tokens=8192 |
|
) |
|
) |
|
|
|
|
|
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: |
|
temp_path = tmp.name |
|
|
|
result_text = "" |
|
image_found = False |
|
|
|
|
|
for part in response.candidates[0].content.parts: |
|
if hasattr(part, 'text') and part.text: |
|
result_text += part.text |
|
logger.info(f"์๋ต ํ
์คํธ: {part.text}") |
|
elif hasattr(part, 'inline_data') and part.inline_data: |
|
save_binary_file(temp_path, part.inline_data.data) |
|
image_found = True |
|
logger.info("์๋ต์์ ์ด๋ฏธ์ง ์ถ์ถ ์ฑ๊ณต") |
|
|
|
if not image_found: |
|
return None, f"API์์ ์ด๋ฏธ์ง๋ฅผ ์์ฑํ์ง ๋ชปํ์ต๋๋ค. ์๋ต ํ
์คํธ: {result_text}" |
|
|
|
|
|
result_img = Image.open(temp_path) |
|
if result_img.mode == "RGBA": |
|
result_img = result_img.convert("RGB") |
|
|
|
return result_img, f"์ด๋ฏธ์ง๊ฐ ์ฑ๊ณต์ ์ผ๋ก ์์ฑ๋์์ต๋๋ค. {result_text}" |
|
|
|
except Exception as e: |
|
logger.exception("์ด๋ฏธ์ง ์์ฑ ์ค ์ค๋ฅ ๋ฐ์:") |
|
return None, f"์ค๋ฅ ๋ฐ์: {str(e)}" |
|
|
|
def process_images_with_prompt(image1, image2, image3, prompt):
    """Validate the uploads, choose the prompt, and run image generation.

    Args:
        image1: First image, or None when it was not supplied.
        image2: Second image, or None when it was not supplied.
        image3: Third image, or None when it was not supplied.
        prompt: User prompt. When empty or whitespace-only, a default English
            prompt is chosen from the number of supplied images; otherwise
            the prompt is passed through ``preprocess_prompt``.

    Returns:
        The ``(image, message)`` tuple from ``generate_with_images``, or
        ``(None, message)`` when no image was supplied or an exception
        was raised.
    """
    try:
        valid_images = [img for img in (image1, image2, image3) if img is not None]
        if not valid_images:
            return None, "적어도 하나의 이미지를 업로드해주세요."

        if not prompt or not prompt.strip():
            # One to three images are present here, so the lookup cannot miss.
            defaults = {
                1: (
                    "Please creatively transform this image into a more vivid and artistic version.",
                    "Default prompt generated for single image",
                ),
                2: (
                    "Please seamlessly composite these two images, integrating their key elements harmoniously into a single image.",
                    "Default prompt generated for two images",
                ),
                3: (
                    "Please creatively composite these three images, combining their main elements into a cohesive and natural scene.",
                    "Default prompt generated for three images",
                ),
            }
            prompt, log_message = defaults[len(valid_images)]
            logger.info(log_message)
        else:
            prompt = preprocess_prompt(prompt, image1, image2, image3)

        # Only the supplied images are forwarded, in their original order.
        return generate_with_images(prompt, valid_images)

    except Exception as e:
        # Top-level boundary for the UI: log the traceback, report the error.
        logger.exception("이미지 처리 중 오류 발생:")
        return None, f"오류 발생: {e}"
|
|
|
|
|
|
|
|
|
# Gradio UI: three image inputs and an optional prompt on the left, the
# generated image, status message and effective prompt on the right.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been stripped — confirm the layout against the running app.
with gr.Blocks() as demo:
    gr.HTML(
        """
        <div style="text-align: center; margin-bottom: 1rem;">
            <h1>간단한 이미지 생성기</h1>
            <p>이미지를 업로드하고 바로 실행하면 자동으로 합성합니다.</p>
        </div>
        """
    )

    with gr.Row():
        with gr.Column():
            # Input images; the labels match the "#N" references in prompts.
            with gr.Row():
                image1_input = gr.Image(type="pil", label="#1", image_mode="RGB")
                image2_input = gr.Image(type="pil", label="#2", image_mode="RGB")
                image3_input = gr.Image(type="pil", label="#3", image_mode="RGB")

            prompt_input = gr.Textbox(
                lines=3,
                placeholder="프롬프트를 입력하거나 비워두면 자동 합성됩니다.",
                label="프롬프트 (선택 사항)"
            )

            submit_btn = gr.Button("이미지 생성", variant="primary")

        with gr.Column():
            output_image = gr.Image(label="생성된 이미지")
            output_text = gr.Textbox(label="상태 메시지")

            # Shows the prompt computed for this request.
            prompt_display = gr.Textbox(label="사용된 프롬프트", visible=True)

    def process_and_show_prompt(image1, image2, image3, prompt):
        """Run generation and also return the prompt text to display.

        Returns:
            ``(image, status, shown_prompt)`` for the three output components.
            If an exception is raised, the image is None and the user's
            original prompt is shown.
        """
        images = [image1, image2, image3]
        valid_images = [img for img in images if img is not None]

        try:
            # Mirror the prompt selection done in process_images_with_prompt
            # so the displayed text matches what that function computes.
            auto_prompt = prompt
            if not prompt or not prompt.strip():
                if len(valid_images) == 1:
                    auto_prompt = "Please creatively transform this image into a more vivid and artistic version."
                elif len(valid_images) == 2:
                    auto_prompt = "Please seamlessly composite these two images, integrating their key elements harmoniously into a single image."
                else:
                    auto_prompt = "Please creatively composite these three images, combining their main elements into a cohesive and natural scene."
            else:
                auto_prompt = preprocess_prompt(prompt, image1, image2, image3)

            # The raw prompt is passed on; preprocessing happens again inside.
            result_img, status = process_images_with_prompt(image1, image2, image3, prompt)

            return result_img, status, auto_prompt
        except Exception as e:
            logger.exception("처리 중 오류 발생:")
            return None, f"오류 발생: {e}", prompt

    submit_btn.click(
        fn=process_and_show_prompt,
        inputs=[image1_input, image2_input, image3_input, prompt_input],
        outputs=[output_image, output_text, prompt_display],
    )

    gr.Markdown(
        """
        ### 사용 방법:

        1. **자동 합성**: 이미지만 업로드하고 프롬프트를 비워두면 자동으로 합성됩니다.
        2. **이미지 참조**: #1, #2, #3으로 각 이미지를 참조할 수 있습니다.
        3. **일부 이미지만**: 필요한 이미지만 업로드해도 기능 실행이 가능합니다.

        > **팁**: 프롬프트를 직접 수정할 수도 있습니다.
        """
    )
|
|
|
|
|
# Start the web UI only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a publicly reachable share link;
    # confirm that exposing the app outside localhost is intended.
    demo.launch(share=True)
|
|