|
import json |
|
import os |
|
import time |
|
import tempfile |
|
from PIL import Image |
|
import gradio as gr |
|
import logging |
|
from io import BytesIO |
|
|
|
from google import genai |
|
from google.genai import types |
|
|
|
|
|
from dotenv import load_dotenv |
|
# Load environment variables (notably GEMINI_API_KEY) from a local .env file.
load_dotenv()

# Root logger at DEBUG so every step of the image pipeline is traceable;
# module-level logger follows the stdlib getLogger(__name__) convention.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
|
|
|
|
|
def save_binary_file(file_name, data):
    """Write raw bytes *data* to *file_name*, replacing any existing content."""
    logger.debug(f"νμΌμ μ΄μ§ λ°μ΄ν° μ μ₯ μ€: {file_name}")
    with open(file_name, "wb") as out_fp:
        out_fp.write(data)
    logger.debug(f"νμΌ μ μ₯ μλ£: {file_name}")
|
|
|
|
|
def _cleanup_temp(path):
    """Best-effort removal of a temp file; silently ignores missing files."""
    if path:
        try:
            os.unlink(path)
        except OSError:
            pass


def merge_images(person_img_path, product_img_path, background_img_path, prompt, model="gemini-2.0-flash-exp-image-generation"):
    """Composite the person/product (and optional background) images with Gemini.

    Args:
        person_img_path: path to the person image file (required).
        product_img_path: path to the product image file (required).
        background_img_path: path to an optional background image, or None.
        prompt: instruction text sent to the model alongside the images.
        model: Gemini model identifier used for generation.

    Returns:
        (generated_png_path, response_text) on success;
        (None, response_text_or_error_message) when no image was produced
        or an exception occurred.
    """
    logger.debug(f"merge_images ν¨μ μμ - ν둬ννΈ: '{prompt}'")

    temp_path = None  # tracked so failure paths can remove the unused file
    try:
        # The API key must be provided via the environment (.env is loaded at import).
        effective_api_key = os.environ.get("GEMINI_API_KEY")
        if effective_api_key:
            logger.debug("νκ²½λ³μμμ API ν€ λΆλ¬μ΄")
        else:
            logger.error("API ν€κ° νκ²½λ³μμ μ€μ λμ§ μμμ΅λλ€.")
            raise ValueError("API ν€κ° νμν©λλ€.")

        client = genai.Client(api_key=effective_api_key)
        logger.debug("Gemini ν΄λΌμ΄μΈνΈ μ΄κΈ°ν μλ£.")

        person_img = Image.open(person_img_path)
        product_img = Image.open(product_img_path)

        # Content order matters: person, product, [background], then the text prompt.
        contents = [person_img, product_img]
        if background_img_path:
            background_img = Image.open(background_img_path)
            contents.append(background_img)
            logger.debug("λ°°κ²½ μ΄λ―Έμ§ μΆκ°λ¨")
        contents.append(prompt)
        logger.debug(f"컨νμΈ κ°μ²΄ μμ± μλ£: {len(contents)} μμ΄ν")

        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            # Request both a text commentary and the generated image.
            response_modalities=["text", "image"],
        )
        logger.debug(f"μμ± μ€μ : {generate_content_config}")

        # Pre-create the destination file for the generated image; delete=False
        # because the path outlives this context manager.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            temp_path = tmp.name
        logger.debug(f"μμ νμΌ μμ±λ¨: {temp_path}")

        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config,
        )

        logger.debug("μλ΅ μ²λ¦¬ μμ...")

        image_saved = False
        response_text = ""

        # The first candidate may interleave text parts and one inline image part.
        for part in response.candidates[0].content.parts:
            if hasattr(part, 'text') and part.text:
                response_text += part.text
                logger.info(f"μμ λ νμ€νΈ: {part.text}")
            elif hasattr(part, 'inline_data') and part.inline_data:
                save_binary_file(temp_path, part.inline_data.data)
                logger.info(f"MIME νμ{part.inline_data.mime_type}μ νμΌμ΄ μ μ₯λ¨: {temp_path}")
                image_saved = True

        if not image_saved:
            logger.warning("μ΄λ―Έμ§κ° μμ±λμ§ μμμ΅λλ€.")
            # Fix: the pre-created temp file used to leak on this path.
            _cleanup_temp(temp_path)
            return None, response_text

        logger.debug("μ΄λ―Έμ§ μμ± μλ£.")
        return temp_path, response_text

    except Exception as e:
        logger.exception("μ΄λ―Έμ§ μμ± μ€ μ€λ₯ λ°μ:")
        # Fix: also remove the temp file when the API call or parsing fails.
        _cleanup_temp(temp_path)
        return None, str(e)
|
|
|
|
|
def process_images_and_prompt(person_pil, product_pil, background_pil, prompt):
    """Gradio callback: persist the PIL inputs to temp PNGs, run merge_images,
    and adapt the result for the gallery/textbox outputs.

    Args:
        person_pil: required person image (PIL.Image).
        product_pil: required product image (PIL.Image).
        background_pil: optional background image (PIL.Image) or None.
        prompt: user instruction; a bilingual default is substituted when empty.

    Returns:
        ([PIL.Image], response_text) on success, ([], text_or_error) on failure.
    """
    logger.debug(f"process_images_and_prompt ν¨μ μμ - ν둬ννΈ: '{prompt}'")

    # Paths are predeclared so the finally-block can clean up whatever was created.
    person_path = None
    product_path = None
    background_path = None
    try:
        # Substitute a sensible default prompt when the user left it blank.
        if not prompt or not prompt.strip():
            if background_pil:
                prompt = "μ΄ λ°°κ²½μ μ΄ μ¬λμ΄ μ΄ μνμ μ¬μ©νλ λͺ¨μ΅μ μμ°μ€λ½κ² 보μ¬μ£ΌμΈμ. μνμ μ 보μ΄κ² ν΄μ£ΌμΈμ. Create a natural composite image showing this person using this product in this background setting. Make sure the product is clearly visible."
            else:
                prompt = "μ΄ μ¬λμ΄ μ΄ μνμ μ¬μ©νλ λͺ¨μ΅μ μμ°μ€λ½κ² 보μ¬μ£ΌμΈμ. μνμ μ 보μ΄κ² ν΄μ£ΌμΈμ. Create a natural composite image showing this person using this product. Make sure the product is clearly visible."

        # If the prompt contains no ASCII at all, append an English hint —
        # the model tends to follow bilingual instructions more reliably.
        if not any(ord(c) < 128 for c in prompt):
            if background_pil:
                prompt += " Create a realistic composite image of this person with this product in this background."
            else:
                prompt += " Create a realistic composite image of this person with this product."

        # Persist the in-memory PIL images to temp PNGs for merge_images.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_person:
            person_path = tmp_person.name
            person_pil.save(person_path)
            logger.debug(f"μ¬λ μ΄λ―Έμ§ μ μ₯ μλ£: {person_path}")

        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_product:
            product_path = tmp_product.name
            product_pil.save(product_path)
            logger.debug(f"μν μ΄λ―Έμ§ μ μ₯ μλ£: {product_path}")

        if background_pil is not None:
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_bg:
                background_path = tmp_bg.name
                background_pil.save(background_path)
                logger.debug(f"λ°°κ²½ μ΄λ―Έμ§ μ μ₯ μλ£: {background_path}")

        result_path, response_text = merge_images(
            person_img_path=person_path,
            product_img_path=product_path,
            background_img_path=background_path,
            prompt=prompt
        )

        if result_path:
            logger.debug(f"μ΄λ―Έμ§ μμ± μλ£. κ²½λ‘: {result_path}")
            result_img = Image.open(result_path)
            # Gallery output expects RGB; flatten any alpha channel.
            if result_img.mode == "RGBA":
                result_img = result_img.convert("RGB")
            return [result_img], response_text

        logger.error("merge_images ν¨μμμ None λ°νλ¨.")
        return [], response_text

    except Exception as e:
        logger.exception("process_images_and_prompt ν¨μμμ μ€λ₯ λ°μ:")
        return [], str(e)

    finally:
        # Fix: the original deleted the input temp files only on the success
        # path; do it unconditionally so failures don't leak files.
        try:
            for _tmp in (person_path, product_path, background_path):
                if _tmp:
                    os.unlink(_tmp)
        except Exception as e:
            logger.warning(f"μμ νμΌ μμ μ€ μ€λ₯: {str(e)}")
|
|
|
|
|
|
|
# --- Gradio UI definition ----------------------------------------------------
with gr.Blocks() as demo:
    # Header banner: Gemini logo plus the app title/description.
    gr.HTML(
        """
    <div style='display: flex; align-items: center; justify-content: center; gap: 20px'>
        <div style="background-color: var(--block-background-fill); border-radius: 8px">
            <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" style="width: 100px; height: 100px;">
        </div>
        <div>
            <h1>Geminiλ₯Ό μ΄μ©ν μ΄λ―Έμ§ ν©μ±</h1>
            <p>μ¬λ, μν, λ°°κ²½ μ΄λ―Έμ§λ₯Ό ν©μ±νλ μ ν리μΌμ΄μμλλ€.</p>
        </div>
    </div>
    """
    )
    # NOTE(fix): the following string literals were garbled across line breaks
    # in the original (a syntax error); they are rejoined onto single lines.
    gr.Markdown("μ¬λ μ΄λ―Έμ§, μν μ΄λ―Έμ§, λ°°κ²½ μ΄λ―Έμ§λ₯Ό μλ‘λνκ³ , μ΄λ»κ² ν©μ±ν μ§ μ€λͺν΄μ£ΌμΈμ.")

    with gr.Row():
        with gr.Column():
            # Left column: the three image inputs and the prompt textbox.
            person_input = gr.Image(type="pil", label="μ¬λ μ΄λ―Έμ§ (νμ)", image_mode="RGB")
            product_input = gr.Image(type="pil", label="μν μ΄λ―Έμ§ (νμ)", image_mode="RGB")
            background_input = gr.Image(type="pil", label="λ°°κ²½ μ΄λ―Έμ§ (μ ν μ¬ν)", image_mode="RGB")
            prompt_input = gr.Textbox(
                lines=2,
                placeholder="ν©μ± λ°©λ²μ μ€λͺν΄μ£ΌμΈμ. (μ: 'μ΄ λ°°κ²½μμ μ΄ μ¬λμ΄ μνμ μ¬μ©νλ λͺ¨μ΅' λλ 'μ΄ μ¬λμ΄ μ΄ μνμ λ€κ³ μ΄ λ°°κ²½μ μ μλ λͺ¨μ΅')",
                label="ν©μ± λ°©λ² μ€λͺ"
            )
            submit_btn = gr.Button("μ΄λ―Έμ§ ν©μ± μ€ν")
        with gr.Column():
            # Right column: generated image gallery and the model's text reply.
            output_gallery = gr.Gallery(label="ν©μ± κ²°κ³Ό")
            output_text = gr.Textbox(label="AI μλ΅ νμ€νΈ", visible=True)

    # Wire the button to the processing callback.
    submit_btn.click(
        fn=process_images_and_prompt,
        inputs=[person_input, product_input, background_input, prompt_input],
        outputs=[output_gallery, output_text],
    )

    # Static usage tips rendered below the controls.
    gr.HTML("""
    <div style="margin-top: 20px; padding: 10px; background-color: #f8f9fa; border-radius: 8px;">
        <h3>μ¬μ© ν:</h3>
        <ul>
            <li><strong>λ°°κ²½ νμ©:</strong> "μ΄ λ°°κ²½μμ μ΄ μ¬λμ΄ μ΄ μνμ μ¬μ©νλ μμ°μ€λ¬μ΄ λͺ¨μ΅μ λ§λ€μ΄μ£ΌμΈμ."</li>
            <li><strong>νΉμ μμΉ μ§μ :</strong> "μ΄ μ¬λμ΄ μ΄ μνμ μμ λ€κ³ μ΄ λ°°κ²½ μμ μ μλ λͺ¨μ΅μ 보μ¬μ£ΌμΈμ."</li>
            <li><strong>μν κ°μ‘°:</strong> "μ΄ μ¬λμ΄ μ΄ λ°°κ²½μμ μ΄ μνμ μ¬μ©νλ λͺ¨μ΅μ 보μ¬μ£Όλ, μνμ΄ μ 보μ΄λλ‘ ν΄μ£ΌμΈμ."</li>
            <li><strong>μ₯λ©΄ μ€μ :</strong> "μ΄ μ¬λμ΄ μ΄ μνμΌλ‘ μ리νλ λͺ¨μ΅μ μ΄ μ£Όλ°© λ°°κ²½μμ 보μ¬μ£ΌμΈμ."</li>
            <li><strong>μμ΄ ν둬ννΈ:</strong> λ λμ κ²°κ³Όλ₯Ό μν΄ μμ΄μ νκ΅μ΄λ₯Ό ν¨κ» μ¬μ©ν΄ 보μΈμ.</li>
            <li><strong>λ°°κ²½ μ νμ¬ν:</strong> λ°°κ²½ μ΄λ―Έμ§λ μ νμ¬νμλλ€. λ°°κ²½ μμ΄ μ¬λκ³Ό μνλ§ ν©μ±ν μλ μμ΅λλ€.</li>
        </ul>
    </div>
    """)
|
|
|
|
|
# Script entry point: start the Gradio server.
# share=True additionally opens a public tunnel URL (useful for demos;
# consider disabling for purely local use).
if __name__ == "__main__":
    demo.launch(share=True)