import os
import tempfile
import logging

from PIL import Image
import gradio as gr

from google import genai
from google.genai import types

from dotenv import load_dotenv

load_dotenv()

logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
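
# Gradio app for image editing with the Gemini API: the UI collects an original image,
# an optional background image, an optional style reference image, and a text prompt;
# process_image_and_prompt() saves the uploads to temporary files and generate() sends
# them to the model and returns the edited image.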


def save_binary_file(file_name, data):
    """Write binary image data returned by the API to file_name."""
    logger.debug(f"Saving binary data to file: {file_name}")
    with open(file_name, "wb") as f:
        f.write(data)
    logger.debug(f"File saved: {file_name}")


def generate(text, original_image_path, background_image_path=None, style_image_path=None,
             model="gemini-2.0-flash-exp-image-generation"):
    """Call the Gemini API with the prompt and images and return the path of the generated image."""
    logger.debug(f"generate started - text: '{text}', original file: '{original_image_path}', model: '{model}'")

    try:
        effective_api_key = os.environ.get("GEMINI_API_KEY")
        if effective_api_key:
            logger.debug("Loaded API key from environment variable")
        else:
            logger.error("API key is not set in the environment.")
            raise ValueError("An API key is required.")

        client = genai.Client(api_key=effective_api_key)
        logger.debug("Gemini client initialized.")

        original_img = Image.open(original_image_path)

        contents = []

        # Extend the prompt when reference images are supplied but not mentioned in the text.
        prompt = text
        if background_image_path and "background" not in text.lower():
            prompt += (" Completely replace the background of the original image with the"
                       " second uploaded image. Update the image and show the result.")
        if style_image_path and "style" not in text.lower():
            prompt += " Apply the style of the third image to the whole result."

        contents.append(prompt)
        contents.append(original_img)

        if background_image_path:
            background_img = Image.open(background_image_path)
            contents.append(background_img)
            logger.debug("Background image added")

        if style_image_path:
            style_img = Image.open(style_image_path)
            contents.append(style_img)
            logger.debug("Style image added")

        logger.debug(f"Contents assembled: {len(contents)} items")

        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=["TEXT", "IMAGE"],
        )
        logger.debug(f"Generation config: {generate_content_config}")

        # Reserve a temporary file for the generated image (delete=False keeps it after the block).
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            temp_path = tmp.name
            logger.debug(f"Temporary file created: {temp_path}")

        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config,
        )

        logger.debug("Processing response...")

        # Text parts are logged; the first inline image part is written to the temp file.
        image_saved = False
        for part in response.candidates[0].content.parts:
            if hasattr(part, 'text') and part.text:
                logger.info(f"Received text: {part.text}")
                print(part.text)
            elif hasattr(part, 'inline_data') and part.inline_data:
                save_binary_file(temp_path, part.inline_data.data)
                logger.info(f"File with MIME type {part.inline_data.mime_type} saved: {temp_path}")
                image_saved = True

        if not image_saved:
            logger.warning("No image was generated.")
            return None

        logger.debug("Image generation complete.")
        return temp_path

    except Exception as e:
        logger.exception("Error while generating the image:")
        return None


def process_image_and_prompt(original_pil, prompt, background_pil=None, style_pil=None):
    """Save the uploaded PIL images to temporary files, fill in a default prompt if needed, and run generate()."""
    logger.debug(f"process_image_and_prompt started - prompt: '{prompt}'")
    try:
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            original_path = tmp.name
            original_pil.save(original_path)
            logger.debug(f"Original image saved: {original_path}")

        background_path = None
        if background_pil is not None:
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_bg:
                background_path = tmp_bg.name
                background_pil.save(background_path)
                logger.debug(f"Background image saved: {background_path}")

        style_path = None
        if style_pil is not None:
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_style:
                style_path = tmp_style.name
                style_pil.save(style_path)
                logger.debug(f"Style image saved: {style_path}")

        # Fall back to a default prompt when the user left the prompt empty.
        if not prompt or not prompt.strip():
            if background_path and style_path:
                prompt = ("Please replace the background with the second image while keeping the person"
                          " from the original image, and apply the style of the third image.")
            elif background_path:
                prompt = "Please replace the background with the second image while keeping the person from the original image."
            elif style_path:
                prompt = "Please apply the style of the second image to the original image."

        model = "gemini-2.0-flash-exp-image-generation"

        gemma_edited_image_path = generate(
            text=prompt,
            original_image_path=original_path,
            background_image_path=background_path,
            style_image_path=style_path,
            model=model
        )

        if gemma_edited_image_path:
            logger.debug(f"Image generated. Path: {gemma_edited_image_path}")
            result_img = Image.open(gemma_edited_image_path)
            if result_img.mode == "RGBA":
                result_img = result_img.convert("RGB")

            # Clean up the temporary input files.
            try:
                os.unlink(original_path)
                if background_path:
                    os.unlink(background_path)
                if style_path:
                    os.unlink(style_path)
            except Exception as e:
                logger.warning(f"Error while deleting temporary files: {str(e)}")

            return [result_img]
        else:
            logger.error("generate() returned None.")
            return []

    except Exception as e:
        logger.exception("Error in process_image_and_prompt:")
        return []
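

# Gradio UI: three image inputs and a prompt on the left, result gallery and response text on the right.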
with gr.Blocks() as demo:
    gr.HTML(
        """
        <div style='display: flex; align-items: center; justify-content: center; gap: 20px'>
            <div style="background-color: var(--block-background-fill); border-radius: 8px">
                <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" style="width: 100px; height: 100px;">
            </div>
            <div>
                <h1>Image Editing with Gemini</h1>
                <p>The Gemini API key is read from the GEMINI_API_KEY environment variable.</p>
            </div>
        </div>
        """
    )

    gr.Markdown("Upload the original, background, and style images, then describe the edit you want.")

    with gr.Row():
        with gr.Column():
            original_input = gr.Image(type="pil", label="Original Image", image_mode="RGB")
            background_input = gr.Image(type="pil", label="Background Image", image_mode="RGB")
            style_input = gr.Image(type="pil", label="Style Image", image_mode="RGB")
            prompt_input = gr.Textbox(
                lines=2,
                placeholder="Describe the edit you want...",
                label="Edit Prompt"
            )
            submit_btn = gr.Button("Run Image Edit")
        with gr.Column():
            output_gallery = gr.Gallery(label="Edit Results")
            output_text = gr.Textbox(label="API Response Text", visible=True)

    submit_btn.click(
        fn=process_image_and_prompt,
        inputs=[original_input, prompt_input, background_input, style_input],
        outputs=output_gallery,
    )

    gr.HTML("""
    <div style="margin-top: 20px; padding: 10px; background-color: #f8f9fa; border-radius: 8px;">
        <h3>Usage tips:</h3>
        <ul>
            <li><strong>Example prompt:</strong> "Replace the background with the second image while keeping the original person."</li>
            <li><strong>Be explicit about background swaps:</strong> "Completely replace only the background and keep the original person as-is."</li>
            <li><strong>Applying a style:</strong> "Apply the painting style of the third image to the whole picture."</li>
            <li><strong>Language of prompts:</strong> For better results, try using English and Korean together.</li>
        </ul>
    </div>
    """)


demo.launch(share=True)