|
import os |
|
import tempfile |
|
from PIL import Image |
|
import gradio as gr |
|
import logging |
|
import re |
|
from io import BytesIO |
|
|
|
from google import genai |
|
from google.genai import types |
|
|
|
|
|
from dotenv import load_dotenv |
|
# Load variables from a local .env file into the process environment before
# any function below reads os.environ (the Gemini helpers look up
# GEMINI_API_KEY there).
load_dotenv()

# Configure logging once at import time: INFO level, timestamped records.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)
|
|
|
def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode, replacing any existing file."""
    handle = open(file_name, "wb")
    try:
        handle.write(data)
    finally:
        # Always release the descriptor, even if the write fails.
        handle.close()
|
|
|
def translate_prompt_to_english(prompt):
    """Translate a prompt to English when it contains Korean (Hangul) text.

    If ``prompt`` contains at least one Hangul syllable, it is sent to the
    ``gemini-2.0-flash`` model with a translation instruction and the stripped
    model output is returned. In every other case the original ``prompt`` is
    returned unchanged: no Hangul present, empty/None prompt, missing
    ``GEMINI_API_KEY``, empty model output, or any exception during the call.

    Args:
        prompt: The user prompt (may be empty or None).

    Returns:
        The English translation, or ``prompt`` itself as a fallback.
    """
    # Hangul Syllables block U+AC00..U+D7A3, written with escapes so the
    # character class cannot be damaged by a wrong file encoding again.
    if not prompt or not re.search("[\uac00-\ud7a3]", prompt):
        return prompt

    try:
        api_key = os.environ.get("GEMINI_API_KEY")
        if not api_key:
            logger.error("Gemini API 키가 설정되지 않았습니다.")
            return prompt

        client = genai.Client(api_key=api_key)
        translation_prompt = f"Translate the following Korean text to English:\n\n{prompt}"
        logger.info("Translation prompt: %s", translation_prompt)

        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=[translation_prompt],
            config=types.GenerateContentConfig(
                response_modalities=['Text'],
                temperature=0.2,
                top_p=0.95,
                top_k=40,
                max_output_tokens=512
            )
        )

        # A response can come back without candidates or without parts;
        # treat that as "no translation" instead of raising.
        parts = []
        if response.candidates and response.candidates[0].content:
            parts = response.candidates[0].content.parts or []

        translated_text = "".join(
            part.text for part in parts if getattr(part, 'text', None)
        ).strip()

        if translated_text:
            logger.info("Translated text: %s", translated_text)
            return translated_text

        logger.warning("번역 결과가 없습니다. 원본 프롬프트 사용")
        return prompt
    except Exception:
        # Best effort: a failed translation must never block image generation.
        logger.exception("번역 중 오류 발생:")
        return prompt
|
|
|
def preprocess_prompt(prompt, image1, image2, image3):
    """Expand image markers and menu commands in a user prompt.

    Two things happen, in this order:

    1. The markers ``#1``, ``#2`` and ``#3`` are replaced by Korean ordinal
       labels ("first/second/third image"). When the matching image argument
       is ``None`` the label gets a "(없음)" (missing) suffix.
    2. If the prompt contains one of the menu keywords ("1. 이미지 변경",
       "2. 글자지우기", "4. 옷바꾸기", "5. 배경바꾸기",
       "6. 이미지 합성(상품포함)"), the whole prompt is replaced by the
       corresponding instruction template. For commands 1 and 2 a quoted
       argument is extracted from the prompt when present.

    Finally a request to generate an image is appended.

    Args:
        prompt: Raw user prompt (must be a string).
        image1, image2, image3: Uploaded images or ``None``; only their
            presence is inspected here.

    Returns:
        The processed (Korean) prompt string.
    """
    # NOTE(review): the Korean literals below were restored from a mis-encoded
    # copy of this file; a few particles (e.g. 은/을) and the word 신체 in the
    # outfit template were inferred from context -- confirm against upstream.

    # The command regexes look for the literal "#1" marker, so they must run
    # against the prompt as typed, not against the marker-expanded text.
    raw_prompt = prompt

    placeholders = (
        ("#1", "첫 번째 이미지", image1),
        ("#2", "두 번째 이미지", image2),
        ("#3", "세 번째 이미지", image3),
    )
    for marker, label, image in placeholders:
        if image is None:
            label += "(없음)"
        prompt = prompt.replace(marker, label)

    if "1. 이미지 변경" in raw_prompt:
        desc_match = re.search(r'#1을 "(.*?)"으로 바꿔라', raw_prompt)
        if desc_match:
            description = desc_match.group(1)
            prompt = f"첫 번째 이미지를 {description}으로 변경해주세요. 원본 이미지의 주요 내용은 유지하되 새로운 스타일과 분위기로 재해석해주세요."
        else:
            prompt = "첫 번째 이미지를 창의적으로 변형해주세요. 더 생생하고 예술적인 버전으로 만들어주세요."

    elif "2. 글자지우기" in raw_prompt:
        text_match = re.search(r'#1에서 "(.*?)"를 지워라', raw_prompt)
        if text_match:
            text_to_remove = text_match.group(1)
            prompt = f"첫 번째 이미지에서 '{text_to_remove}' 텍스트를 찾아 자연스럽게 제거해주세요. 텍스트가 있던 부분은 배경과 조화롭게 채워주세요."
        else:
            prompt = "첫 번째 이미지에서 모든 텍스트를 찾아 자연스럽게 제거해주세요. 깔끔한 이미지로 만들어주세요."

    elif "4. 옷바꾸기" in raw_prompt:
        prompt = "첫 번째 이미지의 인물 의상을 두 번째 이미지의 의상으로 변경해주세요. 의상의 스타일과 색상은 두 번째 이미지를 따르되, 신체 비율과 포즈는 첫 번째 이미지를 유지해주세요."

    elif "5. 배경바꾸기" in raw_prompt:
        prompt = "첫 번째 이미지의 배경을 두 번째 이미지의 배경으로 변경해주세요. 첫 번째 이미지의 주요 피사체는 유지하고, 두 번째 이미지의 배경과 조화롭게 합성해주세요."

    elif "6. 이미지 합성(상품포함)" in raw_prompt:
        prompt = "첫 번째 이미지와 두 번째 이미지(또는 세 번째 이미지)를 자연스럽게 합성해주세요. 모든 이미지의 주요 요소를 포함하고, 특히 상품이 돋보이도록 조화롭게 통합해주세요."

    prompt += " 이미지를 생성해주세요."
    return prompt
|
|
|
def generate_with_images(prompt, images): |
|
""" |
|
API ํธ์ถ์ ํตํด ์ด๋ฏธ์ง๋ฅผ ์์ฑํ๊ณ ๊ฒฐ๊ณผ ์ด๋ฏธ์ง๋ฅผ ๋ฐํํฉ๋๋ค. |
|
""" |
|
try: |
|
api_key = os.environ.get("GEMINI_API_KEY") |
|
if not api_key: |
|
return None, "API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค. ํ๊ฒฝ๋ณ์๋ฅผ ํ์ธํด์ฃผ์ธ์." |
|
|
|
client = genai.Client(api_key=api_key) |
|
logger.info(f"Gemini API ์์ฒญ ์์ - ํ๋กฌํํธ: {prompt}") |
|
|
|
contents = [prompt] |
|
for idx, img in enumerate(images, 1): |
|
if img is not None: |
|
contents.append(img) |
|
logger.info(f"์ด๋ฏธ์ง #{idx} ์ถ๊ฐ๋จ") |
|
|
|
response = client.models.generate_content( |
|
model="gemini-2.0-flash-exp-image-generation", |
|
contents=contents, |
|
config=types.GenerateContentConfig( |
|
response_modalities=['Text', 'Image'], |
|
temperature=1, |
|
top_p=0.95, |
|
top_k=40, |
|
max_output_tokens=8192 |
|
) |
|
) |
|
|
|
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: |
|
temp_path = tmp.name |
|
result_text = "" |
|
image_found = False |
|
for part in response.candidates[0].content.parts: |
|
if hasattr(part, 'text') and part.text: |
|
result_text += part.text |
|
logger.info(f"์๋ต ํ
์คํธ: {part.text}") |
|
elif hasattr(part, 'inline_data') and part.inline_data: |
|
save_binary_file(temp_path, part.inline_data.data) |
|
image_found = True |
|
logger.info("์๋ต์์ ์ด๋ฏธ์ง ์ถ์ถ ์ฑ๊ณต") |
|
if not image_found: |
|
return None, f"API์์ ์ด๋ฏธ์ง๋ฅผ ์์ฑํ์ง ๋ชปํ์ต๋๋ค. ์๋ต ํ
์คํธ: {result_text}" |
|
result_img = Image.open(temp_path) |
|
if result_img.mode == "RGBA": |
|
result_img = result_img.convert("RGB") |
|
return result_img, f"์ด๋ฏธ์ง๊ฐ ์ฑ๊ณต์ ์ผ๋ก ์์ฑ๋์์ต๋๋ค. {result_text}" |
|
except Exception as e: |
|
logger.exception("์ด๋ฏธ์ง ์์ฑ ์ค ์ค๋ฅ ๋ฐ์:") |
|
return None, f"์ค๋ฅ ๋ฐ์: {str(e)}" |
|
|
|
def process_images_with_prompt(image1, image2, image3, prompt):
    """Build the final English prompt for up to three images and run generation.

    Steps:
      * Reject the request when no image was supplied.
      * With a non-blank ``prompt``: expand it with ``preprocess_prompt`` and,
        if the result contains Hangul, translate it to English.
      * With a blank ``prompt``: pick a default English prompt based on how
        many images were supplied (one, two, or three).
      * Call ``generate_with_images`` with the final prompt and the supplied
        images.

    Args:
        image1, image2, image3: Uploaded images or ``None``.
        prompt: User prompt; may be empty or ``None``.

    Returns:
        ``(image, status, final_prompt)``. On failure ``image`` is ``None``
        and ``status`` carries the error message.
    """
    try:
        valid_images = [img for img in (image1, image2, image3) if img is not None]
        if not valid_images:
            return None, "적어도 하나의 이미지를 업로드해주세요.", ""

        if prompt and prompt.strip():
            processed_prompt = preprocess_prompt(prompt, image1, image2, image3)
            # Hangul Syllables block U+AC00..U+D7A3, written with escapes so
            # the check does not depend on the file's encoding.
            if re.search("[\uac00-\ud7a3]", processed_prompt):
                final_prompt = translate_prompt_to_english(processed_prompt)
            else:
                final_prompt = processed_prompt
        else:
            image_count = len(valid_images)
            if image_count == 1:
                final_prompt = "Please creatively transform this image into a more vivid and artistic version."
                logger.info("Default prompt generated for single image")
            elif image_count == 2:
                final_prompt = "Please seamlessly composite these two images, integrating their key elements harmoniously into a single image."
                logger.info("Default prompt generated for two images")
            else:
                final_prompt = "Please creatively composite these three images, combining their main elements into a cohesive and natural scene."
                logger.info("Default prompt generated for three images")

        result_img, status = generate_with_images(final_prompt, valid_images)
        return result_img, status, final_prompt
    except Exception as e:
        logger.exception("이미지 처리 중 오류 발생:")
        return None, f"오류 발생: {str(e)}", prompt
|
|
|
def process_and_show_prompt(image1, image2, image3, prompt):
    """Run ``process_images_with_prompt`` and return its result for display.

    Args:
        image1, image2, image3: Uploaded images or ``None``.
        prompt: User prompt; may be empty or ``None``.

    Returns:
        ``(image, status, final_prompt)`` as produced by
        ``process_images_with_prompt``. If that call raises, the error is
        logged and ``(None, <error message>, prompt)`` is returned instead.
    """
    try:
        return process_images_with_prompt(image1, image2, image3, prompt)
    except Exception as e:
        logger.exception("처리 중 오류 발생:")
        return None, f"오류 발생: {str(e)}", prompt
|
|
|
|
|
def run_example_1():
    """Example 1 (image change): edit ``down/1_in-1.png`` with a fixed prompt.

    Opens the input image, runs it through ``process_and_show_prompt`` and,
    when an image comes back, saves it as ``down/1_out-1.webp``.

    Returns:
        ``(image, status, final_prompt)``; ``(None, <error message>, "")``
        when the input image cannot be opened.
    """
    input_path = os.path.join("down", "1_in-1.png")
    output_path = os.path.join("down", "1_out-1.webp")
    try:
        input_img = Image.open(input_path)
    except Exception as e:
        return None, f"입력 이미지 열기 오류: {str(e)}", ""

    # NOTE(review): this literal is mis-encoded in the source (Korean UTF-8
    # read with a Thai single-byte codec) and the nouns inside the brackets
    # could not be restored with confidence, so it is kept exactly as found.
    # Replace it with the original Korean example text.
    prompt = "#1 ์ด๋ฏธ์ง์ [์ฒญ์ ์์ด๋ ๊ณ ๋ฅผ ๊ฒ์ ๊ณ ๋๋ ๊ณ ]์ผ๋ก ๋ณ๊ฒฝํ๋ผ."

    result_img, status, final_prompt = process_and_show_prompt(input_img, None, None, prompt)
    if result_img is not None:
        result_img.save(output_path, "WEBP")
    return result_img, status, final_prompt
|
|
|
def run_example_2():
    """Example 2 (text removal): edit ``down/2_in-1.png`` with a fixed prompt.

    Opens the input image, runs it through ``process_and_show_prompt`` and,
    when an image comes back, saves it as ``down/2_out-1.webp``.

    Returns:
        ``(image, status, final_prompt)``; ``(None, <error message>, "")``
        when the input image cannot be opened.
    """
    input_path = os.path.join("down", "2_in-1.png")
    output_path = os.path.join("down", "2_out-1.webp")
    try:
        input_img = Image.open(input_path)
    except Exception as e:
        return None, f"입력 이미지 열기 오류: {str(e)}", ""

    # NOTE(review): restored from a mis-encoded literal; the particle after
    # "이미지" (의 vs 에) was inferred from context -- confirm against upstream.
    prompt = "#1 이미지의 [중국어를 모두]를 제거하라."

    result_img, status, final_prompt = process_and_show_prompt(input_img, None, None, prompt)
    if result_img is not None:
        result_img.save(output_path, "WEBP")
    return result_img, status, final_prompt
|
|
|
|
|
|
|
|
|
|
|
# NOTE(review): this run of statements appears before the ``with gr.Blocks()``
# section further down, which is where ``submit_btn``, ``image_input``,
# ``prompt_input``, ``gemini_api_key``, ``output_gallery`` and ``output_text``
# are created. ``process_image_and_prompt`` is not defined anywhere in the
# visible part of this file. It looks like a misplaced tail of the Blocks
# layout -- confirm placement and the handler name before relying on it.

# Wire the "Generate" button: image, prompt and API key in; gallery and text out.
submit_btn.click(
    fn=process_image_and_prompt,
    inputs=[image_input, prompt_input, gemini_api_key],
    outputs=[output_gallery, output_text],
)

# Header shown above the example list.
gr.Markdown("## Try these examples", elem_classes="gr-examples-header")

# Each row is [image path, prompt, ""].
# NOTE(review): rows carry three values while ``inputs`` below lists only two
# components; the trailing "" presumably matches the API-key textbox -- verify.
examples = [
    ["data/1.webp", 'change text to "AMEER"', ""],
    ["data/2.webp", "remove the spoon from hand only", ""],
    ["data/3.webp", 'change text to "Make it "', ""],
    ["data/1.jpg", "add joker style only on face", ""],
    ["data/1777043.jpg", "add joker style only on face", ""],
    ["data/2807615.jpg", "add lipstick on lip only", ""],
    ["data/76860.jpg", "add lipstick on lip only", ""],
    ["data/2807615.jpg", "make it happy looking face only", ""],
]

# Example picker bound to the image and prompt inputs.
gr.Examples(
    examples=examples,
    inputs=[image_input, prompt_input],
    elem_id="examples-grid"
)
|
|
|
|
|
|
|
# Gradio UI definition. Components are created in layout order: a centered
# HTML header, then one row with an input column and an output column, then
# the examples section.
# NOTE(review): no event handler is attached to ``submit_btn`` inside this
# block; the only ``submit_btn.click(...)`` call in view sits earlier in the
# file, before these components exist.
with gr.Blocks() as demo:
    # Page title and short usage hint.
    gr.HTML(
        """
        <div style="text-align: center; margin-bottom: 1rem;">
          <h1>Gemini for Image Editing</h1>
          <p>Upload an image and enter a prompt to generate outputs.</p>
        </div>
        """
    )

    with gr.Row():
        # Left column: user inputs.
        with gr.Column():
            # Uploaded image is delivered to callbacks as a PIL image in RGBA mode.
            image_input = gr.Image(
                type="pil",
                label="Upload Image",
                image_mode="RGBA",
                elem_id="image-input"
            )
            # Optional API key typed by the user.
            gemini_api_key = gr.Textbox(
                lines=1,
                placeholder="Enter Gemini API Key (optional)",
                label="Gemini API Key (optional)"
            )
            # Free-text editing instruction.
            prompt_input = gr.Textbox(
                lines=2,
                placeholder="Enter prompt here...",
                label="Prompt"
            )
            submit_btn = gr.Button("Generate")
        # Right column: results.
        with gr.Column():
            output_gallery = gr.Gallery(label="Generated Outputs")
            output_text = gr.Textbox(
                label="Gemini Output",
                placeholder="Text response will appear here if no image is generated."
            )

    # Header shown above the example list.
    gr.Markdown("## Try these examples", elem_classes="gr-examples-header")

    # Each row is [image path, prompt, ""].
    # NOTE(review): rows carry three values while ``inputs`` below lists only
    # two components; the trailing "" presumably matches the API-key textbox
    # -- verify.
    examples = [
        ["data/1.webp", 'change text to "AMEER"', ""],
        ["data/2.webp", "remove the spoon from hand only", ""],
        ["data/3.webp", 'change text to "Make it "', ""],
        ["data/1.jpg", "add joker style only on face", ""],
        ["data/1777043.jpg", "add joker style only on face", ""],
        ["data/2807615.jpg", "add lipstick on lip only", ""],
        ["data/76860.jpg", "add lipstick on lip only", ""],
        ["data/2807615.jpg", "make it happy looking face only", ""],
    ]

    # Example picker bound to the image and prompt inputs.
    gr.Examples(
        examples=examples,
        inputs=[image_input, prompt_input],
        elem_id="examples-grid"
    )

# Enable request queuing (at most 50 pending requests) and start the app.
demo.queue(max_size=50).launch()
|
|