test-100

Running

File size: 26,864 Bytes

6cd8947

import os
import tempfile
from PIL import Image
import gradio as gr
import logging
import re
import time

from google import genai
from google.genai import types
from dotenv import load_dotenv
load_dotenv()

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def save_binary_file(file_name, data):
    """Write *data* to *file_name* in binary mode.

    The file is opened with mode ``"wb"``, so an existing file at that path
    is truncated and replaced.
    """
    sink = open(file_name, "wb")
    try:
        sink.write(data)
    finally:
        # Always release the handle, even when the write fails.
        sink.close()

# Placeholder tokens that stand in for the "#n" image tags while a prompt is
# sent through the translation model, so the model does not rewrite the tags.
_IMAGE_TAG_PLACEHOLDERS = (
    ("#1", "IMAGE_TAG_ONE"),
    ("#2", "IMAGE_TAG_TWO"),
    ("#3", "IMAGE_TAG_THREE"),
)


def _mask_image_tags(text):
    """Replace every "#n" image tag in *text* with its placeholder token."""
    for tag, placeholder in _IMAGE_TAG_PLACEHOLDERS:
        text = text.replace(tag, placeholder)
    return text


def _restore_image_tags(text):
    """Replace every placeholder token in *text* with its "#n" image tag."""
    for tag, placeholder in _IMAGE_TAG_PLACEHOLDERS:
        text = text.replace(placeholder, tag)
    return text


def translate_prompt_to_english(prompt):
    """Translate a prompt containing Korean text to English with Gemini.

    A prompt without any Hangul syllable is returned unchanged and no API
    call is made. Otherwise the "#1", "#2" and "#3" image tags are swapped
    for placeholder tokens, the text is sent to the ``gemini-2.0-flash``
    model, and the tags are restored in the answer.

    Translation is best-effort: when the ``GEMINI_API_KEY`` environment
    variable is missing, the model returns no text, or any exception is
    raised, the untranslated prompt is returned (with its tags intact)
    instead of raising.

    Args:
        prompt: The prompt text to translate.

    Returns:
        The stripped English translation, or the untranslated prompt when
        translation is not needed or not possible.
    """
    if not re.search("[가-힣]", prompt):
        return prompt

    masked_prompt = _mask_image_tags(prompt)
    # Every failure path hands back the untranslated text with tags restored.
    fallback_prompt = _restore_image_tags(masked_prompt)

    try:
        api_key = os.environ.get("GEMINI_API_KEY")
        if not api_key:
            logger.error("Gemini API 키가 설정되지 않았습니다.")
            return fallback_prompt

        client = genai.Client(api_key=api_key)
        translation_prompt = f"""
        Translate the following Korean text to English:
        
        {masked_prompt}
        
        IMPORTANT: The tokens IMAGE_TAG_ONE, IMAGE_TAG_TWO, and IMAGE_TAG_THREE are special tags 
        and must be preserved exactly as is in your translation. Do not translate these tokens.
        """

        logger.info("Translation prompt: %s", translation_prompt)
        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=[translation_prompt],
            config=types.GenerateContentConfig(
                response_modalities=['Text'],
                temperature=0.2,
                top_p=0.95,
                top_k=40,
                max_output_tokens=512
            )
        )

        # A response may carry no candidates / content / parts; treat that as
        # "no translation" rather than letting an IndexError reach the handler.
        candidates = response.candidates or []
        content = candidates[0].content if candidates else None
        parts = (content.parts if content else None) or []
        translated_text = "".join(
            part.text for part in parts if getattr(part, "text", None)
        ).strip()

        if not translated_text:
            logger.warning("번역 결과가 없습니다. 원본 프롬프트 사용")
            return fallback_prompt

        translated_text = _restore_image_tags(translated_text)
        logger.info("Translated text: %s", translated_text)
        return translated_text
    except Exception:
        # Deliberate catch-all: a failed translation must never block image
        # generation, so log the traceback and fall back to the original text.
        logger.exception("번역 중 오류 발생:")
        return fallback_prompt

def preprocess_prompt(prompt, image1, image2, image3):
    """Expand image tags and feature commands in a user prompt.

    Two things happen, in order:

    1. The tags "#1", "#2" and "#3" are replaced by Korean labels for the
       first, second and third image. When the matching image argument is
       ``None`` the label carries a "(없음)" ("missing") marker.
    2. If the prompt contains one of the numbered feature keywords
       ("1. 이미지 변경", "2. 글자지우기", "4. 옷바꾸기", "5. 배경바꾸기",
       "6. 이미지 합성(상품포함)"), the whole prompt is replaced by a fixed
       instruction for that feature. Features 1 and 2 additionally pull a
       quoted argument out of the user's text when present.

    A closing sentence asking for an image without text is always appended.

    Args:
        prompt: The raw prompt text.
        image1: First image, or ``None`` when not supplied.
        image2: Second image, or ``None`` when not supplied.
        image3: Third image, or ``None`` when not supplied.

    Returns:
        The expanded prompt string.
    """
    # Keep the untouched text: the command patterns below are written against
    # the raw "#1" tag, which the substitution loop rewrites. Matching them
    # after substitution could never succeed.
    raw_prompt = prompt

    tag_labels = (
        ("#1", image1, "첫 번째 이미지"),
        ("#2", image2, "두 번째 이미지"),
        ("#3", image3, "세 번째 이미지"),
    )
    for tag, image, label in tag_labels:
        if image is None:
            label += "(없음)"
        prompt = prompt.replace(tag, label)

    if "1. 이미지 변경" in prompt:
        desc_match = re.search(r'#1을 "(.*?)"으로 바꿔라', raw_prompt)
        if desc_match:
            description = desc_match.group(1)
            prompt = f"첫 번째 이미지를 {description}으로 변경해주세요. 원본 이미지의 주요 내용은 유지하되 새로운 스타일과 분위기로 재해석해주세요."
        else:
            prompt = "첫 번째 이미지를 창의적으로 변형해주세요. 더 생생하고 예술적인 버전으로 만들어주세요."

    elif "2. 글자지우기" in prompt:
        text_match = re.search(r'#1에서 "(.*?)"를 지워라', raw_prompt)
        if text_match:
            text_to_remove = text_match.group(1)
            prompt = f"첫 번째 이미지에서 '{text_to_remove}' 텍스트를 찾아 자연스럽게 제거해주세요. 텍스트가 있던 부분을 배경과 조화롭게 채워주세요."
        else:
            prompt = "첫 번째 이미지에서 모든 텍스트를 찾아 자연스럽게 제거해주세요. 깔끔한 이미지로 만들어주세요."

    elif "4. 옷바꾸기" in prompt:
        prompt = "첫 번째 이미지의 인물 의상을 두 번째 이미지의 의상으로 변경해주세요. 의상의 스타일과 색상은 두 번째 이미지를 따르되, 신체 비율과 포즈는 첫 번째 이미지를 유지해주세요."

    elif "5. 배경바꾸기" in prompt:
        prompt = "첫 번째 이미지의 배경을 두 번째 이미지의 배경으로 변경해주세요. 첫 번째 이미지의 주요 피사체는 유지하고, 두 번째 이미지의 배경과 조화롭게 합성해주세요."

    elif "6. 이미지 합성(상품포함)" in prompt:
        prompt = "첫 번째 이미지와 두 번째 이미지(또는 세 번째 이미지)를 자연스럽게 합성해주세요. 모든 이미지의 주요 요소를 포함하고, 특히 상품이 돋보이도록 조화롭게 통합해주세요."

    prompt += " 이미지를 생성해주세요. 이미지에 텍스트나 글자를 포함하지 마세요."
    return prompt

def generate_with_images(prompt, images, variation_index=0):
    """Ask the Gemini image model for one generated image.

    The prompt is extended with a per-variation suffix, sent together with
    every non-``None`` entry of *images*, and the image found in the
    response is decoded in memory (no temporary file is left behind).

    Args:
        prompt: The (English) instruction text.
        images: Iterable of input images; ``None`` entries are skipped.
        variation_index: Selects which variation suffix is appended. Values
            outside ``0..3`` get only the generic "no text" suffix.

    Returns:
        A ``(image, message)`` tuple. ``image`` is a PIL image (RGBA results
        are converted to RGB) or ``None`` on failure; ``message`` is a
        status or error description. The function does not raise: every
        exception is logged and reported through ``message``.
    """
    import io  # local import: only this function needs an in-memory buffer

    try:
        api_key = os.environ.get("GEMINI_API_KEY")
        if not api_key:
            return None, "API 키가 설정되지 않았습니다. 환경변수를 확인해주세요."

        client = genai.Client(api_key=api_key)
        logger.info(f"Gemini API 요청 시작 - 프롬프트: {prompt}, 변형 인덱스: {variation_index}")

        variation_suffixes = [
            " Create this as the first variation. Do not add any text, watermarks, or labels to the image.",
            " Create this as the second variation with more vivid colors. Do not add any text, watermarks, or labels to the image.",
            " Create this as the third variation with a more creative style. Do not add any text, watermarks, or labels to the image.",
            " Create this as the fourth variation with enhanced details. Do not add any text, watermarks, or labels to the image."
        ]

        # The lower bound matters: a negative index would otherwise silently
        # pick a suffix from the end of the list.
        if 0 <= variation_index < len(variation_suffixes):
            prompt = prompt + variation_suffixes[variation_index]
        else:
            prompt = prompt + " Do not add any text, watermarks, or labels to the image."

        contents = [prompt]
        for idx, img in enumerate(images, 1):
            if img is not None:
                contents.append(img)
                logger.info(f"이미지 #{idx} 추가됨")

        response = client.models.generate_content(
            model="gemini-2.0-flash-exp-image-generation",
            contents=contents,
            config=types.GenerateContentConfig(
                response_modalities=['Text', 'Image'],
                temperature=1,
                top_p=0.95,
                top_k=40,
                max_output_tokens=8192
            )
        )

        # A response may carry no candidates / content / parts; report that
        # as "no image" instead of failing with an IndexError.
        candidates = response.candidates or []
        content = candidates[0].content if candidates else None
        parts = (content.parts if content else None) or []

        result_text = ""
        image_bytes = None
        for part in parts:
            if hasattr(part, 'text') and part.text:
                result_text += part.text
                logger.info(f"응답 텍스트: {part.text}")
            elif hasattr(part, 'inline_data') and part.inline_data:
                # If several images come back, the last one wins.
                image_bytes = part.inline_data.data
                logger.info("응답에서 이미지 추출 성공")

        if image_bytes is None:
            return None, f"API에서 이미지를 생성하지 못했습니다. 응답 텍스트: {result_text}"

        result_img = Image.open(io.BytesIO(image_bytes))
        # Image.open is lazy; decode now so a corrupt payload fails here,
        # inside the try block, rather than later in the UI.
        result_img.load()
        if result_img.mode == "RGBA":
            result_img = result_img.convert("RGB")
        return result_img, f"이미지가 성공적으로 생성되었습니다. {result_text}"
    except Exception as e:
        # Boundary handler: callers expect an error message, never a raise.
        logger.exception("이미지 생성 중 오류 발생:")
        return None, f"오류 발생: {str(e)}"

def process_images_with_prompt(image1, image2, image3, prompt, variation_index=0, max_retries=3):
    """Build the final prompt for up to three images and generate one image.

    When *prompt* is non-blank it is expanded with ``preprocess_prompt`` and,
    if it still contains Korean, translated with
    ``translate_prompt_to_english``. When it is blank, a default English
    prompt is chosen by the number of supplied images. Generation is then
    attempted up to *max_retries* times with a one-second pause between
    attempts.

    The final prompt is built once and reused across retries, so a retry
    does not trigger another translation request.

    Args:
        image1: First image, or ``None``.
        image2: Second image, or ``None``.
        image3: Third image, or ``None``.
        prompt: User prompt; may be empty or ``None``.
        variation_index: Forwarded to ``generate_with_images``.
        max_retries: Maximum number of generation attempts.

    Returns:
        ``(image, status, prompt_used)``. On success ``prompt_used`` is the
        final prompt; when no image was supplied it is ``""``; when every
        attempt failed, ``image`` is ``None`` and the original *prompt* is
        returned.
    """
    valid_images = [img for img in (image1, image2, image3) if img is not None]
    if not valid_images:
        return None, "적어도 하나의 이미지를 업로드해주세요.", ""

    final_prompt = None
    last_error = None

    for attempt in range(1, max_retries + 1):
        try:
            # Built lazily inside the try so a failure here is still reported
            # through the retry/error path, but only done once on success.
            if final_prompt is None:
                if prompt and prompt.strip():
                    processed_prompt = preprocess_prompt(prompt, image1, image2, image3)
                    if re.search("[가-힣]", processed_prompt):
                        final_prompt = translate_prompt_to_english(processed_prompt)
                    else:
                        final_prompt = processed_prompt
                elif len(valid_images) == 1:
                    final_prompt = "Please creatively transform this image into a more vivid and artistic version. Do not include any text or watermarks in the generated image."
                    logger.info("Default prompt generated for single image")
                elif len(valid_images) == 2:
                    final_prompt = "Please seamlessly composite these two images, integrating their key elements harmoniously into a single image. Do not include any text or watermarks in the generated image."
                    logger.info("Default prompt generated for two images")
                else:
                    final_prompt = "Please creatively composite these three images, combining their main elements into a cohesive and natural scene. Do not include any text or watermarks in the generated image."
                    logger.info("Default prompt generated for three images")

            result_img, status = generate_with_images(final_prompt, valid_images, variation_index)
            if result_img is not None:
                return result_img, status, final_prompt

            last_error = status
            logger.warning(f"이미지 생성 실패, 재시도 {attempt}/{max_retries}: {status}")
        except Exception as e:
            last_error = str(e)
            logger.exception(f"이미지 처리 중 오류 발생, 재시도 {attempt}/{max_retries}:")

        # Pause only when another attempt will follow.
        if attempt < max_retries:
            time.sleep(1)

    return None, f"최대 재시도 횟수({max_retries}회) 초과 후 실패: {last_error}", prompt

def generate_multiple_images(image1, image2, image3, prompt, progress=gr.Progress()):
    """Generate four image variations one after another.

    Calls ``process_images_with_prompt`` once per variation index (0-3),
    reporting progress through *progress* and pausing one second after each
    call.

    Returns:
        A 6-tuple: the four results (``None`` where generation failed), the
        per-image status lines joined by newlines, and the per-image prompts
        joined by newlines.
    """
    num_images = 4
    max_retries = 3

    results, statuses, prompts = [], [], []

    progress(0, desc="이미지 생성 준비 중...")

    for i in range(num_images):
        ordinal = i + 1
        progress((i / num_images), desc=f"{ordinal}/{num_images} 이미지 생성 중...")
        result_img, status, final_prompt = process_images_with_prompt(
            image1, image2, image3, prompt, i, max_retries
        )

        # A failed generation yields None, which is stored as-is.
        results.append(result_img)
        if result_img is None:
            statuses.append(f"이미지 #{ordinal} 생성 실패: {status}")
        else:
            statuses.append(f"이미지 #{ordinal}: {status}")
        prompts.append(f"이미지 #{ordinal}: {final_prompt}")

        time.sleep(1)

    progress(1.0, desc="이미지 생성 완료!")

    # Pad so that the four image outputs below always exist.
    results.extend([None] * (4 - len(results)))

    first, second, third, fourth = results[0], results[1], results[2], results[3]
    return first, second, third, fourth, "\n".join(statuses), "\n".join(prompts)

# =====================================================================
# 아래는 GFPGAN 참조코드를 탭으로 추가한 부분 (기능 구현 및 UI 개선)
# 기존 GFPGAN 관련 코드에서 version과 Rescaling factor 관련 UI는 제거하고,
# GFPGANv1.4를 고정으로 사용합니다.
# =====================================================================

import sys
from torchvision.transforms import functional
sys.modules["torchvision.transforms.functional_tensor"] = functional

from basicsr.archs.srvgg_arch import SRVGGNetCompact
from gfpgan.utils import GFPGANer
from realesrgan.utils import RealESRGANer

import torch
import cv2

# Fetch the required model weights into the current directory when a file is
# not already present. Files are checked in the listed order.
_WEIGHT_SOURCES = (
    ('realesr-general-x4v3.pth', "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth"),
    ('GFPGANv1.4.pth', "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth"),
    ('RestoreFormer.pth', "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.4/RestoreFormer.pth"),
)
for _weight_file, _weight_url in _WEIGHT_SOURCES:
    if not os.path.exists(_weight_file):
        os.system(f"wget {_weight_url} -P .")

# Background upsampler handed to GFPGANer in `upscaler`; half precision is
# requested only when CUDA is available.
model_path = 'realesr-general-x4v3.pth'
model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
half = bool(torch.cuda.is_available())
upsampler = RealESRGANer(
    scale=4,
    model_path=model_path,
    model=model,
    tile=0,
    tile_pad=10,
    pre_pad=0,
    half=half,
)

def upscaler(img, version, scale):
    """Restore faces in an image file with GFPGAN and return an RGB array.

    Args:
        img: Path of the image file to read with ``cv2.imread``.
        version: Base name of the weights file; ``f"{version}.pth"`` is
            loaded. ``"RestoreFormer"`` selects the RestoreFormer
            architecture, any other value the ``"clean"`` architecture.
        scale: When different from 2, the enhanced output is resized to
            ``(w * scale / 2, h * scale / 2)``, where ``w`` and ``h`` are
            the dimensions of the image passed to the enhancer. A failure
            of this resize is logged and the un-resized output is kept.

    Returns:
        The enhanced image converted from BGR to RGB, or ``None`` when the
        file cannot be read, enhancement fails, or any other error occurs.
        The function does not raise.
    """
    try:
        img = cv2.imread(img, cv2.IMREAD_UNCHANGED)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            logger.error('전체 예외 발생: %s', 'cv2.imread returned None')
            return None

        # Single-channel input is expanded to three channels before enhancing.
        if img.ndim == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        h, w = img.shape[0:2]
        if h < 300:
            # Enlarge small inputs 2x before enhancement.
            img = cv2.resize(img, (w * 2, h * 2), interpolation=cv2.INTER_LANCZOS4)

        face_enhancer = GFPGANer(
            model_path=f'{version}.pth',
            upscale=2,
            arch='RestoreFormer' if version == 'RestoreFormer' else 'clean',
            channel_multiplier=2,
            bg_upsampler=upsampler
        )

        try:
            _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
        except RuntimeError as error:
            # Without an enhanced image there is nothing left to post-process.
            logger.error('오류 발생: %s', error)
            return None

        try:
            if scale != 2:
                interpolation = cv2.INTER_AREA if scale < 2 else cv2.INTER_LANCZOS4
                h, w = img.shape[0:2]
                output = cv2.resize(output, (int(w * scale / 2), int(h * scale / 2)), interpolation=interpolation)
        except Exception as error:
            # Best-effort: keep the un-resized output when the scale is unusable.
            logger.error('잘못된 스케일 입력: %s', error)

        return cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
    except Exception as error:
        logger.error('전체 예외 발생: %s', error)
        return None

# GFPGAN upscaler entry point used by the UI (model version and scale are fixed).
def upscaler_korean(img):
    """Run ``upscaler`` on *img* with the GFPGANv1.4 weights and a scale of 2."""
    fixed_version = "GFPGANv1.4"
    fixed_scale = 2
    return upscaler(img, fixed_version, fixed_scale)

# =====================================================================
# Gradio Blocks로 탭 UI 구성 (Gradio 5.21 기준)
# =====================================================================

# Preset prompts shared by the quick-fill buttons and the example gallery.
# Each entry is (button label, example image paths for #1/#2/#3, prompt text).
# A single list keeps each button and its example on exactly the same text.
PROMPT_PRESETS = [
    (
        "이미지 변경-1",
        ["down/모델.jpg", None, None],
        "(#1의 여성)이 살짝 뒤로 돌아보는 모습으로 최대한 이전 seed를 유지한 채 자연스럽게 변경하라.",
    ),
    (
        "이미지 변경-2",
        ["down/상어레고모형.png", None, None],
        "(#1 레고모형)에서 청색상어레고만 검은색 고래레고로 변경하고 나머지 부분은 seed를 변경을 하지마라.",
    ),
    (
        "글자 지우기",
        ["down/중국어.png", None, None],
        "(#1 이미지)에 있는 중국어를 모두 제거하라.",
    ),
    (
        "글자 변경하기",
        ["down/텍스트.webp", None, None],
        '(#1의 텍스트)를 스타일을 유지한 채 텍스트만 "Hello"로 바꿔라',
    ),
    (
        "가상 상품착용-1",
        ["down/모델.jpg", "down/선글라스.png", "down/청바지.png"],
        "(#1의 여성모델)이 신체 비율과 포즈는 유지한 채 (#2의 선글라스)와 (#3의 청바지)를 직접 모델이 착용한 것처럼 자연스럽게 이미지를 생성하라.",
    ),
    (
        "가상 상품착용-2",
        ["down/모델.jpg", "down/선글라스.png", "down/카페전경.png"],
        "(#1의 여성모델)이 신체 비율과 포즈는 유지한 채 (#2의 선글라스)를 직접 모델이 착용한 것처럼 (#3의 장소)에서 의자에 앉아 있는 자연스러운 이미지를 생성하라.",
    ),
    (
        "상품들고 있기",
        ["down/모델.jpg", "down/와인잔.png", None],
        "(#1의 여성모델)이 신체 비율과 포즈는 유지한 채 (#2의 와인잔)을 여성모델이 홍보할 와인잔을 돋보이게 들고 있는 자연스러운 모습으로 이미지를 생성하라.",
    ),
    (
        "배경 바꾸기",
        ["down/모델.jpg", "down/카페전경.png", None],
        "(#1의 여성모델)이 (#2 이미지의 배경)을 주요 피사체는 그대로 유지하여 두 이미지의 분위기가 자연스럽게 어우러지도록 생성하라.",
    ),
    (
        "부분 지우기",
        ["down/상어레고모형.png", None, None],
        "(#1의 레고모형)에서 청색상어레고를 제거한 후, 그 자리를 주변 배경과 자연스럽게 어우러지도록 채워주세요. 단, 이미지의 다른 부분의 주요 요소는 동일하게 유지해야 한다.",
    ),
]


def _preset_filler(text):
    """Return a zero-argument callback that yields *text* for the prompt box."""
    # A factory (rather than a lambda in the loop) binds each button to its
    # own text and keeps the callback's signature free of parameters.
    def fill_prompt():
        return text
    return fill_prompt


with gr.Blocks() as demo:
    with gr.Tabs():
        # ---- Tab 1: e-commerce image generator -------------------------
        with gr.TabItem("이커머스 이미지 생성기"):
            gr.HTML(
                """
                <div style="text-align: center; margin-bottom: 1rem;">
                    <h1>이커머스용 이미지 생성기</h1>
                    <p>이미지를 업로드하고 예제를 참고하여 프롬프트를 수정한 후 "이미지 생성" 버튼을 클릭하면 차례로 4장의 이미지가 생성됩니다.</p>
                </div>
                """
            )
            
            with gr.Row():
                # Left column: inputs, preset buttons, help text, examples.
                with gr.Column(scale=1):
                    with gr.Row():
                        image1_input = gr.Image(type="pil", label="#1", image_mode="RGB", height=300, width=200)
                        image2_input = gr.Image(type="pil", label="#2", image_mode="RGB", height=300, width=200)
                        image3_input = gr.Image(type="pil", label="#3", image_mode="RGB", height=300, width=200)
                    prompt_input = gr.Textbox(
                        lines=3,
                        placeholder="프롬프트를 입력하거나 비워두면 자동 합성됩니다.",
                        label="프롬프트 (선택 사항)"
                    )
                    with gr.Row():
                        # One quick-fill button per preset; clicking it writes
                        # the preset prompt into the prompt box.
                        for preset_label, _preset_images, preset_prompt in PROMPT_PRESETS:
                            gr.Button(preset_label).click(
                                fn=_preset_filler(preset_prompt),
                                inputs=[],
                                outputs=prompt_input
                            )
                    submit_btn = gr.Button("이미지 생성 (4장)", variant="primary")
                    
                    gr.Markdown(
                        """
                        ### 사용 방법:
                        
                        1. **자동 합성**: 이미지를 업로드하고 프롬프트를 비워두면 자동으로 합성됩니다.
                        2. **이미지 참조**: #1, #2, #3으로 각 이미지를 참조할 수 있습니다.
                        3. **선택 옵션**: 위의 버튼을 클릭하면 프롬프트 입력란에 한국어 문구로 입력하시면 됩니다.
                        4. **다양한 이미지**: "이미지 생성" 버튼을 클릭하면 차례로 4장의 이미지가 생성됩니다.
                        5. **예제 선택**: 다양한 예제를 통해 미리 테스트해보세요.
                        
                        > **팁**: 프롬프트를 직접 수정할 수도 있습니다.
                        """
                    )
                    
                    gr.Markdown("## 예제 이미지")
                    # Example rows: the three image paths followed by the prompt.
                    examples = [
                        [*preset_images, preset_prompt]
                        for _preset_label, preset_images, preset_prompt in PROMPT_PRESETS
                    ]
                    
                    gr.Examples(
                        examples=examples,
                        inputs=[image1_input, image2_input, image3_input, prompt_input],
                        elem_id="examples-grid"
                    )
                    
                # Right column: the four generated images in a 2x2 layout,
                # followed by the status and prompt read-outs.
                with gr.Column(scale=1):
                    with gr.Row():
                        with gr.Column():
                            output_image1 = gr.Image(label="생성된 이미지 #1", height=600, width=450)
                            output_image3 = gr.Image(label="생성된 이미지 #3", height=600, width=450)
                        with gr.Column():
                            output_image2 = gr.Image(label="생성된 이미지 #2", height=600, width=450)
                            output_image4 = gr.Image(label="생성된 이미지 #4", height=600, width=450)
                    
                    output_text = gr.Textbox(label="상태 메시지", lines=4)
                    prompt_display = gr.Textbox(label="사용된 프롬프트 (영어)", visible=True, lines=4)
            
            submit_btn.click(
                fn=generate_multiple_images,
                inputs=[image1_input, image2_input, image3_input, prompt_input],
                outputs=[output_image1, output_image2, output_image3, output_image4, output_text, prompt_display],
            )
        
        # ---- Tab 2: GFPGAN upscaler -------------------------------------
        with gr.TabItem("GFPGAN 업스케일러"):
            gr.Markdown("<h1 style='text-align: center;'>GFPGAN 업스케일러 및 복원</h1>")
            gr.Markdown("입력 이미지를 업로드하면 GFPGANv1.4 모델을 사용하여 얼굴 복원 및 업스케일링을 수행합니다.")
            with gr.Row():
                gfpgan_input = gr.Image(type="filepath", label="입력 이미지", image_mode="RGB")
                gfpgan_output = gr.Image(type="numpy", label="출력 이미지")
            gfpgan_btn = gr.Button("업스케일 및 복원")
            gfpgan_btn.click(fn=upscaler_korean, inputs=gfpgan_input, outputs=gfpgan_output)

# Start the app only after the Blocks context has been closed, so the layout
# is fully built before the server starts.
demo.queue()
demo.launch()