import os import re import time from os import path import tempfile import io import random import string import torch from PIL import Image from transformers import pipeline from safetensors.torch import load_file from huggingface_hub import hf_hub_download import gradio as gr from diffusers import FluxPipeline # (Internal) text-modification library from google import genai from google.genai import types ####################################### # 0. Environment & Translation Pipeline ####################################### BASE_DIR = path.dirname(path.abspath(__file__)) if "__file__" in globals() else os.getcwd() CACHE_PATH = path.join(BASE_DIR, "models") os.environ["TRANSFORMERS_CACHE"] = CACHE_PATH os.environ["HF_HUB_CACHE"] = CACHE_PATH os.environ["HF_HOME"] = CACHE_PATH # Translation (Korean -> English), CPU only translator = pipeline( task="translation", model="Helsinki-NLP/opus-mt-ko-en", device=-1 # force CPU ) def maybe_translate_to_english(text: str) -> str: """ If the prompt contains any Korean characters, translate to English. Otherwise, return as-is. """ import re if re.search("[가-힣]", text): translated = translator(text)[0]["translation_text"] print(f"[TRANSLATE] Detected Korean -> '{text}' -> '{translated}'") return translated return text # Simple Timer Class class timer: def __init__(self, method_name="timed process"): self.method = method_name def __enter__(self): self.start = time.time() print(f"[TIMER] {self.method} starts") def __exit__(self, exc_type, exc_val, exc_tb): end = time.time() print(f"[TIMER] {self.method} took {round(end - self.start, 2)}s") ####################################### # 1. Load FLUX Pipeline ####################################### if not path.exists(CACHE_PATH): os.makedirs(CACHE_PATH, exist_ok=True) pipe = FluxPipeline.from_pretrained( "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16 ) # 예시용 LoRA 다운로드 & 합치기 lora_path = hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors") pipe.load_lora_weights(lora_path) pipe.fuse_lora(lora_scale=0.125) pipe.to(device="cuda", dtype=torch.bfloat16) ####################################### # 2. Internal Text Modification Functions ####################################### def save_binary_file(file_name, data): with open(file_name, "wb") as f: f.write(data) def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"): """ - 추가 지시사항(AIP)을 전달해 이미지 기반 편집을 수행. - 응답이 '이미지'면 저장, '텍스트'면 누적하여 반환. """ # 기존 API 키 로직 유지 (환경 변수 GAPI_TOKEN 사용) api_key = os.getenv("GAPI_TOKEN", None) if not api_key: raise ValueError("GAPI_TOKEN is missing. Please set an API key.") client = genai.Client(api_key=api_key) files = [client.files.upload(file=file_name)] contents = [ types.Content( role="user", parts=[ types.Part.from_uri( file_uri=files[0].uri, mime_type=files[0].mime_type, ), types.Part.from_text(text=text), ], ), ] generate_content_config = types.GenerateContentConfig( temperature=1, top_p=0.95, top_k=40, max_output_tokens=8192, response_modalities=["image", "text"], response_mime_type="text/plain", ) text_response = "" image_path = None # 임시 파일에 이미지 저장 가능하도록 준비 with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: temp_path = tmp.name for chunk in client.models.generate_content_stream( model=model, contents=contents, config=generate_content_config, ): if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts: continue candidate = chunk.candidates[0].content.parts[0] # 만약 inline_data(이미지 데이터)가 있다면 -> 실제 이미지 편집 결과 if candidate.inline_data: save_binary_file(temp_path, candidate.inline_data.data) print(f"File of mime type {candidate.inline_data.mime_type} saved to: {temp_path}") image_path = temp_path # 이미지 한 장만 확보하면 중단 break else: # inline_data가 없으면 텍스트 데이터이므로 누적 text_response += chunk.text + "\n" del files return image_path, text_response ####################################### # 3. Diffusion Utility ####################################### def generate_random_letters(length: int) -> str: """ Create a random sequence of uppercase/lowercase letters of given length. """ letters = string.ascii_lowercase + string.ascii_uppercase return "".join(random.choice(letters) for _ in range(length)) def is_all_english(text: str) -> bool: """ Check if text consists only of English letters (a-z, A-Z), digits, spaces, and basic punctuation. If so, return True; otherwise False. """ import re return bool(re.match(r'^[a-zA-Z0-9\s\.,!\?\']*$', text)) def maybe_use_random_or_original(final_text: str) -> str: """ If final_text is strictly English/allowed chars, use it as-is. Else replace with random letters of the same length. """ if not final_text: return "" if is_all_english(final_text): return final_text else: return generate_random_letters(len(final_text)) def fill_prompt_with_random_texts(prompt: str, r1: str, r2: str, r3: str) -> str: """ Replace , , placeholders with r1, r2, r3. """ if "" in prompt: prompt = prompt.replace("", r1) else: prompt = f"{prompt} with clear readable text that says '{r1}'" if "" in prompt: prompt = prompt.replace("", r2) if "" in prompt: prompt = prompt.replace("", r3) return prompt def generate_initial_image(prompt, height, width, steps, scale, seed): """ Use Flux Pipeline to generate the initial image from the prompt. """ with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16), timer("Flux Generation"): result = pipe( prompt=[prompt], generator=torch.Generator().manual_seed(int(seed)), num_inference_steps=int(steps), guidance_scale=float(scale), height=int(height), width=int(width), max_sequence_length=256 ).images[0] return result ####################################### # 4. Creating 2 Final Images ####################################### def change_text_in_image_two_times(original_image, instruction): """ Call the text-modification API twice, returning 2 final variations. """ results = [] for version_tag in ["(A)", "(B)"]: mod_instruction = f"{instruction} {version_tag}" try: with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: original_path = tmp.name original_image.save(original_path) image_path, text_response = generate_by_google_genai( text=mod_instruction, file_name=original_path ) if image_path: with open(image_path, "rb") as f: image_data = f.read() new_img = Image.open(io.BytesIO(image_data)) results.append(new_img) else: # 만약 이미지 응답이 없고, 텍스트만 온 경우 print("[WARNING] No image returned. text_response=", text_response) results.append(original_image) except Exception as e: raise gr.Error(f"Error: {e}") return results ####################################### # 5. Main Process (Generation from Prompt) ####################################### def run_process( prompt, final_text1, final_text2, final_text3, height, width, steps, scale, seed ): """ 1) Translate prompt if Korean -> English 2) For each text, if not English -> random 3) Generate initial image 4) Replace placeholders with real text via API (2 variations) """ # 1) Translate prompt if needed prompt_en = maybe_translate_to_english(prompt) # 2) Decide placeholders r1 = maybe_use_random_or_original(final_text1) r2 = maybe_use_random_or_original(final_text2) r3 = maybe_use_random_or_original(final_text3) print(f"[DEBUG] Using placeholders: r1='{r1}', r2='{r2}', r3='{r3}'") # 3) Fill placeholders in prompt final_prompt = fill_prompt_with_random_texts(prompt_en, r1, r2, r3) print(f"[DEBUG] final_prompt = {final_prompt}") # 4) Generate initial "random/original" image _random_image = generate_initial_image(final_prompt, height, width, steps, scale, seed) # Build final instructions (replace placeholders -> real text) instructions = [] if r1 and final_text1: instructions.append(f"Change any text reading '{r1}' in this image to '{final_text1}'.") if r2 and final_text2: instructions.append(f"Change any text reading '{r2}' in this image to '{final_text2}'.") if r3 and final_text3: instructions.append(f"Change any text reading '{r3}' in this image to '{final_text3}'.") instruction = " ".join(instructions) if instructions else "No text changes needed." # Call 2 variations final_imgs = change_text_in_image_two_times(_random_image, instruction) return [final_imgs[0], final_imgs[1]] ####################################### # 5-2. Process for Editing Uploaded Image ####################################### def run_edit_process(input_image, edit_prompt, final_text1): """ 1) If final_text1 is empty => skip text replacement 2) Otherwise, combine edit_prompt + text-change instructions 3) Call 2 times for final images """ r1 = maybe_use_random_or_original(final_text1) print(f"[DEBUG] Editing image with placeholder r1='{r1}'") # *** 수정 핵심 *** # final_text1이 비어 있으면 텍스트 치환을 생략, # 아니면 "Change any text reading 'r1' => final_text1" 명령 추가 if not final_text1.strip(): instruction = f"{edit_prompt}" else: instruction = f"{edit_prompt}\nChange any text reading '{r1}' in this image to '{final_text1}'." final_imgs = change_text_in_image_two_times(input_image, instruction) return [final_imgs[0], final_imgs[1]] ####################################### # 6. Gradio UI with Two Tabs ####################################### with gr.Blocks(title="Eevery Text Imaginator: FLUX") as demo: gr.Markdown( """

Eevery Text Imaginator: FLUX

This tool generates two final images from a prompt or an uploaded image, optionally containing placeholders <text1>, <text2>, <text3>.


""" ) with gr.Tabs(): ############################################### # Tab 1) Generate from Prompt ############################################### with gr.TabItem("Generate from Prompt"): with gr.Row(): with gr.Column(): with gr.Group(): prompt_input = gr.Textbox( lines=3, label="Prompt (Korean or English)", placeholder="On a grand stage, in big letters..." ) final_text1 = gr.Textbox( label="New Text #1 (Required)", placeholder="Example: HELLO or 안녕하세요" ) final_text2 = gr.Textbox( label="New Text #2 (Optional)", placeholder="Example: WORLD or 반갑습니다" ) final_text3 = gr.Textbox( label="New Text #3 (Optional)", placeholder="(Leave blank if not used)" ) with gr.Accordion("Advanced Settings (optional)", open=False): height = gr.Slider( label="Height", minimum=256, maximum=1152, step=64, value=512 ) width = gr.Slider( label="Width", minimum=256, maximum=1152, step=64, value=512 ) steps = gr.Slider( label="Inference Steps", minimum=6, maximum=25, step=1, value=8 ) scale = gr.Slider( label="Guidance Scale", minimum=0.0, maximum=10.0, step=0.5, value=3.5 ) seed = gr.Number( label="Seed", value=1234, precision=0 ) run_btn = gr.Button("Generate 2 Final Images", variant="primary") gr.Examples( examples=[ [ "Futuristic neon sign with , plus near the bottom", "OPEN", "", "" ], [ "On a grand stage, in big letters and on the left side", "환영합니다.", "", "" ], [ "A classical poster reading in bold, as a subtitle", "错觉", "", "" ], [ "In a cartoon style, a speech bubble with and another text", "안녕", "", "" ], [ "Large billboard featuring ", "아름다운 당신", "", "" ], [ "썬글라스 착용한 흰색 고양이의 배너 ", "안녕", "", "" ], ], inputs=[prompt_input, final_text1, final_text2, final_text3], label="Example Prompts" ) with gr.Column(): final_image_output1 = gr.Image( label="Final Image #1", type="pil" ) final_image_output2 = gr.Image( label="Final Image #2", type="pil" ) # 버튼 클릭 시 처리 run_btn.click( fn=run_process, inputs=[ prompt_input, final_text1, final_text2, final_text3, height, width, steps, scale, seed ], outputs=[final_image_output1, final_image_output2] ) ############################################### # Tab 2) Edit Uploaded Image ############################################### with gr.TabItem("Edit Uploaded Image"): with gr.Row(): with gr.Column(): # Gradio 구버전 호환을 위해 source="upload"는 제거 uploaded_image = gr.Image( label="Upload Image for Editing", type="pil" ) edit_prompt = gr.Textbox( label="Additional Instruction Prompt", placeholder="(예: Make the background black, add sparkles, etc.)" ) final_text1_edit = gr.Textbox( label="Replace Text", placeholder="Example: HELLO or 안녕하세요" ) run_edit_btn = gr.Button("Edit Image", variant="primary") with gr.Column(): edited_image_output1 = gr.Image( label="Edited Image #1", type="pil" ) edited_image_output2 = gr.Image( label="Edited Image #2", type="pil" ) # 업로드 이미지 편집 시 처리 run_edit_btn.click( fn=run_edit_process, inputs=[uploaded_image, edit_prompt, final_text1_edit], outputs=[edited_image_output1, edited_image_output2] ) demo.launch(max_threads=20)