File size: 13,668 Bytes
176edce
1e367e3
176edce
 
80e38a2
 
 
 
 
e48aa5a
15cc8b5
 
80e38a2
 
b2f5030
 
1e367e3
176edce
 
 
1e367e3
343fdaf
176edce
343fdaf
1e367e3
80e38a2
 
 
 
1e367e3
80e38a2
 
 
 
343fdaf
80e38a2
 
 
 
1e367e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176edce
 
 
 
 
0399de8
176edce
 
0399de8
343fdaf
80e38a2
1e367e3
80e38a2
 
 
 
343fdaf
80e38a2
b2f5030
80e38a2
 
 
 
 
176edce
 
343fdaf
80e38a2
1e367e3
80e38a2
 
 
 
 
 
 
1e367e3
 
 
 
f09c591
 
 
1e367e3
f09c591
 
 
80e38a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ec2621
7b9b23e
80e38a2
 
 
 
 
 
 
 
 
 
0399de8
80e38a2
 
0399de8
80e38a2
 
1e367e3
80e38a2
f09c591
80e38a2
 
3ec2621
80e38a2
 
 
1e367e3
80e38a2
1e367e3
80e38a2
 
0399de8
1e367e3
 
 
0399de8
 
 
1e367e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0399de8
80e38a2
1e367e3
 
80e38a2
0399de8
 
b2f5030
0399de8
 
 
 
 
 
 
 
80e38a2
1e367e3
0399de8
1e367e3
0399de8
 
80e38a2
0399de8
80e38a2
 
 
 
 
 
 
 
 
0399de8
1e367e3
 
 
 
 
80e38a2
1e367e3
80e38a2
0399de8
1e367e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80e38a2
 
1e367e3
819fc44
 
0399de8
 
 
 
 
 
 
 
 
 
 
819fc44
1e367e3
 
 
 
 
819fc44
1e367e3
0399de8
1e367e3
 
 
 
0399de8
1e367e3
0399de8
1e367e3
 
 
 
 
 
819fc44
1e367e3
 
 
 
 
819fc44
 
1e367e3
819fc44
 
1e367e3
819fc44
 
1e367e3
 
 
 
 
 
 
 
 
 
 
 
3ec2621
 
343fdaf
1e367e3
0399de8
 
1e367e3
 
0399de8
 
1e367e3
 
0399de8
 
1e367e3
 
0399de8
 
1e367e3
 
0399de8
 
1e367e3
 
 
0399de8
 
80e38a2
 
1e367e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80e38a2
 
 
f8cac3e
1e367e3
80e38a2
1e367e3
80e38a2
0399de8
 
 
1e367e3
0399de8
 
80e38a2
1e367e3
 
819fc44
1e367e3
819fc44
0399de8
 
 
 
 
 
 
 
 
 
 
 
1e367e3
3ec2621
343fdaf
f09c591
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
import os
import re
import time
from os import path
import tempfile
import uuid
import base64
import mimetypes
import json
import io
import random
import string

import torch
from PIL import Image

from transformers import pipeline
from safetensors.torch import load_file
from huggingface_hub import hf_hub_download

# Gradio UI and Diffusers
import gradio as gr
from diffusers import FluxPipeline

# (Internal) text-modification library
from google import genai
from google.genai import types

#######################################
# 0. Environment & Translation Pipeline
#######################################

# Directory containing this script; falls back to the current working
# directory when __file__ is not defined (e.g. in an interactive session).
BASE_DIR = path.dirname(path.abspath(__file__)) if "__file__" in globals() else os.getcwd()
# All model downloads are pointed at a "models" folder next to the script.
CACHE_PATH = path.join(BASE_DIR, "models")

# Point the Hugging Face cache locations at CACHE_PATH.
# NOTE(review): these are assigned after `transformers` and `huggingface_hub`
# were imported at the top of the file — confirm the libraries still pick the
# values up at this point, otherwise move the assignments above the imports.
os.environ["TRANSFORMERS_CACHE"] = CACHE_PATH
os.environ["HF_HUB_CACHE"] = CACHE_PATH
os.environ["HF_HOME"] = CACHE_PATH

# Korean -> English translation pipeline, created once at import time and
# reused by maybe_translate_to_english(). Runs on CPU only.
translator = pipeline(
    task="translation",
    model="Helsinki-NLP/opus-mt-ko-en",
    device=-1  # force CPU
)

def maybe_translate_to_english(text: str) -> str:
    """
    Return an English version of the prompt.

    Text without any Hangul syllable (U+AC00..U+D7A3) is returned unchanged;
    otherwise the module-level ``translator`` pipeline is used and the
    translation is logged to stdout.
    """
    contains_korean = re.search("[가-힣]", text) is not None
    if not contains_korean:
        return text

    outputs = translator(text)
    translated = outputs[0]["translation_text"]
    print(f"[TRANSLATE] Detected Korean -> '{text}' -> '{translated}'")
    return translated

# Simple Timer Class
class timer:
    """
    Context manager that prints how long the wrapped block took.

    Usage::

        with timer("Flux Generation"):
            ...

    A "[TIMER] <name> starts" line is printed on entry and a
    "[TIMER] <name> took <seconds>s" line on exit. Exceptions raised inside
    the block are never suppressed.

    Args:
        method_name: Label used in the printed messages.
    """

    def __init__(self, method_name="timed process"):
        self.method = method_name
        self.start = None

    def __enter__(self):
        # perf_counter is monotonic, so the measurement is not affected by
        # system clock adjustments the way time.time() is.
        self.start = time.perf_counter()
        print(f"[TIMER] {self.method} starts")
        # Return the instance so `with timer(...) as t:` binds something useful.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        elapsed = time.perf_counter() - self.start
        print(f"[TIMER] {self.method} took {round(elapsed, 2)}s")
        # Falsy return value: let any exception from the block propagate.
        return False

#######################################
# 1. Load FLUX Pipeline
#######################################

# Make sure the local model cache directory exists before anything is loaded.
if not path.exists(CACHE_PATH):
    os.makedirs(CACHE_PATH, exist_ok=True)

# Base FLUX.1-dev text-to-image pipeline, loaded in bfloat16.
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.bfloat16
)

# Download the Hyper-SD 8-step LoRA, load it into the pipeline and fuse it
# with a scale of 0.125, then move the whole pipeline to the GPU.
lora_path = hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors")
pipe.load_lora_weights(lora_path)
pipe.fuse_lora(lora_scale=0.125)
pipe.to(device="cuda", dtype=torch.bfloat16)

#######################################
# 2. Internal Text Modification Functions
#######################################

def save_binary_file(file_name, data):
    """Write the bytes in ``data`` to ``file_name``, replacing any existing content."""
    with open(file_name, mode="wb") as sink:
        sink.write(data)

def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
    """
    Internally modifies text within an image, returning a new image path.
    (Screen instructions do not mention 'Google'.)

    The image at ``file_name`` is uploaded, sent to the model together with
    the instruction ``text``, and the streamed response is read until the
    first chunk that carries image data.

    Args:
        text: Edit instruction sent alongside the uploaded image.
        file_name: Path of the local image file to upload.
        model: Model identifier passed to the streaming generation call.

    Returns:
        A tuple ``(image_path, text_response)``. ``image_path`` is the path of
        a temporary file (created with a ``.png`` suffix and not deleted
        automatically) holding the returned image bytes, or ``None`` when the
        stream contained no image. ``text_response`` holds the text of the
        chunks received before the image, each followed by a newline.

    Raises:
        ValueError: If the ``GAPI_TOKEN`` environment variable is not set.
    """
    api_key = os.getenv("GAPI_TOKEN", None)
    if not api_key:
        raise ValueError(
            "GAPI_TOKEN is missing. Please set an API key."
        )

    client = genai.Client(api_key=api_key)
    uploaded = client.files.upload(file=file_name)

    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part.from_uri(
                    file_uri=uploaded.uri,
                    mime_type=uploaded.mime_type,
                ),
                types.Part.from_text(text=text),
            ],
        ),
    ]

    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        top_k=40,
        max_output_tokens=8192,
        response_modalities=["image", "text"],
        response_mime_type="text/plain",
    )

    text_chunks = []
    image_path = None

    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        if not chunk.candidates or not chunk.candidates[0].content:
            continue

        # Guard against content without parts instead of indexing blindly,
        # and look at every part rather than only the first one.
        parts = chunk.candidates[0].content.parts or []
        image_bytes = next(
            (
                part.inline_data.data
                for part in parts
                if part.inline_data and part.inline_data.data
            ),
            None,
        )

        if image_bytes is not None:
            # Create the temp file only once there is something to store, so
            # no empty file is left behind when the model returns text only.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                tmp.write(image_bytes)
                image_path = tmp.name
            print(f"[DEBUG] Returned new image -> {image_path}")
            break

        # chunk.text can be None; skip it rather than fail on concatenation.
        if chunk.text:
            text_chunks.append(chunk.text)

    text_response = "".join(f"{piece}\n" for piece in text_chunks)
    return image_path, text_response


#######################################
# 3. Diffusion Utility
#######################################

def generate_random_letters(length: int) -> str:
    """
    Return ``length`` randomly chosen ASCII letters (lower- and uppercase).

    A non-positive ``length`` yields an empty string.
    """
    alphabet = string.ascii_lowercase + string.ascii_uppercase
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return "".join(picked)

def is_all_english(text: str) -> bool:
    """
    Tell whether ``text`` is made up solely of allowed characters.

    Allowed: ASCII letters, digits, whitespace and the punctuation
    ``. , ! ? '``. The empty string counts as allowed. Anything else
    (Korean, other scripts, other symbols) makes the result False.
    """
    allowed_only = r'^[a-zA-Z0-9\s\.,!\?\']*$'
    return re.match(allowed_only, text) is not None

def maybe_use_random_or_original(final_text: str) -> str:
    """
    Choose the placeholder text that goes into the initial image.

    Empty input gives an empty string. Text accepted by is_all_english() is
    kept unchanged; any other text is swapped for random letters of the
    same length.
    """
    if not final_text:
        return ""

    keep_original = is_all_english(final_text)
    return final_text if keep_original else generate_random_letters(len(final_text))

def fill_prompt_with_random_texts(prompt: str, r1: str, r2: str, r3: str) -> str:
    """
    Substitute the <text1>, <text2>, <text3> placeholders with r1, r2, r3.

    <text1> is mandatory: when the prompt lacks it, a sentence asking for
    readable text saying r1 is appended instead. <text2> and <text3> are
    substituted only if they occur.
    """
    if "<text1>" not in prompt:
        filled = f"{prompt} with clear readable text that says '{r1}'"
    else:
        filled = prompt.replace("<text1>", r1)

    # The optional placeholders are handled in order, each on the result of
    # the previous substitution.
    for tag, replacement in (("<text2>", r2), ("<text3>", r3)):
        if tag in filled:
            filled = filled.replace(tag, replacement)

    return filled

def generate_initial_image(prompt, height, width, steps, scale, seed):
    """
    Render the first image for ``prompt`` with the module-level Flux pipeline.

    The numeric arguments are coerced (``int`` for height, width, steps and
    seed; ``float`` for scale) before being handed to the pipeline. The call
    runs under inference mode and CUDA bfloat16 autocast and is timed.
    Returns the first image of the pipeline output.
    """
    with torch.inference_mode():
        with torch.autocast("cuda", dtype=torch.bfloat16):
            with timer("Flux Generation"):
                seeded_generator = torch.Generator().manual_seed(int(seed))
                output = pipe(
                    prompt=[prompt],
                    generator=seeded_generator,
                    num_inference_steps=int(steps),
                    guidance_scale=float(scale),
                    height=int(height),
                    width=int(width),
                    max_sequence_length=256
                )
                first_image = output.images[0]
    return first_image


#######################################
# 4. Creating 2 Final Images
#######################################

def build_multi_change_instruction(r1, f1, r2, f2, r3, f3):
    """
    Build one instruction string covering the replacements
    (r1->f1), (r2->f2), (r3->f3).

    A pair contributes a sentence only when both its placeholder and its
    final text are non-empty. Sentences are joined with single spaces; when
    no pair qualifies, "No text changes needed." is returned.
    """
    replacements = ((r1, f1), (r2, f2), (r3, f3))
    sentences = [
        f"Change any text reading '{old}' in this image to '{new}'."
        for old, new in replacements
        if old and new
    ]
    return " ".join(sentences) if sentences else "No text changes needed."

def _remove_temp_file(file_path):
    """Delete ``file_path`` if possible; failures to delete are ignored."""
    try:
        os.remove(file_path)
    except OSError:
        # Best-effort cleanup: a leftover temp file must not fail the request.
        pass


def change_text_in_image_two_times(original_image, instruction):
    """
    Call the text modification function twice,
    returning 2 final variations.

    The input image is written to a temporary PNG once and that file is sent
    with the instruction tagged "(A)" and then "(B)". Each returned image is
    loaded fully into memory, so every temporary file can be removed before
    returning.

    Args:
        original_image: Image object exposing ``save(path)``; it is also used
            as the fallback result when the service returns no image.
        instruction: Edit instruction to send; a version tag is appended.

    Returns:
        A list with two images, one per variation.

    Raises:
        gr.Error: If saving, calling the service or loading a result fails;
            the original exception is attached as the cause.
    """
    results = []
    original_path = None
    try:
        # Reserve a path and close the handle before the image is written to it.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            original_path = tmp.name
        # The input is identical for both variations, so save it only once.
        original_image.save(original_path)

        for version_tag in ["(A)", "(B)"]:
            mod_instruction = f"{instruction} {version_tag}"
            image_path, _text_response = generate_by_google_genai(
                text=mod_instruction,
                file_name=original_path
            )
            if not image_path:
                results.append(original_image)
                continue

            try:
                with open(image_path, "rb") as f:
                    image_data = f.read()
            finally:
                # The bytes are in memory now; the returned file is disposable.
                _remove_temp_file(image_path)
            results.append(Image.open(io.BytesIO(image_data)))
    except Exception as e:
        # Chain the cause so the real traceback stays visible in the logs.
        raise gr.Error(f"Error: {e}") from e
    finally:
        if original_path is not None:
            _remove_temp_file(original_path)
    return results


#######################################
# 5. Main Process
#######################################

def run_process(
    prompt,
    final_text1,
    final_text2,
    final_text3,
    height,
    width,
    steps,
    scale,
    seed
):
    """
    End-to-end handler behind the "Generate" button.

    1) Translate the prompt to English when it contains Korean.
    2) For every final text pick the placeholder drawn first: the text
       itself if it is purely English, else random letters of equal length.
    3) Render the initial image from the prompt with those placeholders.
    4) Ask the text-modification step twice to swap the placeholders for
       the real texts, and return exactly those two images.
    """
    english_prompt = maybe_translate_to_english(prompt)

    # Placeholders are computed in order: text1, text2, text3.
    r1, r2, r3 = (
        maybe_use_random_or_original(requested)
        for requested in (final_text1, final_text2, final_text3)
    )
    print(f"[DEBUG] Using placeholders: r1='{r1}', r2='{r2}', r3='{r3}'")

    final_prompt = fill_prompt_with_random_texts(english_prompt, r1, r2, r3)
    print(f"[DEBUG] final_prompt = {final_prompt}")

    # Intermediate image containing the placeholder texts; never shown to the user.
    base_image = generate_initial_image(final_prompt, height, width, steps, scale, seed)

    instruction = build_multi_change_instruction(
        r1, final_text1,
        r2, final_text2,
        r3, final_text3,
    )
    final_imgs = change_text_in_image_two_times(base_image, instruction)

    first_variant = final_imgs[0]
    second_variant = final_imgs[1]
    return [first_variant, second_variant]

#######################################
# 6. Gradio UI
#######################################

# Build the Gradio interface: a left column with the prompt, the three text
# inputs, advanced generation settings, the run button and example prompts;
# a right column with the two resulting images.
with gr.Blocks(title="Eevery Text Imaginator: FLUX") as demo:
    # Header and short usage description, rendered as HTML inside Markdown.
    gr.Markdown(
        """
        <h2 style="text-align:center; margin-bottom: 15px;">
            <strong>Eevery Text Imaginator: FLUX</strong>
        </h2>
        
        <p style="text-align:center;">
            This tool generates two final images from a prompt
            containing placeholders <code>&lt;text1&gt;</code>, <code>&lt;text2&gt;</code>, <code>&lt;text3&gt;</code>.
            If your chosen text is purely English, it will appear directly;
            otherwise it becomes random letters in the initial phase.
        </p>
        
        <hr style="margin: 15px 0;">
        """
    )

    # 5 example prompts (focusing on <text1>, <text2>).
    # Each row is [prompt, text1, text2, text3]; text3 is left empty.
    examples = [
        [
            "On a grand stage, <text1> in big letters and <text2> on the left side",
            "HELLO", "WORLD", ""
        ],
        [
            "Futuristic neon sign with <text1>, plus <text2> near the bottom",
            "WELCOME", "SALE", ""
        ],
        [
            "A classical poster reading <text1> in bold, <text2> as a subtitle",
            "MUSICFEST", "2025", ""
        ],
        [
            "In a cartoon style, a speech bubble with <text1> and another text <text2>",
            "HI!", "OhYes", ""
        ],
        [
            "Large billboard featuring <text1>, smaller text <text2> in the corner",
            "ANNOUNCEMENT", "OPENNOW", ""
        ],
    ]

    with gr.Row():
        with gr.Column():
            # NOTE(review): gr.Box does not exist in every Gradio release —
            # confirm against the installed version (gr.Group is the usual
            # substitute where Box is unavailable).
            with gr.Box():
                prompt_input = gr.Textbox(
                    lines=3,
                    label="Prompt (Korean or English)",
                    placeholder="On a grand stage, <text1> in big letters..."
                )
                final_text1 = gr.Textbox(
                    label="New Text #1 (Required)",
                    placeholder="Example: HELLO or 안녕하세요"
                )
                final_text2 = gr.Textbox(
                    label="New Text #2 (Optional)",
                    placeholder="Example: WORLD or 반갑습니다"
                )
                final_text3 = gr.Textbox(
                    label="New Text #3 (Optional)",
                    placeholder="(Leave blank if not used)"
                )

            # Generation parameters forwarded unchanged to run_process().
            with gr.Accordion("Advanced Settings (optional)", open=False):
                height = gr.Slider(label="Height", minimum=256, maximum=1152, step=64, value=512)
                width = gr.Slider(label="Width", minimum=256, maximum=1152, step=64, value=512)
                steps = gr.Slider(label="Inference Steps", minimum=6, maximum=25, step=1, value=8)
                scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=10.0, step=0.5, value=3.5)
                seed = gr.Number(label="Seed", value=1234, precision=0)

            run_btn = gr.Button("Generate 2 Final Images", variant="primary")

            # Clicking an example fills the prompt and the three text boxes.
            gr.Examples(
                examples=examples,
                inputs=[prompt_input, final_text1, final_text2, final_text3],
                label="Example Prompts"
            )

        with gr.Column():
            final_image_output1 = gr.Image(label="Final Image #1", type="pil")
            final_image_output2 = gr.Image(label="Final Image #2", type="pil")

    # We only display the 2 final images, not the initial random image.
    # The input order below must match the parameter order of run_process().
    run_btn.click(
        fn=run_process,
        inputs=[
            prompt_input,
            final_text1,
            final_text2,
            final_text3,
            height,
            width,
            steps,
            scale,
            seed
        ],
        outputs=[final_image_output1, final_image_output2]
    )

# Start the web server when the module is executed.
demo.launch(max_threads=20)