Spaces:

ginigen
/

FLUXllama-Multilingual

Running on Zero

App Files Files Community

ginipick committed on Dec 16, 2024

Commit

4e85f51

verified ·

1 Parent(s): 844ec2f

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -32

app.py CHANGED Viewed

@@ -753,39 +753,78 @@ model_zero_init = False
 # result = model.load_state_dict(load_file("/storage/dev/nyanko/flux-dev/flux1-dev.sft"))
 @spaces.GPU
 @torch.no_grad()
 def generate_image(
     prompt, width, height, guidance, inference_steps, seed,
     do_img2img, init_image, image2image_strength, resize_img,
     progress=gr.Progress(track_tqdm=True),
 ):
     translated_prompt = prompt
-    # 한글 또는 일본어 문자 감지
-    def contains_korean(text):
-        return any('\u3131' <= c <= '\u318E' or '\uAC00' <= c <= '\uD7A3' for c in text)
-    def contains_japanese(text):
-        return any('\u3040' <= c <= '\u309F' or '\u30A0' <= c <= '\u30FF' or '\u4E00' <= c <= '\u9FFF' for c in text)
-    # 한글이나 일본어가 있으면 번역
-    if contains_korean(prompt):
-        translated_prompt = ko_translator(prompt, max_length=512)[0]['translation_text']
-        print(f"Translated Korean prompt: {translated_prompt}")
-        prompt = translated_prompt
-    elif contains_japanese(prompt):
-        translated_prompt = ja_translator(prompt, max_length=512)[0]['translation_text']
-        print(f"Translated Japanese prompt: {translated_prompt}")
-        prompt = translated_prompt
     if seed == 0:
         seed = int(random.random() * 1000000)
     device = "cuda" if torch.cuda.is_available() else "cpu"
     torch_device = torch.device(device)
     global model, model_zero_init
     if not model_zero_init:
@@ -802,10 +841,11 @@ def generate_image(
             height = init_image.shape[-2]
             width = init_image.shape[-1]
         init_image = ae.encode(init_image.to(torch_device).to(torch.bfloat16)).latent_dist.sample()
-        init_image =  (init_image - ae.config.shift_factor) * ae.config.scaling_factor
     generator = torch.Generator(device=device).manual_seed(seed)
-    x = torch.randn(1, 16, 2 * math.ceil(height / 16), 2 * math.ceil(width / 16), device=device, dtype=torch.bfloat16, generator=generator)
     num_steps = inference_steps
     timesteps = get_schedule(num_steps, (x.shape[-1] * x.shape[-2]) // 4, shift=True)
@@ -816,12 +856,9 @@ def generate_image(
         timesteps = timesteps[t_idx:]
         x = t * x + (1.0 - t) * init_image.to(x.dtype)
-    inp = prepare(t5=t5, clip=clip, img=x, prompt=prompt)
     x = denoise(model, **inp, timesteps=timesteps, guidance=guidance)
-    # with profile(activities=[ProfilerActivity.CPU],record_shapes=True,profile_memory=True) as prof:
-    # print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=20))
     x = unpack(x.float(), height, width)
     with torch.autocast(device_type=torch_device.type, dtype=torch.bfloat16):
         x = x = (x / ae.config.scaling_factor) + ae.config.shift_factor
@@ -831,22 +868,33 @@ def generate_image(
     x = rearrange(x[0], "c h w -> h w c")
     img = Image.fromarray((127.5 * (x + 1.0)).cpu().byte().numpy())
     return img, seed, translated_prompt
 css = """
 footer {
     visibility: hidden;
 }
 """
 def create_demo():
     with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
         with gr.Row():
             with gr.Column():
-                prompt = gr.Textbox(label="Prompt(한글 가능)", value="A cute and fluffy golden retriever puppy sitting upright, holding a neatly designed white sign with bold, colorful lettering that reads 'Have a Happy Day!' in cheerful fonts. The puppy has expressive, sparkling eyes, a happy smile, and fluffy ears slightly flopped. The background is a vibrant and sunny meadow with soft-focus flowers, glowing sunlight filtering through the trees, and a warm golden glow that enhances the joyful atmosphere. The sign is framed with small decorative flowers, adding a charming and wholesome touch. Ensure the text on the sign is clear and legible.")
                 width = gr.Slider(minimum=128, maximum=2048, step=64, label="Width", value=768)
                 height = gr.Slider(minimum=128, maximum=2048, step=64, label="Height", value=768)
@@ -861,13 +909,21 @@ def create_demo():
                 seed = gr.Number(label="Seed", precision=-1)
                 do_img2img = gr.Checkbox(label="Image to Image", value=False)
                 init_image = gr.Image(label="Input Image", visible=False)
-                image2image_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Noising strength", value=0.8, visible=False)
                 resize_img = gr.Checkbox(label="Resize image", value=True, visible=False)
                 generate_button = gr.Button("Generate")
             with gr.Column():
                 output_image = gr.Image(label="Generated Image")
                 output_seed = gr.Text(label="Used Seed")
         do_img2img.change(
             fn=lambda x: [gr.update(visible=x), gr.update(visible=x), gr.update(visible=x)],
@@ -877,8 +933,12 @@ def create_demo():
         generate_button.click(
             fn=generate_image,
-            inputs=[prompt, width, height, guidance, inference_steps, seed, do_img2img, init_image, image2image_strength, resize_img],
-            outputs=[output_image, output_seed]
         )
         examples = [

 # result = model.load_state_dict(load_file("/storage/dev/nyanko/flux-dev/flux1-dev.sft"))
+# Keep the existing import statements as they are...
+# Maps the language names offered in the UI dropdown to the model identifier
+# that get_translator() hands to pipeline("translation", model=...).
+LANGUAGE_MODELS = {
+    "Korean": "Helsinki-NLP/opus-mt-ko-en",
+    "Japanese": "Helsinki-NLP/opus-mt-ja-en",
+    "Chinese": "Helsinki-NLP/opus-mt-zh-en",
+    "Russian": "Helsinki-NLP/opus-mt-ru-en",
+    "Spanish": "Helsinki-NLP/opus-mt-es-en",
+    "French": "Helsinki-NLP/opus-mt-fr-en",
+    "Arabic": "Helsinki-NLP/opus-mt-ar-en",
+    "Bengali": "Helsinki-NLP/opus-mt-bn-en",
+    "Estonian": "Helsinki-NLP/opus-mt-et-en",
+    "Polish": "Helsinki-NLP/opus-mt-pl-en",
+    "Swedish": "Helsinki-NLP/opus-mt-sv-en",
+    "Thai": "Helsinki-NLP/opus-mt-th-en",
+    "Urdu": "Helsinki-NLP/opus-mt-ur-en",
+    "Bulgarian": "Helsinki-NLP/opus-mt-bg-en",
+    "Catalan": "Helsinki-NLP/opus-mt-ca-en",
+    "Czech": "Helsinki-NLP/opus-mt-cs-en",
+    "Azerbaijani": "Helsinki-NLP/opus-mt-az-en",
+    # "bat" is the Baltic language-group code, not Basque (which is "eu"),
+    # so the entry is labelled after the model it actually loads.
+    "Baltic": "Helsinki-NLP/opus-mt-bat-en",
+    "Bicolano": "Helsinki-NLP/opus-mt-bcl-en",
+    "Bemba": "Helsinki-NLP/opus-mt-bem-en",
+    "Berber": "Helsinki-NLP/opus-mt-ber-en",
+    "Bislama": "Helsinki-NLP/opus-mt-bi-en",
+    "Bantu": "Helsinki-NLP/opus-mt-bnt-en",
+    "Brazilian Sign Language": "Helsinki-NLP/opus-mt-bzs-en",
+    "Caucasian": "Helsinki-NLP/opus-mt-cau-en",
+    "Cebuano": "Helsinki-NLP/opus-mt-ceb-en",
+    "Celtic": "Helsinki-NLP/opus-mt-cel-en",
+    "Chuukese": "Helsinki-NLP/opus-mt-chk-en",
+    "Creoles and pidgins (French)": "Helsinki-NLP/opus-mt-cpf-en",
+    "Seychelles Creole": "Helsinki-NLP/opus-mt-crs-en",
+    "American Sign Language": "Helsinki-NLP/opus-mt-ase-en",
+    "Artificial Language": "Helsinki-NLP/opus-mt-art-en",
+    "Atlantic-Congo": "Helsinki-NLP/opus-mt-alv-en",
+    "Afroasiatic": "Helsinki-NLP/opus-mt-afa-en",
+    "Afrikaans": "Helsinki-NLP/opus-mt-af-en",
+    "Austroasiatic": "Helsinki-NLP/opus-mt-aav-en"
+}
+# Module-level cache of already-loaded translators, keyed by language name;
+# filled on demand by get_translator().
+translators = {}
+def get_translator(language):
+    """Return the translator for ``language``, loading it on first request.
+
+    Loaded translators are kept in the module-level ``translators`` dict so
+    each model is built at most once.
+
+    Args:
+        language: A language name; expected to be a key of ``LANGUAGE_MODELS``.
+
+    Returns:
+        The cached or newly created ``pipeline("translation", ...)`` object,
+        or ``None`` when ``language`` is neither cached nor listed in
+        ``LANGUAGE_MODELS``.
+    """
+    # Fast path: this language was already loaded earlier.
+    if language in translators:
+        return translators[language]
+    # Unknown language: nothing to load, signal that with None.
+    if language not in LANGUAGE_MODELS:
+        return None
+    model_id = LANGUAGE_MODELS[language]
+    loaded = pipeline("translation", model=model_id)
+    translators[language] = loaded
+    return loaded
 @spaces.GPU
 @torch.no_grad()
 def generate_image(
     prompt, width, height, guidance, inference_steps, seed,
     do_img2img, init_image, image2image_strength, resize_img,
+    selected_language="Auto",
     progress=gr.Progress(track_tqdm=True),
 ):
     translated_prompt = prompt
+    if selected_language != "Auto":
+        translator = get_translator(selected_language)
+        if translator:
+            translated_prompt = translator(prompt, max_length=512)[0]['translation_text']
+            print(f"Translated from {selected_language}: {translated_prompt}")
     if seed == 0:
         seed = int(random.random() * 1000000)
     device = "cuda" if torch.cuda.is_available() else "cpu"
     torch_device = torch.device(device)
     global model, model_zero_init
     if not model_zero_init:
             height = init_image.shape[-2]
             width = init_image.shape[-1]
         init_image = ae.encode(init_image.to(torch_device).to(torch.bfloat16)).latent_dist.sample()
+        init_image = (init_image - ae.config.shift_factor) * ae.config.scaling_factor
     generator = torch.Generator(device=device).manual_seed(seed)
+    x = torch.randn(1, 16, 2 * math.ceil(height / 16), 2 * math.ceil(width / 16),
+                   device=device, dtype=torch.bfloat16, generator=generator)
     num_steps = inference_steps
     timesteps = get_schedule(num_steps, (x.shape[-1] * x.shape[-2]) // 4, shift=True)
         timesteps = timesteps[t_idx:]
         x = t * x + (1.0 - t) * init_image.to(x.dtype)
+    inp = prepare(t5=t5, clip=clip, img=x, prompt=translated_prompt)
     x = denoise(model, **inp, timesteps=timesteps, guidance=guidance)
     x = unpack(x.float(), height, width)
     with torch.autocast(device_type=torch_device.type, dtype=torch.bfloat16):
         x = x = (x / ae.config.scaling_factor) + ae.config.shift_factor
     x = rearrange(x[0], "c h w -> h w c")
     img = Image.fromarray((127.5 * (x + 1.0)).cpu().byte().numpy())
     return img, seed, translated_prompt
 css = """
 footer {
     visibility: hidden;
 }
 """
 def create_demo():
     with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
         with gr.Row():
             with gr.Column():
+                # 언어 선택 드롭다운 추가
+                language_selector = gr.Dropdown(
+                    choices=["Auto"] + list(LANGUAGE_MODELS.keys()),
+                    value="Auto",
+                    label="Input Language"
+                )
+                prompt = gr.Textbox(
+                    label="Prompt (Multi-language Support)",
+                    value="A cute and fluffy golden retriever puppy sitting upright, holding a neatly designed white sign with bold, colorful lettering that reads 'Have a Happy Day!' in cheerful fonts. The puppy has expressive, sparkling eyes, a happy smile, and fluffy ears slightly flopped. The background is a vibrant and sunny meadow with soft-focus flowers, glowing sunlight filtering through the trees, and a warm golden glow that enhances the joyful atmosphere. The sign is framed with small decorative flowers, adding a charming and wholesome touch. Ensure the text on the sign is clear and legible."
+                )
                 width = gr.Slider(minimum=128, maximum=2048, step=64, label="Width", value=768)
                 height = gr.Slider(minimum=128, maximum=2048, step=64, label="Height", value=768)
                 seed = gr.Number(label="Seed", precision=-1)
                 do_img2img = gr.Checkbox(label="Image to Image", value=False)
                 init_image = gr.Image(label="Input Image", visible=False)
+                image2image_strength = gr.Slider(
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.01,
+                    label="Noising strength",
+                    value=0.8,
+                    visible=False
+                )
                 resize_img = gr.Checkbox(label="Resize image", value=True, visible=False)
                 generate_button = gr.Button("Generate")
             with gr.Column():
                 output_image = gr.Image(label="Generated Image")
                 output_seed = gr.Text(label="Used Seed")
+                translated_prompt = gr.Text(label="Translated Prompt")
         do_img2img.change(
             fn=lambda x: [gr.update(visible=x), gr.update(visible=x), gr.update(visible=x)],
         generate_button.click(
             fn=generate_image,
+            inputs=[
+                prompt, width, height, guidance, inference_steps, seed,
+                do_img2img, init_image, image2image_strength, resize_img,
+                language_selector
+            ],
+            outputs=[output_image, output_seed, translated_prompt]
         )
         examples = [