ginipick committed on
Commit a2a7c32 · verified · 1 Parent(s): e2ffb6b

Update app.py

Files changed (1)
  1. app.py +37 -155
app.py CHANGED
@@ -1,36 +1,26 @@
  import subprocess
  import os

  subprocess.run(
      "pip install flash-attn --no-build-isolation",
      env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
      shell=True,
  )

- subprocess.run(
-     "pip install huggingface_hub==0.25.0",
-     shell=True,
- )
-
- subprocess.run(
-     "pip install numpy==1.26.4",
-     shell=True,
- )
-
- # Additional dependencies for translation and UI improvements
- subprocess.run(
-     "pip install transformers gradio safetensors torchvision diffusers",
-     shell=True,
- )
-
  os.makedirs("/home/user/app/checkpoints", exist_ok=True)
  from huggingface_hub import snapshot_download
- snapshot_download(
-     repo_id="Alpha-VLLM/Lumina-Image-2.0", local_dir="/home/user/app/checkpoints"
- )

  hf_token = os.environ["HF_TOKEN"]

  import argparse
  import builtins
  import json
@@ -40,18 +30,16 @@ import random
  import socket
  import traceback

  import gradio as gr
  import numpy as np
  from safetensors.torch import load_file
- import torch
  from torchvision.transforms.functional import to_pil_image

- # Import translation pipeline from transformers
  from transformers import pipeline
  translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")

- import spaces
-
  from imgproc import generate_crop_size_list
  import models
  from transport import Sampler, create_transport
@@ -97,32 +85,26 @@ def encode_prompt(prompt_batch, text_encoder, tokenizer, proportion_empty_prompt

      return prompt_embeds, prompt_masks

-
  @torch.no_grad()
  def model_main(args, master_port, rank):
-     # Import here to avoid huggingface Tokenizer parallelism warnings
      from diffusers.models import AutoencoderKL
      from transformers import AutoModel, AutoTokenizer

-     # Override the default print function since the delay can be large for child processes
      original_print = builtins.print
-
      def print(*args, **kwargs):
          kwargs.setdefault("flush", True)
          original_print(*args, **kwargs)
-
      builtins.print = print

      train_args = torch.load(os.path.join(args.ckpt, "model_args.pth"))
      print("Loaded model arguments:", json.dumps(train_args.__dict__, indent=2))

      print(f"Creating lm: Gemma-2-2B")
-
      dtype = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[args.precision]

-     text_encoder = AutoModel.from_pretrained(
-         "google/gemma-2-2b", torch_dtype=dtype, token=hf_token
-     ).eval().to("cuda")
      cap_feat_dim = text_encoder.config.hidden_size
      if args.num_gpus > 1:
          raise NotImplementedError("Inference with >1 GPUs not yet supported")
@@ -133,7 +115,6 @@ def model_main(args, master_port, rank):
      vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", token=hf_token).cuda()

      print(f"Creating DiT: {train_args.model}")
-
      model = models.__dict__[train_args.model](
          in_channels=16,
          qk_norm=train_args.qk_norm,
@@ -163,7 +144,6 @@ def model_main(args, master_port, rank):

      return text_encoder, tokenizer, vae, model

-
  @torch.no_grad()
  def inference(args, infer_args, text_encoder, tokenizer, vae, model):
      dtype = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[args.precision]
@@ -211,7 +191,7 @@ def inference(args, infer_args, text_encoder, tokenizer, vae, model):
      print("> Parameters:", json.dumps(metadata, indent=2))

      try:
-         # Begin sampler
          if solver == "dpm":
              transport = create_transport("Linear", "velocity")
              sampler = Sampler(transport)
@@ -236,8 +216,7 @@ def inference(args, infer_args, text_encoder, tokenizer, vae, model):
              reverse=args.reverse,
              time_shifting_factor=t_shift,
          )
-         # End sampler
-
          resolution = resolution.split(" ")[-1]
          w, h = resolution.split("x")
          w, h = int(w), int(h)
@@ -252,7 +231,7 @@ def inference(args, infer_args, text_encoder, tokenizer, vae, model):
              cap_feats, cap_mask = encode_prompt([cap] + [neg_cap], text_encoder, tokenizer, 0.0)
          else:
              cap_feats, cap_mask = encode_prompt([cap] + [""], text_encoder, tokenizer, 0.0)
-
          cap_mask = cap_mask.to(cap_feats.device)

          model_kwargs = dict(
@@ -304,13 +283,11 @@ def inference(args, infer_args, text_encoder, tokenizer, vae, model):
          print(traceback.format_exc())
          return ModelFailure()

-
  def none_or_str(value):
      if value == "None":
          return None
      return value

-
  def parse_transport_args(parser):
      group = parser.add_argument_group("Transport arguments")
      group.add_argument(
@@ -337,7 +314,6 @@ def parse_transport_args(parser):
      group.add_argument("--sample-eps", type=float, help="Sampling parameter in the transport model.")
      group.add_argument("--train-eps", type=float, help="Training epsilon to stabilize learning.")

-
  def parse_ode_args(parser):
      group = parser.add_argument_group("ODE arguments")
      group.add_argument(
@@ -359,7 +335,6 @@ def parse_ode_args(parser):
          help="Enable likelihood calculation during the ODE solving process.",
      )

-
  def find_free_port() -> int:
      sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
      sock.bind(("", 0))
@@ -367,22 +342,17 @@ def find_free_port() -> int:
      sock.close()
      return port

-
- # Utility function to translate Korean text to English if needed.
  def translate_if_korean(text: str) -> str:
      import re
-     # Check if any Korean characters are present
      if re.search(r"[ㄱ-ㅎㅏ-ㅣ가-힣]", text):
          print("Translating Korean prompt to English...")
          translation = translator(text)
-         # Return the translated text from the pipeline output
          return translation[0]["translation_text"]
      return text

-
  def main():
      parser = argparse.ArgumentParser()
-
      parser.add_argument("--num_gpus", type=int, default=1)
      parser.add_argument("--ckpt", type=str, default='/home/user/app/checkpoints', required=False)
      parser.add_argument("--ema", action="store_true")
@@ -393,19 +363,16 @@ def main():

      parse_transport_args(parser)
      parse_ode_args(parser)
-
      args = parser.parse_known_args()[0]

      if args.num_gpus != 1:
          raise NotImplementedError("Multi-GPU Inference is not yet supported")

      master_port = find_free_port()
-
      text_encoder, tokenizer, vae, model = model_main(args, master_port, 0)
-
      description = "Lumina-Image 2.0 ([Github](https://github.com/Alpha-VLLM/Lumina-Image-2.0/tree/main))"

-     # Create a Gradio Blocks UI with custom CSS for a sleek, modern appearance.
      custom_css = """
      body {
          background: linear-gradient(135deg, #1a2a6c, #b21f1f, #fdbb2d);
@@ -436,14 +403,14 @@ def main():
      label="Caption",
      interactive=True,
      value="Majestic landscape photograph of snow-capped mountains under a dramatic sky at sunset. The mountains dominate the lower half of the image, with rugged peaks and deep crevasses visible. A glacier flows down the right side, partially illuminated by the warm light. The sky is filled with fiery orange and golden clouds, contrasting with the cool tones of the snow. The central peak is partially obscured by clouds, adding a sense of mystery. The foreground features dark, shadowed forested areas, enhancing the depth. High contrast, natural lighting, warm color palette, photorealistic, expansive, awe-inspiring, serene, visually balanced, dynamic composition.",
-     placeholder="Enter a caption.",
  )
  neg_cap = gr.Textbox(
      lines=2,
      label="Negative Caption",
      interactive=True,
      value="",
-     placeholder="Enter a negative caption.",
  )
  default_value = "You are an assistant designed to generate high-quality images with the highest degree of image-text alignment based on textual prompts."
  system_type = gr.Dropdown(
@@ -451,99 +418,36 @@ def main():
      choices=[
          "You are an assistant designed to generate high-quality images with the highest degree of image-text alignment based on textual prompts.",
          "You are an assistant designed to generate superior images with the superior degree of image-text alignment based on textual prompts or user prompts.",
-         "",
      ],
-     label="System Type",
  )
-
  with gr.Row():
      res_choices = [f"{w}x{h}" for w, h in generate_crop_size_list((args.res // 64) ** 2, 64)]
      default_value = "1024x1024"
-     resolution = gr.Dropdown(
-         value=default_value, choices=res_choices, label="Resolution"
-     )
  with gr.Row():
-     num_sampling_steps = gr.Slider(
-         minimum=1,
-         maximum=70,
-         value=40,
-         step=1,
-         interactive=True,
-         label="Sampling Steps",
-     )
-     seed = gr.Slider(
-         minimum=0,
-         maximum=int(1e5),
-         value=0,
-         step=1,
-         interactive=True,
-         label="Seed (0 for random)",
-     )
-     cfg_trunc = gr.Slider(
-         minimum=0,
-         maximum=1,
-         value=0,
-         step=0.01,
-         interactive=True,
-         label="CFG Truncation",
-     )
  with gr.Row():
-     solver = gr.Dropdown(
-         value="euler",
-         choices=["euler", "midpoint", "rk4"],
-         label="Solver",
-     )
-     t_shift = gr.Slider(
-         minimum=1,
-         maximum=20,
-         value=6,
-         step=1,
-         interactive=True,
-         label="Time Shift",
-     )
-     cfg_scale = gr.Slider(
-         minimum=1.0,
-         maximum=20.0,
-         value=4.0,
-         interactive=True,
-         label="CFG Scale",
-     )
  with gr.Row():
-     renorm_cfg = gr.Dropdown(
-         value=True,
-         choices=[True, False, 2.0],
-         label="CFG Renorm",
-     )
  with gr.Accordion("Advanced Settings for Resolution Extrapolation", open=False):
      with gr.Row():
-         scaling_method = gr.Dropdown(
-             value="Time-aware",
-             choices=["Time-aware", "None"],
-             label="RoPE Scaling Method",
-         )
-         scaling_watershed = gr.Slider(
-             minimum=0.0,
-             maximum=1.0,
-             value=0.3,
-             interactive=True,
-             label="Linear/NTK Watershed",
-         )
      with gr.Row():
-         proportional_attn = gr.Checkbox(
-             value=True,
-             interactive=True,
-             label="Proportional Attention",
-         )
  with gr.Row():
      submit_btn = gr.Button("Submit", variant="primary")
  with gr.Column():
-     output_img = gr.Image(
-         label="Generated Image",
-         interactive=False,
-     )
      with gr.Accordion(label="Generation Parameters", open=True):
          gr_metadata = gr.JSON(label="Metadata", show_label=False)
-
  with gr.Row():
      prompts = [
          "Close-up portrait of a young woman with light brown hair, looking to the right, illuminated by warm, golden sunlight. Her hair is gently tousled, catching the light and creating a halo effect around her head. She wears a white garment with a V-neck, visible in the lower left of the frame. The background is dark and out of focus, enhancing the contrast between her illuminated face and the shadows. Soft, ethereal lighting, high contrast, warm color palette, shallow depth of field, natural backlighting, serene and contemplative mood, cinematic quality, intimate and visually striking composition.",
@@ -553,48 +457,26 @@ def main():
      ]
      prompts = [[p] for p in prompts]
      gr.Examples(prompts, [cap], label="Examples")
-
  @spaces.GPU(duration=200)
  def on_submit(cap, neg_cap, system_type, resolution, num_sampling_steps, cfg_scale, cfg_trunc, renorm_cfg, solver, t_shift, seed, scaling_method, scaling_watershed, proportional_attn, progress=gr.Progress(track_tqdm=True)):
-     # Translate the caption and negative caption if they contain Korean characters
      cap = translate_if_korean(cap)
      if neg_cap and neg_cap.strip():
          neg_cap = translate_if_korean(neg_cap)
-     # Pack updated arguments and call inference
      infer_args = (cap, neg_cap, system_type, resolution, num_sampling_steps, cfg_scale, cfg_trunc, renorm_cfg, solver, t_shift, seed, scaling_method, scaling_watershed, proportional_attn)
      result = inference(args, infer_args, text_encoder, tokenizer, vae, model)
      if isinstance(result, ModelFailure):
          raise RuntimeError("Model failed to generate the image.")
      return result
-
  submit_btn.click(
      on_submit,
-     [
-         cap,
-         neg_cap,
-         system_type,
-         resolution,
-         num_sampling_steps,
-         cfg_scale,
-         cfg_trunc,
-         renorm_cfg,
-         solver,
-         t_shift,
-         seed,
-         scaling_method,
-         scaling_watershed,
-         proportional_attn,
-     ],
      [output_img, gr_metadata],
  )
-
  def show_scaling_watershed(scaling_m):
      return gr.update(visible=scaling_m == "Time-aware")
-
  scaling_method.change(show_scaling_watershed, scaling_method, scaling_watershed)
-
  demo.queue().launch(server_name="0.0.0.0")

-
  if __name__ == "__main__":
      main()
 
  import subprocess
  import os

+ # Install the required packages (ignored if they are already installed)
  subprocess.run(
      "pip install flash-attn --no-build-isolation",
      env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
      shell=True,
  )
+ subprocess.run("pip install huggingface_hub==0.25.0", shell=True)
+ subprocess.run("pip install numpy==1.26.4 sentencepiece sacremoses transformers gradio safetensors torchvision diffusers", shell=True)

+ # Create the checkpoint folder and download the model snapshot
  os.makedirs("/home/user/app/checkpoints", exist_ok=True)
  from huggingface_hub import snapshot_download
+ snapshot_download(repo_id="Alpha-VLLM/Lumina-Image-2.0", local_dir="/home/user/app/checkpoints")

  hf_token = os.environ["HF_TOKEN"]

+ # ★ Important: import the spaces package before CUDA is initialized.
+ import spaces
+
+ # Now import the CUDA-related libraries.
  import argparse
  import builtins
  import json
 
  import socket
  import traceback

+ import torch
  import gradio as gr
  import numpy as np
  from safetensors.torch import load_file
  from torchvision.transforms.functional import to_pil_image

+ # Translation pipeline (translates Korean prompts to English)
  from transformers import pipeline
  translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")

  from imgproc import generate_crop_size_list
  import models
  from transport import Sampler, create_transport

      return prompt_embeds, prompt_masks

 
  @torch.no_grad()
  def model_main(args, master_port, rank):
+     # Import diffusers, transformers, etc. inside the function (for their internal imports).
      from diffusers.models import AutoencoderKL
      from transformers import AutoModel, AutoTokenizer

+     # Override the default print function to minimize output delay.
      original_print = builtins.print
      def print(*args, **kwargs):
          kwargs.setdefault("flush", True)
          original_print(*args, **kwargs)
      builtins.print = print

      train_args = torch.load(os.path.join(args.ckpt, "model_args.pth"))
      print("Loaded model arguments:", json.dumps(train_args.__dict__, indent=2))

      print(f"Creating lm: Gemma-2-2B")
      dtype = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[args.precision]

+     text_encoder = AutoModel.from_pretrained("google/gemma-2-2b", torch_dtype=dtype, token=hf_token).eval().to("cuda")
      cap_feat_dim = text_encoder.config.hidden_size
      if args.num_gpus > 1:
          raise NotImplementedError("Inference with >1 GPUs not yet supported")

      vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", token=hf_token).cuda()

      print(f"Creating DiT: {train_args.model}")
      model = models.__dict__[train_args.model](
          in_channels=16,
          qk_norm=train_args.qk_norm,

      return text_encoder, tokenizer, vae, model

  @torch.no_grad()
  def inference(args, infer_args, text_encoder, tokenizer, vae, model):
      dtype = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[args.precision]
 
      print("> Parameters:", json.dumps(metadata, indent=2))

      try:
+         # Sampler setup
          if solver == "dpm":
              transport = create_transport("Linear", "velocity")
              sampler = Sampler(transport)

              reverse=args.reverse,
              time_shifting_factor=t_shift,
          )
+         # Compute the resolution and latent-space size
          resolution = resolution.split(" ")[-1]
          w, h = resolution.split("x")
          w, h = int(w), int(h)

              cap_feats, cap_mask = encode_prompt([cap] + [neg_cap], text_encoder, tokenizer, 0.0)
          else:
              cap_feats, cap_mask = encode_prompt([cap] + [""], text_encoder, tokenizer, 0.0)
+
          cap_mask = cap_mask.to(cap_feats.device)

          model_kwargs = dict(
 
          print(traceback.format_exc())
          return ModelFailure()

  def none_or_str(value):
      if value == "None":
          return None
      return value

  def parse_transport_args(parser):
      group = parser.add_argument_group("Transport arguments")
      group.add_argument(

      group.add_argument("--sample-eps", type=float, help="Sampling parameter in the transport model.")
      group.add_argument("--train-eps", type=float, help="Training epsilon to stabilize learning.")

  def parse_ode_args(parser):
      group = parser.add_argument_group("ODE arguments")
      group.add_argument(

          help="Enable likelihood calculation during the ODE solving process.",
      )

  def find_free_port() -> int:
      sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
      sock.bind(("", 0))

      sock.close()
      return port

+ # Function that translates to English when a Korean prompt is detected
  def translate_if_korean(text: str) -> str:
      import re
      if re.search(r"[ㄱ-ㅎㅏ-ㅣ가-힣]", text):
          print("Translating Korean prompt to English...")
          translation = translator(text)
          return translation[0]["translation_text"]
      return text

  def main():
      parser = argparse.ArgumentParser()
      parser.add_argument("--num_gpus", type=int, default=1)
      parser.add_argument("--ckpt", type=str, default='/home/user/app/checkpoints', required=False)
      parser.add_argument("--ema", action="store_true")

      parse_transport_args(parser)
      parse_ode_args(parser)
      args = parser.parse_known_args()[0]

      if args.num_gpus != 1:
          raise NotImplementedError("Multi-GPU Inference is not yet supported")

      master_port = find_free_port()
      text_encoder, tokenizer, vae, model = model_main(args, master_port, 0)
      description = "Lumina-Image 2.0 ([Github](https://github.com/Alpha-VLLM/Lumina-Image-2.0/tree/main))"

+     # Apply a modern UI style with custom CSS
      custom_css = """
      body {
          background: linear-gradient(135deg, #1a2a6c, #b21f1f, #fdbb2d);
 
      label="Caption",
      interactive=True,
      value="Majestic landscape photograph of snow-capped mountains under a dramatic sky at sunset. The mountains dominate the lower half of the image, with rugged peaks and deep crevasses visible. A glacier flows down the right side, partially illuminated by the warm light. The sky is filled with fiery orange and golden clouds, contrasting with the cool tones of the snow. The central peak is partially obscured by clouds, adding a sense of mystery. The foreground features dark, shadowed forested areas, enhancing the depth. High contrast, natural lighting, warm color palette, photorealistic, expansive, awe-inspiring, serene, visually balanced, dynamic composition.",
+     placeholder="Enter a caption."
  )
  neg_cap = gr.Textbox(
      lines=2,
      label="Negative Caption",
      interactive=True,
      value="",
+     placeholder="Enter a negative caption."
  )
  default_value = "You are an assistant designed to generate high-quality images with the highest degree of image-text alignment based on textual prompts."
  system_type = gr.Dropdown(
      choices=[
          "You are an assistant designed to generate high-quality images with the highest degree of image-text alignment based on textual prompts.",
          "You are an assistant designed to generate superior images with the superior degree of image-text alignment based on textual prompts or user prompts.",
+         ""
      ],
+     label="System Type"
  )
  with gr.Row():
      res_choices = [f"{w}x{h}" for w, h in generate_crop_size_list((args.res // 64) ** 2, 64)]
      default_value = "1024x1024"
+     resolution = gr.Dropdown(value=default_value, choices=res_choices, label="Resolution")
  with gr.Row():
+     num_sampling_steps = gr.Slider(minimum=1, maximum=70, value=40, step=1, interactive=True, label="Sampling Steps")
+     seed = gr.Slider(minimum=0, maximum=int(1e5), value=0, step=1, interactive=True, label="Seed (0 for random)")
+     cfg_trunc = gr.Slider(minimum=0, maximum=1, value=0, step=0.01, interactive=True, label="CFG Truncation")
  with gr.Row():
+     solver = gr.Dropdown(value="euler", choices=["euler", "midpoint", "rk4"], label="Solver")
+     t_shift = gr.Slider(minimum=1, maximum=20, value=6, step=1, interactive=True, label="Time Shift")
+     cfg_scale = gr.Slider(minimum=1.0, maximum=20.0, value=4.0, interactive=True, label="CFG Scale")
  with gr.Row():
+     renorm_cfg = gr.Dropdown(value=True, choices=[True, False, 2.0], label="CFG Renorm")
  with gr.Accordion("Advanced Settings for Resolution Extrapolation", open=False):
      with gr.Row():
+         scaling_method = gr.Dropdown(value="Time-aware", choices=["Time-aware", "None"], label="RoPE Scaling Method")
+         scaling_watershed = gr.Slider(minimum=0.0, maximum=1.0, value=0.3, interactive=True, label="Linear/NTK Watershed")
      with gr.Row():
+         proportional_attn = gr.Checkbox(value=True, interactive=True, label="Proportional Attention")
  with gr.Row():
      submit_btn = gr.Button("Submit", variant="primary")
  with gr.Column():
+     output_img = gr.Image(label="Generated Image", interactive=False)
      with gr.Accordion(label="Generation Parameters", open=True):
          gr_metadata = gr.JSON(label="Metadata", show_label=False)
 
  with gr.Row():
      prompts = [
          "Close-up portrait of a young woman with light brown hair, looking to the right, illuminated by warm, golden sunlight. Her hair is gently tousled, catching the light and creating a halo effect around her head. She wears a white garment with a V-neck, visible in the lower left of the frame. The background is dark and out of focus, enhancing the contrast between her illuminated face and the shadows. Soft, ethereal lighting, high contrast, warm color palette, shallow depth of field, natural backlighting, serene and contemplative mood, cinematic quality, intimate and visually striking composition.",

      ]
      prompts = [[p] for p in prompts]
      gr.Examples(prompts, [cap], label="Examples")

  @spaces.GPU(duration=200)
  def on_submit(cap, neg_cap, system_type, resolution, num_sampling_steps, cfg_scale, cfg_trunc, renorm_cfg, solver, t_shift, seed, scaling_method, scaling_watershed, proportional_attn, progress=gr.Progress(track_tqdm=True)):
+     # Translate Korean prompts to English when detected
      cap = translate_if_korean(cap)
      if neg_cap and neg_cap.strip():
          neg_cap = translate_if_korean(neg_cap)
      infer_args = (cap, neg_cap, system_type, resolution, num_sampling_steps, cfg_scale, cfg_trunc, renorm_cfg, solver, t_shift, seed, scaling_method, scaling_watershed, proportional_attn)
      result = inference(args, infer_args, text_encoder, tokenizer, vae, model)
      if isinstance(result, ModelFailure):
          raise RuntimeError("Model failed to generate the image.")
      return result

  submit_btn.click(
      on_submit,
+     [cap, neg_cap, system_type, resolution, num_sampling_steps, cfg_scale, cfg_trunc, renorm_cfg, solver, t_shift, seed, scaling_method, scaling_watershed, proportional_attn],
      [output_img, gr_metadata],
  )

  def show_scaling_watershed(scaling_m):
      return gr.update(visible=scaling_m == "Time-aware")

  scaling_method.change(show_scaling_watershed, scaling_method, scaling_watershed)

  demo.queue().launch(server_name="0.0.0.0")

  if __name__ == "__main__":
      main()
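
Note: the prompt-translation path added in this commit can be exercised on its own. The snippet below is a minimal standalone sketch, assuming the transformers and sentencepiece packages installed above and that the Helsinki-NLP/opus-mt-ko-en checkpoint can be downloaded; it mirrors the Hangul check and pipeline call of translate_if_korean in app.py, and the sample prompts are illustrative only.

import re
from transformers import pipeline

# Same Korean-to-English model the Space loads (downloaded on first use).
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")

def translate_if_korean(text: str) -> str:
    # Any Hangul jamo or syllable triggers translation; otherwise return the text unchanged.
    if re.search(r"[ㄱ-ㅎㅏ-ㅣ가-힣]", text):
        return translator(text)[0]["translation_text"]
    return text

if __name__ == "__main__":
    print(translate_if_korean("눈 덮인 산맥의 풍경 사진"))      # Korean prompt -> English caption
    print(translate_if_korean("A snowy mountain landscape"))  # English prompt -> returned unchanged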