ginipick committed
Commit 3bee602 · verified · 1 Parent(s): 57b7eaa

Update app.py

Files changed (1): app.py (+38, -66)
app.py CHANGED
@@ -18,47 +18,32 @@ from refiners.fluxion.utils import no_grad
 from refiners.solutions import BoxSegmenter
 from transformers import GroundingDinoForObjectDetection, GroundingDinoProcessor
 from diffusers import FluxPipeline
-# Add imports at the top
-# Add imports at the top
 from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
-
 import gc
-import torch.cuda.amp as amp
 
 def clear_memory():
     """Memory cleanup function"""
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-        torch.cuda.synchronize()
     gc.collect()
+    if torch.cuda.is_available():
+        try:
+            torch.cuda.empty_cache()
+        except:
+            pass
 
-# GPU memory management settings
-if torch.cuda.is_available():
-    torch.cuda.empty_cache()
-    torch.backends.cudnn.benchmark = True
-    torch.backends.cuda.matmul.allow_tf32 = True
-
-    # Memory split settings
-    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = (
-        "max_split_size_mb:128,"
-        "garbage_collection_threshold:0.8,"
-        "memory_fraction:0.9"
-    )
-
-# Automatic Mixed Precision (AMP) settings
+# Wrap GPU configuration in try-except
 if torch.cuda.is_available():
-    scaler = torch.amp.GradScaler('cuda')
-else:
-    scaler = None
+    try:
+        torch.cuda.empty_cache()
+        torch.backends.cudnn.benchmark = True
+        torch.backends.cuda.matmul.allow_tf32 = True
+    except:
+        print("Warning: Could not configure CUDA settings")
 
+# Initialize the translation model
 model_name = "Helsinki-NLP/opus-mt-ko-en"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to('cpu')  # pin to CPU
-translator = pipeline("translation", model=model, tokenizer=tokenizer, device=-1)  # run on CPU
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to('cpu')
+translator = pipeline("translation", model=model, tokenizer=tokenizer, device=-1)
 
 def translate_to_english(text: str) -> str:
     """Translate Korean text to English"""
@@ -72,8 +57,6 @@ def translate_to_english(text: str) -> str:
         print(f"Translation error: {str(e)}")
         return text
 
-
-
 BoundingBox = tuple[int, int, int, int]
 
 pillow_heif.register_heif_opener()
@@ -102,15 +85,13 @@ gd_model = GroundingDinoForObjectDetection.from_pretrained(gd_model_path, torch_
 gd_model = gd_model.to(device=device)
 assert isinstance(gd_model, GroundingDinoForObjectDetection)
 
-
-# Initialize the FLUX pipeline
 # Initialize the FLUX pipeline
 pipe = FluxPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-dev",
-    torch_dtype=torch.float16,  # float16, tuned for the A100
+    torch_dtype=torch.float16,
     use_auth_token=HF_TOKEN
 )
-pipe.enable_attention_slicing(slice_size="auto")  # optimize memory usage
+pipe.enable_attention_slicing(slice_size="auto")
 
 # Load LoRA weights
 pipe.load_lora_weights(
@@ -122,16 +103,12 @@ pipe.load_lora_weights(
 )
 pipe.fuse_lora(lora_scale=0.125)
 
-# GPU memory optimization
-if torch.cuda.is_available():
-    pipe.to("cuda")
-    # Additional settings for memory optimization
-    torch.backends.cudnn.benchmark = True
-    torch.backends.cuda.matmul.allow_tf32 = True
-
-os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # use a single GPU
-os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"  # CUDA memory allocation setting
-
+# Wrap GPU configuration in try-except
+try:
+    if torch.cuda.is_available():
+        pipe.to("cuda")
+except:
+    print("Warning: Could not move pipeline to CUDA")
 
 class timer:
     def __init__(self, method_name="timed process"):
@@ -210,7 +187,6 @@ def generate_background(prompt: str, aspect_ratio: str) -> Image.Image:
         width, height = calculate_dimensions(aspect_ratio)
         width, height = adjust_size_to_multiple_of_8(width, height)
 
-        # Maximum size chosen for the A100 memory limit
         max_size = 768
         if width > max_size or height > max_size:
             ratio = max_size / max(width, height)
@@ -218,24 +194,24 @@
             height = int(height * ratio)
             width, height = adjust_size_to_multiple_of_8(width, height)
 
-        clear_memory()  # clear memory before generation
-
         with timer("Background generation"):
-            with torch.inference_mode(), torch.cuda.amp.autocast():
-                image = pipe(
-                    prompt=prompt,
-                    width=width,
-                    height=height,
-                    num_inference_steps=8,
-                    guidance_scale=4.0,
-                    max_length=77,
-                ).images[0]
-
-        clear_memory()  # clear memory after generation
+            try:
+                with torch.inference_mode():
+                    image = pipe(
+                        prompt=prompt,
+                        width=width,
+                        height=height,
+                        num_inference_steps=8,
+                        guidance_scale=4.0,
+                        max_length=77,
+                    ).images[0]
+            except Exception as e:
+                print(f"Pipeline error: {str(e)}")
+                return Image.new('RGB', (width, height), 'white')
+
         return image
     except Exception as e:
         print(f"Background generation error: {str(e)}")
-        clear_memory()  # clear memory even on error
         return Image.new('RGB', (512, 512), 'white')
@@ -355,21 +331,18 @@ def process_prompt(img: Image.Image, prompt: str, bg_prompt: str | None = None,
                    aspect_ratio: str = "1:1", position: str = "bottom-center",
                    scale_percent: float = 100) -> tuple[Image.Image, Image.Image]:
     try:
-        clear_memory()  # clear memory before processing
         if img is None or prompt.strip() == "":
             raise gr.Error("Please provide both image and prompt")
 
         print(f"Processing with position: {position}, scale: {scale_percent}")
 
         try:
-            # Try translating the prompts
            prompt = translate_to_english(prompt)
            if bg_prompt:
                bg_prompt = translate_to_english(bg_prompt)
         except Exception as e:
             print(f"Translation error (continuing with original text): {str(e)}")
 
-        # Process the image
         results, _ = _process(img, prompt, bg_prompt, aspect_ratio)
 
         if bg_prompt:
@@ -390,9 +363,8 @@ def process_prompt(img: Image.Image, prompt: str, bg_prompt: str | None = None,
     except Exception as e:
         print(f"Error in process_prompt: {str(e)}")
         raise gr.Error(str(e))
-
     finally:
-        clear_memory()  # clear memory after processing
+        clear_memory()
 
 def process_bbox(img: Image.Image, box_input: str) -> tuple[Image.Image, Image.Image]:
     try:
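A few review notes on the changes, with sketches that are illustrative only and not part of the commit.

The new clear_memory() wraps torch.cuda.empty_cache() in a bare except:, which also silences KeyboardInterrupt and SystemExit. A narrower variant, assuming only allocator errors are worth tolerating:

    import gc
    import torch

    def clear_memory() -> None:
        """Release Python garbage and cached CUDA blocks."""
        gc.collect()
        if torch.cuda.is_available():
            try:
                torch.cuda.empty_cache()
            except RuntimeError as e:  # narrower than a bare except:
                print(f"empty_cache failed: {e}")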
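Dropping the old PYTORCH_CUDA_ALLOC_CONF block looks right: "memory_fraction" is not a key that variable accepts (max_split_size_mb and garbage_collection_threshold are), so the string could fail at CUDA initialization. Removing the GradScaler is also consistent, since gradient scaling only matters for training and this app only runs inference. If a memory cap is still wanted, PyTorch exposes it as an API call; a sketch assuming GPU 0:

    import torch

    # Documented equivalent of the intent behind the removed "memory_fraction:0.9".
    if torch.cuda.is_available():
        torch.cuda.set_per_process_memory_fraction(0.9, device=0)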
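The translator stays pinned to the CPU (device=-1) so the GPU is left entirely to the FLUX pipeline. The body of translate_to_english is truncated in this view; a minimal usage sketch of the pipeline object it presumably wraps:

    # A transformers translation pipeline returns a list of dicts.
    result = translator("안녕하세요")         # Korean input
    print(result[0]["translation_text"])   # e.g. "Hello."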
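enable_attention_slicing(slice_size="auto") trades a little speed for lower peak memory during attention. If the unconditional pipe.to("cuda") still runs out of memory, diffusers offers sequential offloading as a further fallback (requires accelerate; not used in this commit):

    # Submodules live on the CPU and are moved to the GPU only while they run,
    # replacing the plain pipe.to("cuda") above.
    pipe.enable_model_cpu_offload()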
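One possible bug the new try/except will mask: recent diffusers releases define FluxPipeline.__call__ with max_sequence_length rather than max_length, so max_length=77 may raise a TypeError and every background would silently come back as the white placeholder. If that is the case here, the likely intent inside generate_background is:

    # Hypothetical fix, assuming the kwarg was meant to bound the T5 prompt length:
    image = pipe(
        prompt=prompt,
        width=width,
        height=height,
        num_inference_steps=8,
        guidance_scale=4.0,
        max_sequence_length=77,
    ).images[0]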
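Removing torch.cuda.amp.autocast() around the pipe call is reasonable: the pipeline was loaded with torch_dtype=torch.float16, so autocast adds little, and torch.cuda.amp.autocast is the legacy spelling in any case. If mixed precision were ever reinstated, the current form would be:

    # torch.amp.autocast is the non-deprecated replacement for torch.cuda.amp.autocast.
    with torch.inference_mode(), torch.amp.autocast("cuda", dtype=torch.float16):
        image = pipe(prompt=prompt, width=width, height=height,
                     num_inference_steps=8, guidance_scale=4.0).images[0]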
 
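Finally, the timer class used around generation is cut off after __init__ in this view; a compatible context-manager sketch, assuming the truncated body does simple wall-clock logging:

    import time

    class timer:
        def __init__(self, method_name="timed process"):
            self.method_name = method_name
        def __enter__(self):
            self.start = time.perf_counter()
            return self
        def __exit__(self, exc_type, exc, tb):
            print(f"{self.method_name} took {time.perf_counter() - self.start:.2f}s")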