aiqtech committed on
Commit
48af830
·
verified ·
1 Parent(s): 279d61b

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -968
app.py DELETED
@@ -1,968 +0,0 @@
1
- import tempfile
2
- import time
3
- from collections.abc import Sequence
4
- from typing import Any, cast
5
- import os
6
- from huggingface_hub import login, hf_hub_download
7
-
8
- import gradio as gr
9
- import numpy as np
10
- import pillow_heif
11
- import spaces
12
- import torch
13
- from gradio_image_annotation import image_annotator
14
- from gradio_imageslider import ImageSlider
15
- from PIL import Image
16
- from pymatting.foreground.estimate_foreground_ml import estimate_foreground_ml
17
- from refiners.fluxion.utils import no_grad
18
- from refiners.solutions import BoxSegmenter
19
- from transformers import GroundingDinoForObjectDetection, GroundingDinoProcessor
20
- from diffusers import FluxPipeline
21
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
22
- import gc
23
-
24
- from PIL import Image, ImageDraw, ImageFont
25
- from PIL import Image
26
- from gradio_client import Client, handle_file
27
- import uuid
28
- import traceback # 상단에 추가
29
-
30
def clear_memory():
    """Free unreferenced Python objects and cached CUDA memory.

    Runs the garbage collector and then, when CUDA is available, empties
    the CUDA cache on device 0. The CUDA part is best-effort: a failure is
    printed and otherwise ignored so that cleanup never aborts the caller.
    """
    gc.collect()
    try:
        if torch.cuda.is_available():
            with torch.cuda.device(0):  # explicitly use device 0
                torch.cuda.empty_cache()
    except Exception as exc:
        # Narrowed from a bare ``except`` so KeyboardInterrupt / SystemExit
        # still propagate; the failure is reported instead of hidden.
        print(f"Warning: could not clear CUDA cache: {exc}")
39
-
40
# GPU configuration: explicitly pin to cuda:0 when CUDA is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Best-effort CUDA tuning; a failure is reported but never aborts start-up
if torch.cuda.is_available():
    try:
        with torch.cuda.device(0):
            torch.cuda.empty_cache()
            torch.backends.cudnn.benchmark = True
            torch.backends.cuda.matmul.allow_tf32 = True
    except Exception as exc:
        # Narrowed from a bare ``except`` and extended with the reason so the
        # warning is actionable.
        print(f"Warning: Could not configure CUDA settings ({exc})")
52
-
53
- # 번역 모델 초기화
54
- model_name = "Helsinki-NLP/opus-mt-ko-en"
55
- tokenizer = AutoTokenizer.from_pretrained(model_name)
56
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to('cpu')
57
- translator = pipeline("translation", model=model, tokenizer=tokenizer, device=-1)
58
-
59
def translate_to_english(text: str) -> str:
    """Translate *text* to English when it contains Hangul syllables.

    Text with no character in the range '가'..'힣' is returned unchanged.
    If anything fails, the error is printed and the original text is
    returned.
    """
    try:
        first_syllable, last_syllable = ord('가'), ord('힣')
        has_hangul = False
        for ch in text:
            if first_syllable <= ord(ch) <= last_syllable:
                has_hangul = True
                break
        if not has_hangul:
            return text

        outputs = translator(text, max_length=128)
        translated = outputs[0]['translation_text']
        print(f"Translated '{text}' to '{translated}'")
        return translated
    except Exception as e:
        print(f"Translation error: {str(e)}")
        return text
70
-
71
- BoundingBox = tuple[int, int, int, int]
72
-
73
- pillow_heif.register_heif_opener()
74
- pillow_heif.register_avif_opener()
75
-
76
- # HF 토큰 설정
77
- HF_TOKEN = os.getenv("HF_TOKEN")
78
- if HF_TOKEN is None:
79
- raise ValueError("Please set the HF_TOKEN environment variable")
80
-
81
- try:
82
- login(token=HF_TOKEN)
83
- except Exception as e:
84
- raise ValueError(f"Failed to login to Hugging Face: {str(e)}")
85
-
86
- # 모델 초기화
87
- segmenter = BoxSegmenter(device="cpu")
88
- segmenter.device = device
89
- segmenter.model = segmenter.model.to(device=segmenter.device)
90
-
91
- gd_model_path = "IDEA-Research/grounding-dino-base"
92
- gd_processor = GroundingDinoProcessor.from_pretrained(gd_model_path)
93
- gd_model = GroundingDinoForObjectDetection.from_pretrained(gd_model_path, torch_dtype=torch.float32)
94
- gd_model = gd_model.to(device=device)
95
- assert isinstance(gd_model, GroundingDinoForObjectDetection)
96
-
97
- # FLUX 파이프라인 초기화
98
- pipe = FluxPipeline.from_pretrained(
99
- "black-forest-labs/FLUX.1-dev",
100
- torch_dtype=torch.float16,
101
- use_auth_token=HF_TOKEN
102
- )
103
- pipe.enable_attention_slicing(slice_size="auto")
104
-
105
- # LoRA 가중치 로드
106
- pipe.load_lora_weights(
107
- hf_hub_download(
108
- "ByteDance/Hyper-SD",
109
- "Hyper-FLUX.1-dev-8steps-lora.safetensors",
110
- use_auth_token=HF_TOKEN
111
- )
112
- )
113
- pipe.fuse_lora(lora_scale=0.125)
114
-
115
- # GPU 설정을 try-except로 감싸기
116
- try:
117
- if torch.cuda.is_available():
118
- pipe = pipe.to("cuda:0") # 명시적으로 cuda:0 지정
119
- except Exception as e:
120
- print(f"Warning: Could not move pipeline to CUDA: {str(e)}")
121
-
122
- client = Client("NabeelShar/BiRefNet_for_text_writing")
123
-
124
class timer:
    """Context manager that prints when a block starts and how long it took."""

    def __init__(self, method_name="timed process"):
        # Label used in both printed messages
        self.method = method_name

    def __enter__(self):
        self.start = time.time()
        label = self.method
        print(f"{label} starts")

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so an exception raised inside the block propagates
        elapsed = round(time.time() - self.start, 2)
        print(f"{self.method} took {str(elapsed)}s")
133
-
134
def bbox_union(bboxes: Sequence[list[int]]) -> BoundingBox | None:
    """Merge several 4-element boxes into one.

    The result takes the minimum of elements 0 and 1 and the maximum of
    elements 2 and 3 across all boxes. Returns ``None`` when *bboxes* is
    empty. Each box is asserted to hold exactly four ``int`` values.
    """
    if not bboxes:
        return None

    for box in bboxes:
        assert len(box) == 4
        assert all(isinstance(value, int) for value in box)

    # Gather each coordinate column, then reduce it
    columns = [[box[i] for box in bboxes] for i in range(4)]
    return (min(columns[0]), min(columns[1]), max(columns[2]), max(columns[3]))
146
-
147
def corners_to_pixels_format(bboxes: torch.Tensor, width: int, height: int) -> torch.Tensor:
    """Round boxes to int32 and clamp them to the image bounds.

    The last dimension is unpacked as (x1, y1, x2, y2). The x values are
    clamped to [0, width] and the y values to [0, height].
    """
    as_int = bboxes.round().to(torch.int32)
    left, top, right, bottom = as_int.unbind(-1)
    clamped = (
        left.clamp(0, width),
        top.clamp(0, height),
        right.clamp(0, width),
        bottom.clamp(0, height),
    )
    return torch.stack(clamped, dim=-1)
150
-
151
def gd_detect(img: Image.Image, prompt: str) -> BoundingBox | None:
    """Detect *prompt* in *img* and return the union of all detected boxes.

    The prompt is passed to the Grounding DINO processor with a trailing
    period. Detected boxes are converted to clamped integer pixel
    coordinates and merged with ``bbox_union``, which yields ``None`` when
    there are no boxes.
    """
    img_width, img_height = img.size
    model_inputs = gd_processor(images=img, text=f"{prompt}.", return_tensors="pt").to(device=device)

    with no_grad():
        model_outputs = gd_model(**model_inputs)

    detections = gd_processor.post_process_grounded_object_detection(
        model_outputs,
        model_inputs["input_ids"],
        target_sizes=[(img_height, img_width)],
    )
    results: dict[str, Any] = detections[0]
    assert "boxes" in results and isinstance(results["boxes"], torch.Tensor)

    pixel_boxes = corners_to_pixels_format(results["boxes"].cpu(), img_width, img_height)
    return bbox_union(pixel_boxes.numpy().tolist())
164
-
165
-
166
-
167
def adjust_size_to_multiple_of_8(width: int, height: int) -> tuple[int, int]:
    """Round *width* and *height* up to the nearest multiple of 8."""

    def _round_up(value):
        return ((value + 7) // 8) * 8

    return _round_up(width), _round_up(height)
172
-
173
- def calculate_dimensions(aspect_ratio: str, base_size: int = 512) -> tuple[int, int]:
174
- """선택된 비율에 따라 이미지 크기 계산"""
175
- if aspect_ratio == "1:1":
176
- return base_size, base_size
177
- elif aspect_ratio == "16:9":
178
- return base_size * 16 // 9, base_size
179
- elif aspect_ratio == "9:16":
180
- return base_size, base_size * 16 // 9
181
- elif aspect_ratio == "4:3":
182
- return base_size * 4 // 3, base_size
183
- return base_size, base_size
184
-
185
@spaces.GPU(duration=20)  # reduced from 40 s to 20 s
def generate_background(prompt: str, aspect_ratio: str) -> Image.Image:
    """Generate a background image for *prompt* with the FLUX pipeline.

    The size is derived from *aspect_ratio*, rounded up to multiples of 8,
    and scaled down so that neither side exceeds 768. If the pipeline call
    fails, a white image of the requested size is returned. On any other
    failure a white 512x512 image is returned. Errors are printed, not
    raised.
    """
    try:
        width, height = adjust_size_to_multiple_of_8(*calculate_dimensions(aspect_ratio))

        max_size = 768
        longest_side = max(width, height)
        if longest_side > max_size:
            ratio = max_size / longest_side
            width, height = adjust_size_to_multiple_of_8(int(width * ratio), int(height * ratio))

        with timer("Background generation"):
            try:
                with torch.inference_mode():
                    output = pipe(
                        prompt=prompt,
                        width=width,
                        height=height,
                        num_inference_steps=8,
                        guidance_scale=4.0,
                    )
                    image = output.images[0]
            except Exception as e:
                print(f"Pipeline error: {str(e)}")
                return Image.new('RGB', (width, height), 'white')

        return image
    except Exception as e:
        print(f"Background generation error: {str(e)}")
        return Image.new('RGB', (512, 512), 'white')
216
-
217
- def create_position_grid():
218
- return """
219
- <div class="position-grid" style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; width: 150px; margin: auto;">
220
- <button class="position-btn" data-pos="top-left">↖</button>
221
- <button class="position-btn" data-pos="top-center">↑</button>
222
- <button class="position-btn" data-pos="top-right">↗</button>
223
- <button class="position-btn" data-pos="middle-left">←</button>
224
- <button class="position-btn" data-pos="middle-center">•</button>
225
- <button class="position-btn" data-pos="middle-right">→</button>
226
- <button class="position-btn" data-pos="bottom-left">↙</button>
227
- <button class="position-btn" data-pos="bottom-center" data-default="true">↓</button>
228
- <button class="position-btn" data-pos="bottom-right">↘</button>
229
- </div>
230
- """
231
-
232
def calculate_object_position(position: str, bg_size: tuple[int, int], obj_size: tuple[int, int]) -> tuple[int, int]:
    """Return the top-left ``(x, y)`` at which to paste the object.

    *position* is one of the nine "<vertical>-<horizontal>" anchors
    ("top-left" ... "bottom-right"). Any other value falls back to
    "bottom-center".
    """
    bg_width, bg_height = bg_size
    obj_width, obj_height = obj_size

    # Offsets per axis; every anchor is a combination of one of each
    x_offsets = {
        "left": 0,
        "center": (bg_width - obj_width) // 2,
        "right": bg_width - obj_width,
    }
    y_offsets = {
        "top": 0,
        "middle": (bg_height - obj_height) // 2,
        "bottom": bg_height - obj_height,
    }
    anchors = {
        f"{v_name}-{h_name}": (x, y)
        for v_name, y in y_offsets.items()
        for h_name, x in x_offsets.items()
    }

    return anchors.get(position, anchors["bottom-center"])
250
-
251
def resize_object(image: Image.Image, scale_percent: float) -> Image.Image:
    """Return *image* scaled to *scale_percent* percent of its size.

    Each target dimension is truncated to an integer and kept at a minimum
    of 1 pixel, so a tiny image combined with a small percentage cannot
    produce a zero-sized resize request. Resampling uses LANCZOS.
    """
    width = max(1, int(image.width * scale_percent / 100))
    height = max(1, int(image.height * scale_percent / 100))
    return image.resize((width, height), Image.Resampling.LANCZOS)
256
-
257
-
258
-
259
@spaces.GPU(duration=30)  # reduced from 120 s to 30 s
def _gpu_process(img: Image.Image, prompt: str | BoundingBox | None) -> tuple[Image.Image, BoundingBox | None, list[str]]:
    """Locate the target (if given as text) and segment it.

    A string *prompt* is resolved to a bounding box with ``gd_detect``.
    Any other value is used as the box directly. Returns the mask produced
    by ``segmenter``, the box used, and a list of timing strings.

    Raises:
        gr.Error: if a text prompt yields no detection.
        Exception: any other error is printed and re-raised unchanged.
    """
    time_log: list[str] = []
    try:
        if not isinstance(prompt, str):
            bbox = prompt
        else:
            started = time.time()
            bbox = gd_detect(img, prompt)
            time_log.append(f"detect: {time.time() - started}")
            if not bbox:
                print(time_log[0])
                raise gr.Error("No object detected")

        started = time.time()
        mask = segmenter(img, bbox)
        time_log.append(f"segment: {time.time() - started}")
        return mask, bbox, time_log
    except Exception as e:
        print(f"GPU process error: {str(e)}")
        raise
279
-
280
def _process(img: Image.Image, prompt: str | BoundingBox | None, bg_prompt: str | None = None, aspect_ratio: str = "1:1") -> tuple[tuple[Image.Image, Image.Image, Image.Image], gr.DownloadButton]:
    """Segment the target object and build the preview and download outputs.

    Steps:
      1. Downscale *img* so that neither side exceeds 1024 pixels.
      2. Run detection/segmentation via ``_gpu_process`` and cut the object
         out with ``apply_mask``.
      3. If *bg_prompt* is given, ``combined`` is the generated background
         alone (the caller composites the object onto it). Otherwise the
         cut-out is composited onto white.
      4. Save ``combined`` to a temporary PNG for the download button. The
         file is created with ``delete=False`` and is not removed here.

    Returns:
        ``((img, combined, masked_alpha), download_button)``.

    Raises:
        gr.Error: wraps any failure as "Processing failed: ...".
    """
    try:
        # Limit the input image size
        max_size = 1024
        if img.width > max_size or img.height > max_size:
            ratio = max_size / max(img.width, img.height)
            # Keep at least 1 px per side so extreme aspect ratios still resize
            new_size = (max(1, int(img.width * ratio)), max(1, int(img.height * ratio)))
            # Same resampling constant as resize_object
            img = img.resize(new_size, Image.Resampling.LANCZOS)

        # Best-effort CUDA cache cleanup before the heavy work
        try:
            if torch.cuda.is_available():
                current_device = torch.cuda.current_device()
                with torch.cuda.device(current_device):
                    torch.cuda.empty_cache()
        except Exception as e:
            print(f"CUDA memory management failed: {e}")

        # torch.cuda.amp.autocast is deprecated; torch.autocast is the
        # equivalent device-generic entry point.
        with torch.autocast(device_type="cuda", enabled=torch.cuda.is_available()):
            mask, bbox, time_log = _gpu_process(img, prompt)
            masked_alpha = apply_mask(img, mask, defringe=True)

        if bg_prompt:
            background = generate_background(bg_prompt, aspect_ratio)
            combined = background
        else:
            combined = Image.alpha_composite(Image.new("RGBA", masked_alpha.size, "white"), masked_alpha)

        clear_memory()

        # Reserve a path, then close the handle before writing: saving by
        # name while the file is still open fails on some platforms.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp:
            temp_path = temp.name
        combined.save(temp_path)
        return (img, combined, masked_alpha), gr.DownloadButton(value=temp_path, interactive=True)
    except Exception as e:
        clear_memory()
        print(f"Processing error: {str(e)}")
        raise gr.Error(f"Processing failed: {str(e)}")
317
-
318
def on_change_bbox(prompts: dict[str, Any] | None):
    """Return an update that is interactive only when *prompts* is not None."""
    has_prompts = prompts is not None
    return gr.update(interactive=has_prompts)
320
-
321
-
322
def on_change_prompt(img: Image.Image | None, prompt: str | None, bg_prompt: str | None = None):
    """Return an update that is interactive only when both *img* and *prompt* are truthy.

    *bg_prompt* is accepted but not used.
    """
    is_ready = True if (img and prompt) else False
    return gr.update(interactive=is_ready)
324
-
325
-
326
def combine_with_background(foreground: Image.Image, background: Image.Image,
                            position: str = "bottom-center", scale_percent: float = 100) -> Image.Image:
    """Composite *foreground* onto *background* and return an RGBA image.

    The foreground is scaled by *scale_percent*, placed at the anchor named
    by *position*, and alpha-composited over the background.
    """
    print(f"Combining with position: {position}, scale: {scale_percent}")

    # Both layers must be RGBA for alpha compositing
    canvas = background.convert('RGBA')
    fg_rgba = foreground if foreground.mode == 'RGBA' else foreground.convert('RGBA')

    # Scale, then work out where the object goes
    scaled_foreground = resize_object(fg_rgba, scale_percent)
    x, y = calculate_object_position(position, canvas.size, scaled_foreground.size)
    print(f"Calculated position coordinates: ({x}, {y})")

    # Paste onto a transparent layer the size of the background, using the
    # object's own alpha as the paste mask
    layer = Image.new('RGBA', canvas.size, (0, 0, 0, 0))
    layer.paste(scaled_foreground, (x, y), scaled_foreground)

    # Final composite
    return Image.alpha_composite(canvas, layer)
353
-
354
def process_prompt(img: Image.Image, prompt: str, bg_prompt: str | None = None,
                   aspect_ratio: str = "1:1", position: str = "bottom-center",
                   scale_percent: float = 100) -> tuple[Image.Image, Image.Image]:
    """Extract the object named by *prompt* and optionally place it on a new background.

    Prompts are passed through ``translate_to_english`` first. When
    *bg_prompt* is given, the extracted object is composited onto the
    generated background at *position* and *scale_percent*. Otherwise it is
    composited onto white.

    Returns:
        ``(combined, extracted)``, both converted to RGB.

    Raises:
        gr.Error: for missing input, or wrapping any processing failure.
    """
    try:
        # ``not prompt`` also covers None, which has no .strip()
        if img is None or not prompt or prompt.strip() == "":
            raise gr.Error("Please provide both image and prompt")

        print(f"Processing with position: {position}, scale: {scale_percent}")

        try:
            prompt = translate_to_english(prompt)
            if bg_prompt:
                bg_prompt = translate_to_english(bg_prompt)
        except Exception as e:
            print(f"Translation error (continuing with original text): {str(e)}")

        results, _ = _process(img, prompt, bg_prompt, aspect_ratio)

        # Keep the extracted image as RGBA for compositing
        extracted = results[2].convert('RGBA')

        if bg_prompt:
            try:
                print(f"Using position: {position}")
                valid_positions = ["top-left", "top-center", "top-right",
                                   "middle-left", "middle-center", "middle-right",
                                   "bottom-left", "bottom-center", "bottom-right"]
                if position not in valid_positions:
                    position = "bottom-center"
                    print(f"Invalid position, using default: {position}")

                combined = combine_with_background(
                    foreground=extracted,
                    background=results[1],
                    position=position,
                    scale_percent=scale_percent
                )

                # Convert to RGB for the final output
                return combined.convert('RGB'), extracted.convert('RGB')
            except Exception as e:
                # Fall back to the bare background if compositing fails
                print(f"Combination error: {str(e)}")
                return results[1].convert('RGB'), extracted.convert('RGB')

        # No background requested: composite onto white
        white_bg = Image.new('RGBA', extracted.size, (255, 255, 255, 255))
        combined = Image.alpha_composite(white_bg, extracted)
        return combined.convert('RGB'), extracted.convert('RGB')

    except gr.Error:
        # Already user-facing; re-wrapping would only distort the message
        raise
    except Exception as e:
        print(f"Error in process_prompt: {str(e)}")
        raise gr.Error(str(e)) from e
    finally:
        clear_memory()
408
-
409
def apply_mask(img: Image.Image, mask_img: Image.Image, defringe: bool = True) -> Image.Image:
    """Cut *img* out with *mask_img* and return an RGBA image on a transparent canvas.

    *img* and *mask_img* must have the same size (asserted). With
    *defringe* enabled, the colours are first replaced by the result of
    ``estimate_foreground_ml`` on the image and mask scaled to [0, 1].
    """
    assert img.size == mask_img.size
    rgb_img = img.convert("RGB")
    alpha_img = mask_img.convert("L")

    if defringe:
        rgb = np.asarray(rgb_img) / 255.0
        alpha = np.asarray(alpha_img) / 255.0
        foreground = cast(np.ndarray[Any, np.dtype[np.uint8]], estimate_foreground_ml(rgb, alpha))
        rgb_img = Image.fromarray((foreground * 255).astype("uint8"))

    # Build the result on a fully transparent canvas, using the mask as alpha
    cutout = Image.new("RGBA", rgb_img.size, (0, 0, 0, 0))
    cutout.paste(rgb_img, (0, 0), alpha_img)
    return cutout
425
-
426
-
427
def process_bbox(img: Image.Image, box_input: str) -> tuple[Image.Image, Image.Image]:
    """Extract the object inside a user-supplied bounding box.

    *box_input* is text of the form ``[xmin, ymin, xmax, ymax]``.

    Returns:
        ``(combined, extracted)`` as produced by ``_process``.

    Raises:
        gr.Error: for missing input, a malformed box, or any processing failure.
    """
    import ast  # local import keeps this block self-contained

    try:
        if img is None or not box_input or box_input.strip() == "":
            raise gr.Error("Please provide both image and bounding box coordinates")

        try:
            # SECURITY: this text comes from the UI. It used to go through
            # eval(), which executes arbitrary code; literal_eval only
            # accepts Python literals.
            coords = ast.literal_eval(box_input)
            if not isinstance(coords, list) or len(coords) != 4:
                raise ValueError("Invalid box format")
            bbox = tuple(int(x) for x in coords)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            raise gr.Error("Invalid box format. Please provide [xmin, ymin, xmax, ymax]") from None

        # Process the image
        results, _ = _process(img, bbox)

        # Return only the combined image and the extracted image
        return results[1], results[2]
    except gr.Error:
        # Already user-facing; do not re-wrap
        raise
    except Exception as e:
        raise gr.Error(str(e)) from e
447
-
448
- # Event handler functions 수정
449
def update_process_button(img, prompt):
    """Return an update that enables and highlights the button only when both inputs are truthy."""
    ready = bool(img and prompt)
    if ready:
        variant = "primary"
    else:
        variant = "secondary"
    return gr.update(interactive=ready, variant=variant)
454
-
455
def update_box_button(img, box_input):
    """Return an update that enables the button only for a valid 4-element box list.

    The update is interactive with the "primary" variant when *img* is
    truthy and *box_input* parses as a list of four elements. In every
    other case, including unparsable text, it is non-interactive with the
    "secondary" variant.
    """
    import ast  # local import keeps this block self-contained

    disabled = gr.update(interactive=False, variant="secondary")
    if not (img and box_input):
        return disabled
    try:
        # SECURITY: this text comes from the UI. It used to go through
        # eval(), which executes arbitrary code; literal_eval only accepts
        # Python literals.
        coords = ast.literal_eval(box_input)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return disabled
    if isinstance(coords, list) and len(coords) == 4:
        return gr.update(interactive=True, variant="primary")
    return disabled
464
-
465
-
466
- css = """
467
- footer {display: none}
468
- .main-title {
469
- text-align: center;
470
- margin: 1em 0;
471
- padding: 1.5em;
472
- background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
473
- border-radius: 15px;
474
- box-shadow: 0 4px 6px rgba(0,0,0,0.1);
475
- }
476
- .main-title h1 {
477
- color: #2196F3;
478
- font-size: 2.8em;
479
- margin-bottom: 0.3em;
480
- font-weight: 700;
481
- }
482
- .main-title p {
483
- color: #555;
484
- font-size: 1.3em;
485
- line-height: 1.4;
486
- }
487
- .container {
488
- max-width: 1200px;
489
- margin: auto;
490
- padding: 20px;
491
- }
492
- .input-panel, .output-panel {
493
- background: white;
494
- padding: 1.5em;
495
- border-radius: 12px;
496
- box-shadow: 0 2px 8px rgba(0,0,0,0.08);
497
- margin-bottom: 1em;
498
- }
499
- .controls-panel {
500
- background: #f8f9fa;
501
- padding: 1em;
502
- border-radius: 8px;
503
- margin: 1em 0;
504
- }
505
- .image-display {
506
- min-height: 512px;
507
- display: flex;
508
- align-items: center;
509
- justify-content: center;
510
- background: #fafafa;
511
- border-radius: 8px;
512
- margin: 1em 0;
513
- }
514
- .example-section {
515
- text-align: center;
516
- padding: 2em;
517
- background: #f5f5f5;
518
- border-radius: 12px;
519
- margin-top: 2em;
520
- }
521
- .example-section img {
522
- max-width: 100%;
523
- border-radius: 8px;
524
- box-shadow: 0 4px 8px rgba(0,0,0,0.1);
525
- }
526
- .accordion {
527
- border: 1px solid #e0e0e0;
528
- border-radius: 8px;
529
- margin: 1em 0;
530
- }
531
- .accordion-header {
532
- padding: 1em;
533
- background: #f5f5f5;
534
- cursor: pointer;
535
- }
536
- .accordion-content {
537
- padding: 1em;
538
- display: none;
539
- }
540
- .accordion.open .accordion-content {
541
- display: block;
542
- }
543
- .position-grid {
544
- display: grid;
545
- grid-template-columns: repeat(3, 1fr);
546
- gap: 8px;
547
- margin: 1em 0;
548
- }
549
-
550
-
551
- .position-btn {
552
- padding: 10px;
553
- border: 1px solid #ddd;
554
- border-radius: 4px;
555
- background: white;
556
- cursor: pointer;
557
- transition: all 0.3s ease;
558
- width: 40px;
559
- height: 40px;
560
- display: flex;
561
- align-items: center;
562
- justify-content: center;
563
- }
564
-
565
- .position-btn:hover {
566
- background: #e3f2fd;
567
- }
568
-
569
- .position-btn.selected {
570
- background-color: #2196F3;
571
- color: white;
572
- border-color: #1976D2;
573
- }
574
- """
575
-
576
-
577
def add_text_with_stroke(draw, text, x, y, font, text_color, stroke_width):
    """Draw *text* once at every offset within *stroke_width* pixels of (x, y).

    Every copy uses *text_color*, so the effect is a thickened glyph.
    """
    offsets = range(-stroke_width, stroke_width + 1)
    anchor_points = [(x + dx, y + dy) for dx in offsets for dy in offsets]
    for point in anchor_points:
        draw.text(point, text, font=font, fill=text_color)
583
-
584
def remove_background(image):
    """Send *image* to the remote background-removal endpoint and open the result.

    The image is written to a uniquely named PNG in the system temp
    directory, uploaded through ``client.predict`` (endpoint "/image"), and
    the first element of the response is opened as an image. The uploaded
    PNG is removed afterwards instead of accumulating in the working
    directory.
    """
    # A UUID keeps concurrent requests from clobbering each other's file
    filename = os.path.join(tempfile.gettempdir(), f"image_{uuid.uuid4()}.png")
    try:
        image.save(filename)
        # Call the gradio client for background removal
        result = client.predict(images=handle_file(filename), api_name="/image")
    finally:
        try:
            os.remove(filename)
        except OSError:
            # Nothing to clean up, or the file is still locked; best-effort
            pass
    return Image.open(result[0])
591
-
592
def superimpose(image_with_text, overlay_image):
    """Paste *overlay_image* onto *image_with_text* at (0, 0) and return it.

    The overlay is converted to RGBA and used as its own paste mask.
    *image_with_text* is modified in place.
    """
    rgba_overlay = overlay_image.convert("RGBA")
    image_with_text.paste(rgba_overlay, box=(0, 0), mask=rgba_overlay)
    return image_with_text
600
-
601
-
602
def add_text_to_image(
    input_image,
    text,
    font_size,
    color,
    opacity,
    x_position,
    y_position,
    thickness,
    text_position_type,
    font_choice
):
    """Add text to an image with customizable properties.

    Args:
        input_image: PIL image or numpy array. Other types are returned as-is.
        text: text to draw. Empty, blank or None returns the input unchanged.
        font_size: font size passed to ``ImageFont.truetype``.
        color: one of the named colours below. Unknown names fall back to white.
        opacity: alpha value of the text colour (the UI slider is 0-255).
        x_position, y_position: placement as a percentage of the free space
            between the text box and the image edge.
        thickness: stroke width in pixels drawn around the glyphs in the
            text colour. 0 draws plain text. (This parameter was previously
            accepted but ignored.)
        text_position_type: "Text Behind Image" composites the text between
            a white background and the image. Any other value draws the
            text over the image.
        font_choice: key into the font table. Unknown keys use DejaVuSans.

    Returns:
        The resulting RGB image, or *input_image* unchanged if anything fails.
    """
    try:
        # ``not text`` also covers None, which has no .strip()
        if input_image is None or not text or text.strip() == "":
            return input_image

        # Convert to a PIL Image object
        if isinstance(input_image, np.ndarray):
            image = Image.fromarray(input_image)
        elif isinstance(input_image, Image.Image):
            image = input_image.copy()
        else:
            print(f"Unexpected image type: {type(input_image)}")
            return input_image

        # Work in RGBA so the layers can be alpha-composited
        image = image.convert('RGBA')
        width, height = image.size

        # Font selection
        try:
            font_path = {
                "Default": "DejaVuSans.ttf",
                "Korean Regular": "ko-Regular.ttf",
                "Korean Son": "ko-son.ttf"
            }.get(font_choice, "DejaVuSans.ttf")
            font = ImageFont.truetype(font_path, int(font_size))
        except Exception as e:
            print(f"Font error: {str(e)}, using default")
            font = ImageFont.load_default()

        # Colour selection
        rgb_color = {
            'White': (255, 255, 255),
            'Black': (0, 0, 0),
            'Red': (255, 0, 0),
            'Green': (0, 255, 0),
            'Blue': (0, 0, 255),
            'Yellow': (255, 255, 0),
            'Purple': (128, 0, 128)
        }.get(color, (255, 255, 255))

        # Stroke width from the thickness control; never negative
        stroke_width = max(0, int(thickness or 0))

        # Measure the text, including the stroke
        temp_draw = ImageDraw.Draw(Image.new('RGBA', (1, 1)))
        text_bbox = temp_draw.textbbox((0, 0), text, font=font, stroke_width=stroke_width)
        text_width = text_bbox[2] - text_bbox[0]
        text_height = text_bbox[3] - text_bbox[1]

        # Text position
        actual_x = int((width - text_width) * (x_position / 100))
        actual_y = int((height - text_height) * (y_position / 100))

        # Text colour with alpha
        text_color = (*rgb_color, int(opacity))

        print(f"Processing {text_position_type}")  # debugging

        behind = text_position_type == "Text Behind Image"
        mode_label = "Text Behind Image" if behind else "Text Over Image"
        print(f"Starting {mode_label} process")  # debugging

        # Both modes draw the same transparent text layer
        text_layer = Image.new('RGBA', (width, height), (0, 0, 0, 0))
        ImageDraw.Draw(text_layer).text(
            (actual_x, actual_y),
            text,
            font=font,
            fill=text_color,
            stroke_width=stroke_width,
            stroke_fill=text_color,
        )

        if behind:
            # White background, then text, then the image on top.
            # NOTE(review): if the image has no transparent pixels it covers
            # the text completely; confirm that callers pass a cut-out.
            background = Image.new('RGBA', (width, height), (255, 255, 255, 255))
            result = Image.alpha_composite(background, text_layer)
            result = Image.alpha_composite(result, image)
        else:
            result = Image.alpha_composite(image, text_layer)

        print(f"{mode_label} process completed")  # debugging
        return result.convert('RGB')

    except Exception as e:
        print(f"Error in add_text_to_image: {str(e)}")
        traceback.print_exc()
        return input_image
722
-
723
-
724
def update_position(new_position):
    """Log the newly selected position and return it unchanged."""
    message = f"Position updated to: {new_position}"
    print(message)
    return new_position
728
-
729
def update_controls(bg_prompt):
    """Return two visibility updates based on whether *bg_prompt* is non-empty.

    The first update is for the aspect-ratio control and the second for
    the object-controls group.
    """
    show = bool(bg_prompt)
    aspect_ratio_update = gr.update(visible=show)
    object_controls_update = gr.update(visible=show)
    return [aspect_ratio_update, object_controls_update]
736
-
737
- with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
738
- gr.HTML("""
739
- <div class="main-title">
740
- <h1>🎨 GiniGen Canvas-o3</h1>
741
- <p>Remove background of specified objects, generate new backgrounds, and insert text over or behind images with prompts.</p>
742
- </div>
743
- """)
744
-
745
- with gr.Row(equal_height=True):
746
- # 왼쪽 패널 (입력)
747
- with gr.Column(scale=1):
748
- with gr.Group(elem_classes="input-panel"):
749
- input_image = gr.Image(
750
- type="pil",
751
- label="Upload Image",
752
- interactive=True,
753
- height=400
754
- )
755
- text_prompt = gr.Textbox(
756
- label="Object to Extract",
757
- placeholder="Enter what you want to extract...",
758
- interactive=True
759
- )
760
- with gr.Row():
761
- bg_prompt = gr.Textbox(
762
- label="Background Prompt (optional)",
763
- placeholder="Describe the background...",
764
- interactive=True,
765
- scale=3
766
- )
767
- aspect_ratio = gr.Dropdown(
768
- choices=["1:1", "16:9", "9:16", "4:3"],
769
- value="1:1",
770
- label="Aspect Ratio",
771
- interactive=True,
772
- visible=True,
773
- scale=1
774
- )
775
-
776
- with gr.Group(elem_classes="controls-panel", visible=False) as object_controls:
777
- with gr.Column(scale=1):
778
- position = gr.State(value="bottom-center")
779
- with gr.Row():
780
- btn_top_left = gr.Button("↖", elem_classes="position-btn")
781
- btn_top_center = gr.Button("↑", elem_classes="position-btn")
782
- btn_top_right = gr.Button("↗", elem_classes="position-btn")
783
- with gr.Row():
784
- btn_middle_left = gr.Button("←", elem_classes="position-btn")
785
- btn_middle_center = gr.Button("•", elem_classes="position-btn")
786
- btn_middle_right = gr.Button("→", elem_classes="position-btn")
787
- with gr.Row():
788
- btn_bottom_left = gr.Button("↙", elem_classes="position-btn")
789
- btn_bottom_center = gr.Button("↓", elem_classes="position-btn", value="selected")
790
- btn_bottom_right = gr.Button("↘", elem_classes="position-btn")
791
- with gr.Column(scale=1):
792
- scale_slider = gr.Slider(
793
- minimum=10,
794
- maximum=200,
795
- value=50,
796
- step=5,
797
- label="Object Size (%)"
798
- )
799
-
800
- process_btn = gr.Button(
801
- "Process",
802
- variant="primary",
803
- interactive=False,
804
- size="lg"
805
- )
806
-
807
- # 오른쪽 패널 (출력)
808
- with gr.Column(scale=1):
809
- with gr.Group(elem_classes="output-panel"):
810
- with gr.Tab("Result"):
811
- combined_image = gr.Image(
812
- label="Combined Result",
813
- show_download_button=True,
814
- type="pil",
815
- height=400
816
- )
817
-
818
- with gr.Accordion("Text Insertion Options", open=False):
819
- with gr.Group():
820
- with gr.Row():
821
- text_input = gr.Textbox(
822
- label="Text Content",
823
- placeholder="Enter text to add..."
824
- )
825
- text_position_type = gr.Radio(
826
- choices=["Text Over Image", "Text Behind Image"],
827
- value="Text Over Image",
828
- label="Text Position"
829
- )
830
-
831
- with gr.Row():
832
- with gr.Column(scale=1):
833
- font_choice = gr.Dropdown(
834
- choices=["Default", "Korean Regular", "Korean Son"],
835
- value="Default",
836
- label="Font Selection",
837
- interactive=True
838
- )
839
- font_size = gr.Slider(
840
- minimum=10,
841
- maximum=200,
842
- value=40,
843
- step=5,
844
- label="Font Size"
845
- )
846
- color_dropdown = gr.Dropdown(
847
- choices=["White", "Black", "Red", "Green", "Blue", "Yellow", "Purple"],
848
- value="White",
849
- label="Text Color"
850
- )
851
- thickness = gr.Slider(
852
- minimum=0,
853
- maximum=10,
854
- value=1,
855
- step=1,
856
- label="Text Thickness"
857
- )
858
- with gr.Column(scale=1):
859
- opacity_slider = gr.Slider(
860
- minimum=0,
861
- maximum=255,
862
- value=255,
863
- step=1,
864
- label="Opacity"
865
- )
866
- x_position = gr.Slider(
867
- minimum=0,
868
- maximum=100,
869
- value=50,
870
- step=1,
871
- label="X Position (%)"
872
- )
873
- y_position = gr.Slider(
874
- minimum=0,
875
- maximum=100,
876
- value=50,
877
- step=1,
878
- label="Y Position (%)"
879
- )
880
- add_text_btn = gr.Button("Apply Text", variant="primary")
881
-
882
- extracted_image = gr.Image(
883
- label="Extracted Object",
884
- show_download_button=True,
885
- type="pil",
886
- height=200
887
- )
888
-
889
- # 이벤트 바인딩
890
- position_mapping = {
891
- btn_top_left: "top-left",
892
- btn_top_center: "top-center",
893
- btn_top_right: "top-right",
894
- btn_middle_left: "middle-left",
895
- btn_middle_center: "middle-center",
896
- btn_middle_right: "middle-right",
897
- btn_bottom_left: "bottom-left",
898
- btn_bottom_center: "bottom-center",
899
- btn_bottom_right: "bottom-right"
900
- }
901
-
902
- for btn, pos in position_mapping.items():
903
- btn.click(
904
- fn=lambda pos=pos: update_position(pos),
905
- outputs=position
906
- )
907
-
908
- bg_prompt.change(
909
- fn=update_controls,
910
- inputs=bg_prompt,
911
- outputs=[aspect_ratio, object_controls],
912
- queue=False
913
- )
914
-
915
- input_image.change(
916
- fn=update_process_button,
917
- inputs=[input_image, text_prompt],
918
- outputs=process_btn,
919
- queue=False
920
- )
921
-
922
- text_prompt.change(
923
- fn=update_process_button,
924
- inputs=[input_image, text_prompt],
925
- outputs=process_btn,
926
- queue=False
927
- )
928
-
929
- process_btn.click(
930
- fn=process_prompt,
931
- inputs=[
932
- input_image,
933
- text_prompt,
934
- bg_prompt,
935
- aspect_ratio,
936
- position,
937
- scale_slider
938
- ],
939
- outputs=[combined_image, extracted_image],
940
- queue=True
941
- )
942
-
943
- add_text_btn.click(
944
- fn=add_text_to_image,
945
- inputs=[
946
- combined_image,
947
- text_input,
948
- font_size,
949
- color_dropdown,
950
- opacity_slider,
951
- x_position,
952
- y_position,
953
- thickness,
954
- text_position_type,
955
- font_choice
956
- ],
957
- outputs=combined_image,
958
- queue=True
959
- )
960
-
961
- # 간단한 큐 설정으로 변경
962
- demo.queue()
963
- demo.launch(
964
- server_name="0.0.0.0",
965
- server_port=7860,
966
- share=False
967
- )
968
-