canvas-studio

Runtime error

App Files Files Community

ginipick committed on Dec 10, 2024

Commit

9a8a6d5

verified ·

1 Parent(s): 8c99ff5

Update app-backup.py

Browse files

Files changed (1) hide show

app-backup.py +284 -66

app-backup.py CHANGED Viewed

@@ -18,14 +18,55 @@ from refiners.fluxion.utils import no_grad
 from refiners.solutions import BoxSegmenter
 from transformers import GroundingDinoForObjectDetection, GroundingDinoProcessor
 from diffusers import FluxPipeline
 BoundingBox = tuple[int, int, int, int]
 pillow_heif.register_heif_opener()
 pillow_heif.register_avif_opener()
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # HF 토큰 설정
 HF_TOKEN = os.getenv("HF_TOKEN")
 if HF_TOKEN is None:
@@ -50,9 +91,12 @@ assert isinstance(gd_model, GroundingDinoForObjectDetection)
 # FLUX 파이프라인 초기화
 pipe = FluxPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-dev",
-    torch_dtype=torch.bfloat16,
     use_auth_token=HF_TOKEN
 )
 pipe.load_lora_weights(
     hf_hub_download(
         "ByteDance/Hyper-SD",
@@ -61,7 +105,13 @@ pipe.load_lora_weights(
     )
 )
 pipe.fuse_lora(lora_scale=0.125)
-pipe.to(device="cuda", dtype=torch.bfloat16)
 class timer:
     def __init__(self, method_name="timed process"):
@@ -135,80 +185,151 @@ def calculate_dimensions(aspect_ratio: str, base_size: int = 512) -> tuple[int,
         return base_size * 4 // 3, base_size
     return base_size, base_size
 def generate_background(prompt: str, aspect_ratio: str) -> Image.Image:
-    """배경 이미지 생성 함수"""
     try:
-        # 선택된 비율에 따라 크기 계산
         width, height = calculate_dimensions(aspect_ratio)
-        # 8의 배수로 조정
         width, height = adjust_size_to_multiple_of_8(width, height)
         with timer("Background generation"):
-            image = pipe(
-                prompt=prompt,
-                width=width,
-                height=height,
-                num_inference_steps=8,
-                guidance_scale=4.0,
-            ).images[0]
         return image
     except Exception as e:
-        raise gr.Error(f"Background generation failed: {str(e)}")
-def combine_with_background(foreground: Image.Image, background: Image.Image) -> Image.Image:
     """전경과 배경 합성 함수"""
-    background = background.resize(foreground.size)
-    return Image.alpha_composite(background.convert('RGBA'), foreground)
-@spaces.GPU
 def _gpu_process(img: Image.Image, prompt: str | BoundingBox | None) -> tuple[Image.Image, BoundingBox | None, list[str]]:
     time_log: list[str] = []
-    if isinstance(prompt, str):
         t0 = time.time()
-        bbox = gd_detect(img, prompt)
-        time_log.append(f"detect: {time.time() - t0}")
-        if not bbox:
-            print(time_log[0])
-            raise gr.Error("No object detected")
-    else:
-        bbox = prompt
-    t0 = time.time()
-    mask = segmenter(img, bbox)
-    time_log.append(f"segment: {time.time() - t0}")
-    return mask, bbox, time_log
 def _process(img: Image.Image, prompt: str | BoundingBox | None, bg_prompt: str | None = None, aspect_ratio: str = "1:1") -> tuple[tuple[Image.Image, Image.Image, Image.Image], gr.DownloadButton]:
     try:
-        if img.width > 2048 or img.height > 2048:
-            orig_res = max(img.width, img.height)
-            img.thumbnail((2048, 2048))
-            if isinstance(prompt, tuple):
-                x0, y0, x1, y1 = (int(x * 2048 / orig_res) for x in prompt)
-                prompt = (x0, y0, x1, y1)
-        mask, bbox, time_log = _gpu_process(img, prompt)
-        masked_alpha = apply_mask(img, mask, defringe=True)
         if bg_prompt:
             background = generate_background(bg_prompt, aspect_ratio)
-            combined = combine_with_background(masked_alpha, background)
         else:
             combined = Image.alpha_composite(Image.new("RGBA", masked_alpha.size, "white"), masked_alpha)
-        thresholded = mask.point(lambda p: 255 if p > 10 else 0)
-        bbox = thresholded.getbbox()
-        to_dl = masked_alpha.crop(bbox)
-        temp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
-        to_dl.save(temp, format="PNG")
-        temp.close()
-        return (img, combined, masked_alpha), gr.DownloadButton(value=temp.name, interactive=True)
     except Exception as e:
         raise gr.Error(f"Processing failed: {str(e)}")
 def on_change_bbox(prompts: dict[str, Any] | None):
@@ -218,19 +339,47 @@ def on_change_bbox(prompts: dict[str, Any] | None):
 def on_change_prompt(img: Image.Image | None, prompt: str | None, bg_prompt: str | None = None):
     return gr.update(interactive=bool(img and prompt))
-def process_prompt(img: Image.Image, prompt: str, bg_prompt: str | None = None, aspect_ratio: str = "1:1") -> tuple[Image.Image, Image.Image]:
     try:
         if img is None or prompt.strip() == "":
             raise gr.Error("Please provide both image and prompt")
-        # Process the image
         results, _ = _process(img, prompt, bg_prompt, aspect_ratio)
-        # 합성된 이미지와 추출된 이미지만 반환
         return results[1], results[2]
     except Exception as e:
         raise gr.Error(str(e))
 def process_bbox(img: Image.Image, box_input: str) -> tuple[Image.Image, Image.Image]:
     try:
         if img is None or box_input.strip() == "":
@@ -270,7 +419,7 @@ def update_box_button(img, box_input):
         return gr.update(interactive=False, variant="secondary")
-# 맨 앞부분에 CSS 정의 추가
 css = """
 footer {display: none}
 .main-title {
@@ -321,14 +470,27 @@ button.primary {
 button.primary:hover {
     background: #1976D2;
 }
 """
-# UI 부분 수정
 with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
     gr.HTML("""
         <div class="main-title">
-            <h1>🎨 Image Object Extractor</h1>
-            <p>Extract objects from images using text prompts</p>
         </div>
     """)
@@ -359,12 +521,51 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
                     visible=True,
                     scale=1
                 )
             process_btn = gr.Button(
                 "Process",
                 variant="primary",
                 interactive=False
             )
         with gr.Column(scale=1):
             with gr.Row():
                 combined_image = gr.Image(
@@ -396,23 +597,40 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
         queue=False
     )
-    # bg_prompt가 비어있을 때 aspect_ratio를 비활성화하는 함수
-    def update_aspect_ratio(bg_prompt):
-        return gr.update(visible=bool(bg_prompt))
     bg_prompt.change(
-        fn=update_aspect_ratio,
         inputs=bg_prompt,
-        outputs=aspect_ratio,
         queue=False
     )
     process_btn.click(
         fn=process_prompt,
-        inputs=[input_image, text_prompt, bg_prompt, aspect_ratio],
         outputs=[combined_image, extracted_image],
         queue=True
     )
-demo.queue(max_size=30, api_open=False)
-demo.launch()

 from refiners.solutions import BoxSegmenter
 from transformers import GroundingDinoForObjectDetection, GroundingDinoProcessor
 from diffusers import FluxPipeline
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+import gc
+def clear_memory():
+    """Release unreferenced Python objects and, best-effort, cached CUDA memory.
+
+    Runs the garbage collector, then empties the CUDA cache on device 0 when
+    CUDA is available. A failure while touching CUDA is logged and otherwise
+    ignored, so this cleanup step never raises.
+    """
+    gc.collect()
+    try:
+        if torch.cuda.is_available():
+            with torch.cuda.device(0):  # explicitly use device 0
+                torch.cuda.empty_cache()
+    except Exception as e:
+        # Previously a bare ``except: pass``, which also swallowed
+        # KeyboardInterrupt/SystemExit and hid every failure. Stay
+        # best-effort, but leave a trace in the log.
+        print(f"Warning: Could not clear CUDA cache: {str(e)}")
+# GPU setup
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # explicitly pin to cuda:0
+# Apply the CUDA settings inside try/except so a failure here only prints a warning
+if torch.cuda.is_available():
+    try:
+        with torch.cuda.device(0):
+            torch.cuda.empty_cache()
+            torch.backends.cudnn.benchmark = True
+            torch.backends.cuda.matmul.allow_tf32 = True
+    except Exception:
+        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
+        # still propagate during startup.
+        print("Warning: Could not configure CUDA settings")
+# Translation model setup; the model is moved to the CPU and the pipeline uses device=-1
+model_name = "Helsinki-NLP/opus-mt-ko-en"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to('cpu')
+translator = pipeline("translation", model=model, tokenizer=tokenizer, device=-1)
+def translate_to_english(text: str) -> str:
+    """Translate ``text`` to English when it contains Hangul syllables.
+
+    Text with no character in the range '가'..'힣' is returned unchanged and
+    the translator is not invoked. Any failure is logged and the original
+    text is returned instead.
+    """
+    hangul_first, hangul_last = ord('가'), ord('힣')
+    try:
+        contains_hangul = any(hangul_first <= ord(ch) <= hangul_last for ch in text)
+        if not contains_hangul:
+            return text
+        outputs = translator(text, max_length=128)
+        translated = outputs[0]['translation_text']
+        print(f"Translated '{text}' to '{translated}'")
+        return translated
+    except Exception as e:
+        print(f"Translation error: {str(e)}")
+        return text
 BoundingBox = tuple[int, int, int, int]
 pillow_heif.register_heif_opener()
 pillow_heif.register_avif_opener()
 # HF token setup
 HF_TOKEN = os.getenv("HF_TOKEN")
 if HF_TOKEN is None:
 # Initialise the FLUX pipeline
 pipe = FluxPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-dev",
+    # NOTE(review): the parent revision loaded the weights as torch.bfloat16;
+    # confirm float16 is numerically safe for this model.
+    torch_dtype=torch.float16,
     use_auth_token=HF_TOKEN
 )
+pipe.enable_attention_slicing(slice_size="auto")
+# Load the LoRA weights
 pipe.load_lora_weights(
     hf_hub_download(
         "ByteDance/Hyper-SD",
     )
 )
 pipe.fuse_lora(lora_scale=0.125)
+# Move the pipeline to the GPU inside try/except so a failure only prints a warning
+try:
+    if torch.cuda.is_available():
+        pipe = pipe.to("cuda:0")  # explicitly target cuda:0
+except Exception as e:
+    print(f"Warning: Could not move pipeline to CUDA: {str(e)}")
 class timer:
     def __init__(self, method_name="timed process"):
         return base_size * 4 // 3, base_size
     return base_size, base_size
+@spaces.GPU(duration=20)  # reduced from 40 s to 20 s
 def generate_background(prompt: str, aspect_ratio: str) -> Image.Image:
+    """Generate a background image for ``prompt`` with the FLUX pipeline.
+
+    The size comes from ``calculate_dimensions(aspect_ratio)``, is passed
+    through ``adjust_size_to_multiple_of_8`` and is scaled down so that the
+    longer side does not exceed 768 before the adjustment is applied again.
+
+    This function does not raise: if the pipeline call fails it returns a
+    white image of the requested size, and if anything else fails it returns
+    a white 512x512 image. Both failures are printed.
+    """
     try:
         width, height = calculate_dimensions(aspect_ratio)
         width, height = adjust_size_to_multiple_of_8(width, height)
+        # Cap the longer side at max_size, keeping the aspect ratio.
+        max_size = 768
+        if width > max_size or height > max_size:
+            ratio = max_size / max(width, height)
+            width = int(width * ratio)
+            height = int(height * ratio)
+            # int() truncation may break the multiple-of-8 adjustment, so redo it.
+            width, height = adjust_size_to_multiple_of_8(width, height)
         with timer("Background generation"):
+            try:
+                with torch.inference_mode():
+                    image = pipe(
+                        prompt=prompt,
+                        width=width,
+                        height=height,
+                        num_inference_steps=8,
+                        guidance_scale=4.0
+                    ).images[0]
+            except Exception as e:
+                # Fall back to a blank canvas of the requested size.
+                print(f"Pipeline error: {str(e)}")
+                return Image.new('RGB', (width, height), 'white')
         return image
     except Exception as e:
+        # Width/height may be unset here, hence the fixed 512x512 fallback.
+        print(f"Background generation error: {str(e)}")
+        return Image.new('RGB', (512, 512), 'white')
+def create_position_grid():
+    """Return the HTML markup for a 3x3 grid of position buttons.
+
+    Each button carries a ``data-pos`` attribute ("top-left" ...
+    "bottom-right"); the "bottom-center" button is additionally tagged with
+    ``data-default="true"``.
+    """
+    return """
+    <div class="position-grid" style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; width: 150px; margin: auto;">
+        <button class="position-btn" data-pos="top-left">↖</button>
+        <button class="position-btn" data-pos="top-center">↑</button>
+        <button class="position-btn" data-pos="top-right">↗</button>
+        <button class="position-btn" data-pos="middle-left">←</button>
+        <button class="position-btn" data-pos="middle-center">•</button>
+        <button class="position-btn" data-pos="middle-right">→</button>
+        <button class="position-btn" data-pos="bottom-left">↙</button>
+        <button class="position-btn" data-pos="bottom-center" data-default="true">↓</button>
+        <button class="position-btn" data-pos="bottom-right">↘</button>
+    </div>
+    """
+def calculate_object_position(position: str, bg_size: tuple[int, int], obj_size: tuple[int, int]) -> tuple[int, int]:
+    """Return the top-left (x, y) offset that anchors an object on a 3x3 grid.
+
+    ``position`` is "<row>-<column>" with row in top/middle/bottom and column
+    in left/center/right. Any other value falls back to "bottom-center".
+    Offsets can be negative when the object is larger than the background.
+    """
+    bg_w, bg_h = bg_size
+    obj_w, obj_h = obj_size
+    free_x = bg_w - obj_w
+    free_y = bg_h - obj_h
+    # Per-axis offsets: flush to the start, centred, flush to the end.
+    column_x = {"left": 0, "center": free_x // 2, "right": free_x}
+    row_y = {"top": 0, "middle": free_y // 2, "bottom": free_y}
+    anchors = {
+        f"{row}-{column}": (x, y)
+        for row, y in row_y.items()
+        for column, x in column_x.items()
+    }
+    return anchors.get(position, anchors["bottom-center"])
+def resize_object(image: Image.Image, scale_percent: float) -> Image.Image:
+    """Return ``image`` resized to ``scale_percent`` percent of its size.
+
+    Args:
+        image: The object image to scale.
+        scale_percent: Target size as a percentage of the current size
+            (100 keeps the size).
+
+    Returns:
+        A new image resampled with LANCZOS; each side is at least 1 pixel.
+    """
+    # int() truncates, so a small image or a small percentage can yield 0,
+    # which Pillow's resize rejects. Clamp both sides to one pixel.
+    width = max(1, int(image.width * scale_percent / 100))
+    height = max(1, int(image.height * scale_percent / 100))
+    return image.resize((width, height), Image.Resampling.LANCZOS)
+def combine_with_background(foreground: Image.Image, background: Image.Image,
+                          position: str = "bottom-center", scale_percent: float = 100) -> Image.Image:
     """Composite the foreground onto the background."""
+    # Prepare the background: work on an RGBA conversion of it
+    result = background.convert('RGBA')
+    # Scale the object
+    scaled_foreground = resize_object(foreground, scale_percent)
+    # Compute where the object goes
+    x, y = calculate_object_position(position, result.size, scaled_foreground.size)
+    # Composite: the foreground is passed as its own paste mask
+    # (assumes it carries an alpha channel - TODO confirm against callers)
+    result.paste(scaled_foreground, (x, y), scaled_foreground)
+    return result
+@spaces.GPU(duration=30)  # reduced from 120 s to 30 s
 def _gpu_process(img: Image.Image, prompt: str | BoundingBox | None) -> tuple[Image.Image, BoundingBox | None, list[str]]:
+    """Detect (if needed) and segment the prompted object.
+
+    A string prompt is passed to ``gd_detect`` to obtain a bounding box; any
+    other prompt value is used as the bounding box directly. The box is then
+    handed to ``segmenter``.
+
+    Returns ``(mask, bbox, time_log)`` where ``time_log`` holds the
+    "detect: ..." / "segment: ..." timing strings.
+
+    Raises ``gr.Error`` when detection returns nothing; any exception is
+    printed and re-raised.
+    """
     time_log: list[str] = []
+    try:
+        if isinstance(prompt, str):
+            t0 = time.time()
+            bbox = gd_detect(img, prompt)
+            time_log.append(f"detect: {time.time() - t0}")
+            if not bbox:
+                print(time_log[0])
+                raise gr.Error("No object detected")
+        else:
+            bbox = prompt
         t0 = time.time()
+        mask = segmenter(img, bbox)
+        time_log.append(f"segment: {time.time() - t0}")
+        return mask, bbox, time_log
+    except Exception as e:
+        # Log for the server console, then let the caller handle it.
+        print(f"GPU process error: {str(e)}")
+        raise
 def _process(img: Image.Image, prompt: str | BoundingBox | None, bg_prompt: str | None = None, aspect_ratio: str = "1:1") -> tuple[tuple[Image.Image, Image.Image, Image.Image], gr.DownloadButton]:
+    """Extract the prompted object from ``img`` and build the preview images.
+
+    Args:
+        img: Input image; downscaled so its longer side is at most 1024.
+        prompt: Text prompt or bounding box selecting the object.
+        bg_prompt: Optional background prompt. When set, ``combined`` is the
+            generated background only; the object is not pasted here.
+        aspect_ratio: Aspect ratio passed to ``generate_background``.
+
+    Returns:
+        ``((img, combined, masked_alpha), download_button)`` where the button
+        points at a temporary PNG of ``combined``.
+
+    Raises:
+        gr.Error: Wrapping any failure, after memory has been cleared.
+    """
     try:
+        # Cap the input image size
+        max_size = 1024
+        if img.width > max_size or img.height > max_size:
+            ratio = max_size / max(img.width, img.height)
+            new_size = (int(img.width * ratio), int(img.height * ratio))
+            img = img.resize(new_size, Image.LANCZOS)
+            # Keep a bounding-box prompt aligned with the resized image; the
+            # previous revision rescaled it and this step had dropped that.
+            if isinstance(prompt, tuple):
+                x0, y0, x1, y1 = (int(v * ratio) for v in prompt)
+                prompt = (x0, y0, x1, y1)
+        # CUDA memory housekeeping (best-effort)
+        try:
+            if torch.cuda.is_available():
+                current_device = torch.cuda.current_device()
+                with torch.cuda.device(current_device):
+                    torch.cuda.empty_cache()
+        except Exception as e:
+            print(f"CUDA memory management failed: {e}")
+        with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
+            mask, bbox, time_log = _gpu_process(img, prompt)
+            masked_alpha = apply_mask(img, mask, defringe=True)
         if bg_prompt:
             background = generate_background(bg_prompt, aspect_ratio)
+            combined = background
         else:
             combined = Image.alpha_composite(Image.new("RGBA", masked_alpha.size, "white"), masked_alpha)
+        clear_memory()
+        # delete=False: the file must outlive this function for the download button.
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp:
+            combined.save(temp.name)
+            return (img, combined, masked_alpha), gr.DownloadButton(value=temp.name, interactive=True)
     except Exception as e:
+        clear_memory()
+        print(f"Processing error: {str(e)}")
         raise gr.Error(f"Processing failed: {str(e)}")
 def on_change_bbox(prompts: dict[str, Any] | None):
 def on_change_prompt(img: Image.Image | None, prompt: str | None, bg_prompt: str | None = None):
+    """Return a Gradio update whose ``interactive`` flag is set only when both
+    ``img`` and ``prompt`` are truthy. ``bg_prompt`` is accepted but not used.
+    """
     return gr.update(interactive=bool(img and prompt))
+def process_prompt(img: Image.Image, prompt: str, bg_prompt: str | None = None,
+                  aspect_ratio: str = "1:1", position: str = "bottom-center",
+                  scale_percent: float = 100) -> tuple[Image.Image, Image.Image]:
+    """Handle the Process button: extract the object and optionally place it
+    on a generated background.
+
+    Both prompts are passed through ``translate_to_english`` first. With a
+    background prompt, the extracted object is composited onto the generated
+    background at ``position`` and ``scale_percent``; if compositing fails,
+    the un-composited result is returned instead.
+
+    Returns ``(combined_image, extracted_object)``.
+
+    Raises ``gr.Error`` when the image or prompt is missing or processing
+    fails. Memory is cleared on every exit path.
+    """
     try:
         if img is None or prompt.strip() == "":
             raise gr.Error("Please provide both image and prompt")
+        print(f"Processing with position: {position}, scale: {scale_percent}")
+        try:
+            prompt = translate_to_english(prompt)
+            if bg_prompt:
+                bg_prompt = translate_to_english(bg_prompt)
+        except Exception as e:
+            print(f"Translation error (continuing with original text): {str(e)}")
         results, _ = _process(img, prompt, bg_prompt, aspect_ratio)
+        if bg_prompt:
+            try:
+                # results[1] is the combined/background image, results[2] the extracted object.
+                combined = combine_with_background(
+                    foreground=results[2],
+                    background=results[1],
+                    position=position,
+                    scale_percent=scale_percent
+                )
+                print(f"Combined image created with position: {position}")
+                return combined, results[2]
+            except Exception as e:
+                # Compositing is best-effort: fall back to the plain results.
+                print(f"Combination error: {str(e)}")
+                return results[1], results[2]
         return results[1], results[2]
     except Exception as e:
+        print(f"Error in process_prompt: {str(e)}")
         raise gr.Error(str(e))
+    finally:
+        clear_memory()
 def process_bbox(img: Image.Image, box_input: str) -> tuple[Image.Image, Image.Image]:
     try:
         if img is None or box_input.strip() == "":
         return gr.update(interactive=False, variant="secondary")
+# CSS definitions
 css = """
 footer {display: none}
 .main-title {
 button.primary:hover {
     background: #1976D2;
 }
+.position-btn {
+    transition: all 0.3s ease;
+}
+.position-btn:hover {
+    background-color: #e3f2fd;
+}
+.position-btn.selected {
+    background-color: #2196F3;
+    color: white;
+}
 """
+# UI layout
+# In the UI layout, process_btn is moved up and the position_grid.click part is removed
+# UI layout
 with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
     gr.HTML("""
         <div class="main-title">
+            <h1>🎨GiniGen Canvas</h1>
+            <p>AI Integrated Image Creator: Extract objects, generate backgrounds, and adjust ratios and positions to create complete images with AI.</p>
         </div>
     """)
                     visible=True,
                     scale=1
                 )
+            with gr.Row(visible=False) as object_controls:
+                with gr.Column(scale=1):
+                    with gr.Row():
+                        position = gr.State(value="bottom-center")
+                        btn_top_left = gr.Button("↖")
+                        btn_top_center = gr.Button("↑")
+                        btn_top_right = gr.Button("↗")
+                    with gr.Row():
+                        btn_middle_left = gr.Button("←")
+                        btn_middle_center = gr.Button("•")
+                        btn_middle_right = gr.Button("→")
+                    with gr.Row():
+                        btn_bottom_left = gr.Button("↙")
+                        btn_bottom_center = gr.Button("↓")
+                        btn_bottom_right = gr.Button("↘")
+                with gr.Column(scale=1):
+                    scale_slider = gr.Slider(
+                        minimum=10,
+                        maximum=200,
+                        value=50,
+                        step=5,
+                        label="Object Size (%)"
+                    )
             process_btn = gr.Button(
                 "Process",
                 variant="primary",
                 interactive=False
             )
+            # Click handlers for each position button
+            def update_position(new_position):
+                # Identity helper: the returned value is written to the `position` output.
+                return new_position
+            btn_top_left.click(fn=lambda: update_position("top-left"), outputs=position)
+            btn_top_center.click(fn=lambda: update_position("top-center"), outputs=position)
+            btn_top_right.click(fn=lambda: update_position("top-right"), outputs=position)
+            btn_middle_left.click(fn=lambda: update_position("middle-left"), outputs=position)
+            btn_middle_center.click(fn=lambda: update_position("middle-center"), outputs=position)
+            btn_middle_right.click(fn=lambda: update_position("middle-right"), outputs=position)
+            btn_bottom_left.click(fn=lambda: update_position("bottom-left"), outputs=position)
+            btn_bottom_center.click(fn=lambda: update_position("bottom-center"), outputs=position)
+            btn_bottom_right.click(fn=lambda: update_position("bottom-right"), outputs=position)
         with gr.Column(scale=1):
             with gr.Row():
                 combined_image = gr.Image(
         queue=False
     )
+    def update_controls(bg_prompt):
+        """Show or hide the controls depending on whether a background prompt was entered."""
+        is_visible = bool(bg_prompt)
+        return [
+            gr.update(visible=is_visible),  # aspect_ratio
+            gr.update(visible=is_visible),  # object_controls
+        ]
     bg_prompt.change(
+        fn=update_controls,
         inputs=bg_prompt,
+        outputs=[aspect_ratio, object_controls],
         queue=False
     )
     process_btn.click(
         fn=process_prompt,
+        inputs=[
+            input_image,
+            text_prompt,
+            bg_prompt,
+            aspect_ratio,
+            position,
+            scale_slider
+        ],
         outputs=[combined_image, extracted_image],
         queue=True
     )
+demo.queue(max_size=10)  # cap the queue size
+demo.launch(
+    server_name="0.0.0.0",
+    server_port=7860,
+    share=False,
+    max_threads=2  # cap the number of threads
+)