aiqtech committed on
Commit
df8be8a
·
verified ·
1 Parent(s): 76ce631

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -400
app.py CHANGED
@@ -20,83 +20,20 @@ from transformers import GroundingDinoForObjectDetection, GroundingDinoProcessor
20
  from diffusers import FluxPipeline
21
  from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
22
  import gc
23
- from PIL import Image, ImageDraw, ImageFont
24
 
25
  def clear_memory():
26
  """๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ ํ•จ์ˆ˜"""
27
  gc.collect()
28
- if torch.cuda.is_available():
29
- try:
30
- with torch.cuda.device('cuda'):
31
- torch.cuda.empty_cache()
32
- except Exception as e:
33
- print(f"GPU memory management warning: {e}")
34
-
35
- def initialize_models():
36
- """๋ชจ๋ธ ์ดˆ๊ธฐํ™” ํ•จ์ˆ˜"""
37
- global segmenter, gd_model, gd_processor, pipe, translator
38
-
39
  try:
40
- # ๋ฒˆ์—ญ ๋ชจ๋ธ - ๊ฐ€๋ฒผ์šด ๋ฒ„์ „ ์‚ฌ์šฉ
41
- model = AutoModelForSeq2SeqLM.from_pretrained( # ์ˆ˜์ •๋œ ๋ถ€๋ถ„
42
- model_name,
43
- low_cpu_mem_usage=True,
44
- torch_dtype=torch.float16
45
- ).to('cpu')
46
- tokenizer = AutoTokenizer.from_pretrained(model_name)
47
- translator = pipeline("translation", model=model, tokenizer=tokenizer, device=-1)
48
- del model # ๋ช…์‹œ์  ๋ฉ”๋ชจ๋ฆฌ ํ•ด์ œ
49
-
50
- # GroundingDINO - ๋” ์ž‘์€ ๋ชจ๋ธ ์‚ฌ์šฉ
51
- gd_processor = GroundingDinoProcessor.from_pretrained(
52
- "IDEA-Research/grounding-dino-base", # ๋” ์ž‘์€ base ๋ชจ๋ธ
53
- torch_dtype=torch.float16
54
- )
55
- gd_model = None # ํ•„์š”ํ•  ๋•Œ ๋กœ๋“œ
56
-
57
- # Segmenter - ๊ธฐ๋ณธ ์„ค์ •
58
- segmenter = None # ํ•„์š”ํ•  ๋•Œ ๋กœ๋“œ
59
-
60
- # FLUX ํŒŒ์ดํ”„๋ผ์ธ - ๋ฉ”๋ชจ๋ฆฌ ํšจ์œจ์  ์„ค์ •
61
- pipe = FluxPipeline.from_pretrained(
62
- "black-forest-labs/FLUX.1-dev",
63
- torch_dtype=torch.float16,
64
- low_cpu_mem_usage=True,
65
- use_safetensors=True
66
- )
67
- pipe.enable_attention_slicing(slice_size=1)
68
- pipe.enable_sequential_cpu_offload()
69
-
70
- except Exception as e:
71
- print(f"Model initialization error: {str(e)}")
72
- raise
73
 
74
- def load_model_on_demand(model_type: str):
75
- """ํ•„์š”ํ•  ๋•Œ๋งŒ ๋ชจ๋ธ์„ ๋กœ๋“œํ•˜๋Š” ํ•จ์ˆ˜"""
76
- global gd_model, segmenter
77
-
78
- if model_type == "gd" and gd_model is None:
79
- gd_model = GroundingDinoForObjectDetection.from_pretrained(
80
- "IDEA-Research/grounding-dino-base",
81
- torch_dtype=torch.float16,
82
- low_cpu_mem_usage=True
83
- )
84
- elif model_type == "segmenter" and segmenter is None:
85
- segmenter = BoxSegmenter(device='cpu')
86
-
87
  # GPU ์„ค์ •
88
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # ๋ช…์‹œ์ ์œผ๋กœ cuda:0 ์ง€์ •
89
 
90
- # ์ „์—ญ ์„ค์ •
91
- torch.backends.cudnn.benchmark = False
92
- torch.backends.cuda.matmul.allow_tf32 = True
93
- torch.set_float32_matmul_precision('medium')
94
-
95
- # ์บ์‹œ ํฌ๊ธฐ ์ œํ•œ
96
- os.environ['TRANSFORMERS_CACHE'] = '/tmp/transformers_cache'
97
- os.environ['HF_HOME'] = '/tmp/hf_home'
98
- os.environ['TORCH_HOME'] = '/tmp/torch_home'
99
-
100
  # GPU ์„ค์ •์„ try-except๋กœ ๊ฐ์‹ธ๊ธฐ
101
  if torch.cuda.is_available():
102
  try:
@@ -248,29 +185,32 @@ def calculate_dimensions(aspect_ratio: str, base_size: int = 512) -> tuple[int,
248
  return base_size * 4 // 3, base_size
249
  return base_size, base_size
250
 
251
- @spaces.GPU(duration=20)
252
  def generate_background(prompt: str, aspect_ratio: str) -> Image.Image:
253
  try:
254
  width, height = calculate_dimensions(aspect_ratio)
255
  width, height = adjust_size_to_multiple_of_8(width, height)
256
 
257
- # ์ตœ๋Œ€ ํฌ๊ธฐ ์ œํ•œ
258
- max_size = 512 # 768์—์„œ 512๋กœ ๊ฐ์†Œ
259
  if width > max_size or height > max_size:
260
  ratio = max_size / max(width, height)
261
  width = int(width * ratio)
262
  height = int(height * ratio)
263
  width, height = adjust_size_to_multiple_of_8(width, height)
264
 
265
- with torch.inference_mode():
266
- image = pipe(
267
- prompt=prompt,
268
- width=width,
269
- height=height,
270
- num_inference_steps=4, # 8์—์„œ 4๋กœ ๊ฐ์†Œ
271
- guidance_scale=4.0,
272
- batch_size=1
273
- ).images[0]
 
 
 
 
274
 
275
  return image
276
  except Exception as e:
@@ -399,50 +339,47 @@ def on_change_bbox(prompts: dict[str, Any] | None):
399
  def on_change_prompt(img: Image.Image | None, prompt: str | None, bg_prompt: str | None = None):
400
  return gr.update(interactive=bool(img and prompt))
401
 
402
- def process_image(img: Image.Image) -> Image.Image:
403
- """์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ ์ตœ์ ํ™”"""
404
- # ์ตœ๋Œ€ ํฌ๊ธฐ ์ œํ•œ
405
- max_size = 512 # ๋” ์ž‘์€ ํฌ๊ธฐ๋กœ ์ œํ•œ
406
- if img.width > max_size or img.height > max_size:
407
- ratio = max_size / max(img.width, img.height)
408
- new_size = (int(img.width * ratio), int(img.height * ratio))
409
- img = img.resize(new_size, Image.LANCZOS)
410
-
411
- # ๋ฉ”๋ชจ๋ฆฌ ํšจ์œจ์„ ์œ„ํ•œ ์ด๋ฏธ์ง€ ๋ชจ๋“œ ๋ณ€ํ™˜
412
- if img.mode in ['RGBA', 'LA']:
413
- background = Image.new('RGB', img.size, (255, 255, 255))
414
- background.paste(img, mask=img.split()[-1])
415
- img = background
416
-
417
- return img
418
-
419
- @spaces.GPU(duration=15) # ๋” ์งง์€ ์‹œ๊ฐ„ ์ œํ•œ
420
  def process_prompt(img: Image.Image, prompt: str, bg_prompt: str | None = None,
421
  aspect_ratio: str = "1:1", position: str = "bottom-center",
422
- scale_percent: float = 100, text_params: dict | None = None):
423
  try:
424
- # ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ
425
- img = process_image(img)
426
 
427
- # ํ•„์š”ํ•œ ๋ชจ๋ธ๋งŒ ๋กœ๋“œ
428
- load_model_on_demand("gd")
429
- load_model_on_demand("segmenter")
430
 
431
- with torch.cuda.amp.autocast(): # ๋ฉ”๋ชจ๋ฆฌ ํšจ์œจ์„ ์œ„ํ•œ mixed precision
432
- # ์ฒ˜๋ฆฌ ๋กœ์ง...
433
- pass
434
-
435
- finally:
436
- # ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ
437
- clear_memory()
438
- if torch.cuda.is_available():
 
 
439
  try:
440
- with torch.cuda.device('cuda'):
441
- torch.cuda.empty_cache()
 
 
 
 
 
 
442
  except Exception as e:
443
- print(f"GPU cleanup warning: {e}")
444
-
445
-
 
 
 
 
 
 
 
446
  def process_bbox(img: Image.Image, box_input: str) -> tuple[Image.Image, Image.Image]:
447
  try:
448
  if img is None or box_input.strip() == "":
@@ -482,7 +419,6 @@ def update_box_button(img, box_input):
482
  return gr.update(interactive=False, variant="secondary")
483
 
484
 
485
-
486
  # CSS ์ •์˜
487
  css = """
488
  footer {display: none}
@@ -546,234 +482,99 @@ button.primary:hover {
546
  }
547
  """
548
 
549
- def add_text_with_stroke(draw, text, x, y, font, text_color, stroke_width):
550
- """ํ…์ŠคํŠธ์— ์™ธ๊ณฝ์„ ์„ ์ถ”๊ฐ€ํ•˜๋Š” ํ—ฌํผ ํ•จ์ˆ˜"""
551
- for adj_x in range(-stroke_width, stroke_width + 1):
552
- for adj_y in range(-stroke_width, stroke_width + 1):
553
- draw.text((x + adj_x, y + adj_y), text, font=font, fill=text_color)
554
-
555
- def add_text_to_image(image, text_params):
556
- """์ด๋ฏธ์ง€์— ํ…์ŠคํŠธ๋ฅผ ์ถ”๊ฐ€ํ•˜๋Š” ํ•จ์ˆ˜"""
557
- if not text_params.get('text'):
558
- return image
559
-
560
- if image.mode != 'RGBA':
561
- image = image.convert('RGBA')
562
-
563
- txt_overlay = Image.new('RGBA', image.size, (255, 255, 255, 0))
564
- draw = ImageDraw.Draw(txt_overlay)
565
-
566
- try:
567
- font = ImageFont.truetype("DejaVuSans.ttf", text_params['font_size'])
568
- except:
569
- try:
570
- font = ImageFont.truetype("arial.ttf", text_params['font_size'])
571
- except:
572
- font = ImageFont.load_default()
573
-
574
- color_map = {
575
- 'White': (255, 255, 255),
576
- 'Black': (0, 0, 0),
577
- 'Red': (255, 0, 0),
578
- 'Green': (0, 255, 0),
579
- 'Blue': (0, 0, 255),
580
- 'Yellow': (255, 255, 0),
581
- 'Purple': (128, 0, 128)
582
- }
583
-
584
- rgb_color = color_map.get(text_params['color'], (255, 255, 255))
585
- text_color = (*rgb_color, text_params['opacity'])
586
-
587
- text_bbox = draw.textbbox((0, 0), text_params['text'], font=font)
588
- text_width = text_bbox[2] - text_bbox[0]
589
- text_height = text_bbox[3] - text_bbox[1]
590
-
591
- x = int((image.width - text_width) * (text_params['x_position'] / 100))
592
- y = int((image.height - text_height) * (text_params['y_position'] / 100))
593
-
594
- add_text_with_stroke(
595
- draw,
596
- text_params['text'],
597
- x,
598
- y,
599
- font,
600
- text_color,
601
- text_params['thickness']
602
- )
603
-
604
- return Image.alpha_composite(image, txt_overlay)
605
-
606
-
607
- def update_controls(bg_prompt):
608
- """๋ฐฐ๊ฒฝ ํ”„๋กฌํ”„ํŠธ ์ž…๋ ฅ ์—ฌ๋ถ€์— ๋”ฐ๋ผ ์ปจํŠธ๋กค ํ‘œ์‹œ ์—…๋ฐ์ดํŠธ"""
609
- is_visible = bool(bg_prompt)
610
- return [
611
- gr.update(visible=True), # aspect_ratio๋Š” ํ•ญ์ƒ ํ‘œ์‹œ
612
- gr.update(visible=is_visible) # object_controls
613
- ]
614
-
615
- def update_process_button(img, prompt):
616
- """ํ”„๋กœ์„ธ์Šค ๋ฒ„ํŠผ ์ƒํƒœ ์—…๋ฐ์ดํŠธ"""
617
- return gr.update(
618
- interactive=bool(img and prompt),
619
- variant="primary" if bool(img and prompt) else "secondary"
620
- )
621
- if __name__ == "__main__":
622
- # CUDA ์„ค์ •
623
- if torch.cuda.is_available():
624
- try:
625
- torch.cuda.set_device('cuda:0') # ๋ช…์‹œ์ ์œผ๋กœ cuda:0 ์„ค์ •
626
- torch.backends.cudnn.benchmark = True
627
- torch.backends.cuda.matmul.allow_tf32 = True
628
- except Exception as e:
629
- print(f"CUDA setup warning: {e}")
630
-
631
- if HF_TOKEN:
632
- login(token=HF_TOKEN, add_to_git_credential=False)
633
-
634
- # ๋ชจ๋ธ ์ดˆ๊ธฐํ™”
635
- initialize_models()
636
-
637
- # Gradio UI ์ •์˜
638
- with gr.Blocks(
639
- theme=gr.themes.Soft(),
640
- css=css,
641
- analytics_enabled=False,
642
- cache_examples=False
643
- ) as demo:
644
-
645
- # HTML ํ—ค๋”
646
- gr.HTML("""
647
- <div class="main-title">
648
- <h1>๐ŸŽจGiniGen Canvas</h1>
649
- <p>AI Integrated Image Creator: Extract objects, generate backgrounds, and adjust ratios and positions to create complete images with AI.</p>
650
- </div>
651
- """)
652
-
653
- with gr.Row():
654
- # ์ž…๋ ฅ ์ปฌ๋Ÿผ
655
- with gr.Column(scale=1):
656
- input_image = gr.Image(
657
- type="pil",
658
- label="Upload Image",
659
- interactive=True
660
  )
661
- text_prompt = gr.Textbox(
662
- label="Object to Extract",
663
- placeholder="Enter what you want to extract...",
664
- interactive=True
 
 
 
665
  )
666
-
667
- # ๋ฐฐ๊ฒฝ ๋ฐ ๋น„์œจ ์„ค์ •
668
- with gr.Row():
669
- bg_prompt = gr.Textbox(
670
- label="Background Prompt (optional)",
671
- placeholder="Describe the background...",
672
- interactive=True,
673
- scale=3
674
- )
675
- aspect_ratio = gr.Dropdown(
676
- choices=["1:1", "16:9", "9:16", "4:3"],
677
- value="1:1",
678
- label="Aspect Ratio",
679
- interactive=True,
680
- visible=True,
681
- scale=1
682
- )
683
 
684
- # ์˜ค๋ธŒ์ ํŠธ ์ปจํŠธ๋กค
685
- with gr.Row(visible=False) as object_controls:
686
- # ์œ„์น˜ ์ปจํŠธ๋กค
687
- with gr.Column(scale=1):
688
  position = gr.State(value="bottom-center")
689
- with gr.Row():
690
- btn_top_left = gr.Button("โ†–")
691
- btn_top_center = gr.Button("โ†‘")
692
- btn_top_right = gr.Button("โ†—")
693
- with gr.Row():
694
- btn_middle_left = gr.Button("โ†")
695
- btn_middle_center = gr.Button("โ€ข")
696
- btn_middle_right = gr.Button("โ†’")
697
- with gr.Row():
698
- btn_bottom_left = gr.Button("โ†™")
699
- btn_bottom_center = gr.Button("โ†“")
700
- btn_bottom_right = gr.Button("โ†˜")
701
-
702
- # ํฌ๊ธฐ ์ปจํŠธ๋กค
703
- with gr.Column(scale=1):
704
- scale_slider = gr.Slider(
705
- minimum=10,
706
- maximum=200,
707
- value=50,
708
- step=5,
709
- label="Object Size (%)"
710
- )
711
-
712
- # ํ…์ŠคํŠธ ์ž…๋ ฅ ์„น์…˜
713
- with gr.Group() as text_group:
714
- text_input = gr.Textbox(
715
- label="Text to Add",
716
- placeholder="Enter text..."
717
- )
718
  with gr.Row():
719
- with gr.Column(scale=1):
720
- font_size = gr.Slider(
721
- minimum=10,
722
- maximum=800,
723
- value=400,
724
- step=10,
725
- label="Font Size"
726
- )
727
- thickness = gr.Slider(
728
- minimum=0,
729
- maximum=20,
730
- value=0,
731
- step=1,
732
- label="Text Thickness"
733
- )
734
- color_dropdown = gr.Dropdown(
735
- choices=["White", "Black", "Red", "Green", "Blue", "Yellow", "Purple"],
736
- value="White",
737
- label="Text Color"
738
- )
739
- with gr.Column(scale=1):
740
- opacity_slider = gr.Slider(
741
- minimum=0,
742
- maximum=255,
743
- value=255,
744
- step=1,
745
- label="Opacity"
746
- )
747
- text_x_position = gr.Slider(
748
- minimum=0,
749
- maximum=100,
750
- value=50,
751
- step=1,
752
- label="Text X Position (%)"
753
- )
754
- text_y_position = gr.Slider(
755
- minimum=0,
756
- maximum=100,
757
- value=50,
758
- step=1,
759
- label="Text Y Position (%)"
760
- )
761
-
762
- # ์ฒ˜๋ฆฌ ๋ฒ„ํŠผ
763
- process_btn = gr.Button(
764
- "Process",
765
- variant="primary",
766
- interactive=False
767
- )
768
 
769
- # ์ถœ๋ ฅ ์ปฌ๋Ÿผ
770
- with gr.Column(scale=1):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
771
  combined_image = gr.Image(
772
  label="Combined Result",
773
  show_download_button=True,
774
  type="pil",
775
  height=512
776
  )
 
777
  extracted_image = gr.Image(
778
  label="Extracted Object",
779
  show_download_button=True,
@@ -781,73 +582,55 @@ if __name__ == "__main__":
781
  height=256
782
  )
783
 
784
- # ํ…์ŠคํŠธ ํŒŒ๋ผ๋ฏธํ„ฐ ๊ฐ€์ ธ์˜ค๊ธฐ ํ•จ์ˆ˜
785
- def get_text_params():
786
- return {
787
- 'text': text_input.value,
788
- 'font_size': font_size.value,
789
- 'thickness': thickness.value,
790
- 'color': color_dropdown.value,
791
- 'opacity': opacity_slider.value,
792
- 'x_position': text_x_position.value,
793
- 'y_position': text_y_position.value
794
- }
795
-
796
- # ์ด๋ฒคํŠธ ๋ฐ”์ธ๋”ฉ
797
- input_image.change(
798
- fn=update_process_button,
799
- inputs=[input_image, text_prompt],
800
- outputs=process_btn,
801
- queue=False
802
- )
803
-
804
- text_prompt.change(
805
- fn=update_process_button,
806
- inputs=[input_image, text_prompt],
807
- outputs=process_btn,
808
- queue=False
809
- )
810
-
811
- bg_prompt.change(
812
- fn=update_controls,
813
- inputs=[bg_prompt],
814
- outputs=[aspect_ratio, object_controls],
815
- queue=False
816
- )
817
-
818
- # ์œ„์น˜ ๋ฒ„ํŠผ ์ด๋ฒคํŠธ
819
- for btn, pos in [
820
- (btn_top_left, "top-left"), (btn_top_center, "top-center"), (btn_top_right, "top-right"),
821
- (btn_middle_left, "middle-left"), (btn_middle_center, "middle-center"), (btn_middle_right, "middle-right"),
822
- (btn_bottom_left, "bottom-left"), (btn_bottom_center, "bottom-center"), (btn_bottom_right, "bottom-right")
823
- ]:
824
- btn.click(fn=lambda p=pos: p, outputs=position)
825
-
826
- # ๋ฉ”์ธ ํ”„๋กœ์„ธ์Šค ์ด๋ฒคํŠธ
827
- process_btn.click(
828
- fn=process_prompt,
829
- inputs=[
830
- input_image,
831
- text_prompt,
832
- bg_prompt,
833
- aspect_ratio,
834
- position,
835
- scale_slider,
836
- gr.State(get_text_params)
837
- ],
838
- outputs=[combined_image, extracted_image],
839
- queue=True
840
- )
841
-
842
- demo.queue(max_size=1) # ํ ํฌ๊ธฐ ์ œํ•œ
843
  demo.launch(
844
  server_name="0.0.0.0",
845
  server_port=7860,
846
  share=False,
847
- max_threads=1, # ์Šค๋ ˆ๋“œ ์ˆ˜ ์ œํ•œ
848
- enable_queue=True,
849
- cache_examples=False,
850
- show_error=True,
851
- show_tips=False,
852
- quiet=True
853
- )
 
20
  from diffusers import FluxPipeline
21
  from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
22
  import gc
 
23
 
24
  def clear_memory():
25
  """๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ ํ•จ์ˆ˜"""
26
  gc.collect()
 
 
 
 
 
 
 
 
 
 
 
27
  try:
28
+ if torch.cuda.is_available():
29
+ with torch.cuda.device(0): # ๋ช…์‹œ์ ์œผ๋กœ device 0 ์‚ฌ์šฉ
30
+ torch.cuda.empty_cache()
31
+ except:
32
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  # GPU ์„ค์ •
35
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # ๋ช…์‹œ์ ์œผ๋กœ cuda:0 ์ง€์ •
36
 
 
 
 
 
 
 
 
 
 
 
37
  # GPU ์„ค์ •์„ try-except๋กœ ๊ฐ์‹ธ๊ธฐ
38
  if torch.cuda.is_available():
39
  try:
 
185
  return base_size * 4 // 3, base_size
186
  return base_size, base_size
187
 
188
+ @spaces.GPU(duration=20) # 40์ดˆ์—์„œ 20์ดˆ๋กœ ๊ฐ์†Œ
189
  def generate_background(prompt: str, aspect_ratio: str) -> Image.Image:
190
  try:
191
  width, height = calculate_dimensions(aspect_ratio)
192
  width, height = adjust_size_to_multiple_of_8(width, height)
193
 
194
+ max_size = 768
 
195
  if width > max_size or height > max_size:
196
  ratio = max_size / max(width, height)
197
  width = int(width * ratio)
198
  height = int(height * ratio)
199
  width, height = adjust_size_to_multiple_of_8(width, height)
200
 
201
+ with timer("Background generation"):
202
+ try:
203
+ with torch.inference_mode():
204
+ image = pipe(
205
+ prompt=prompt,
206
+ width=width,
207
+ height=height,
208
+ num_inference_steps=8,
209
+ guidance_scale=4.0
210
+ ).images[0]
211
+ except Exception as e:
212
+ print(f"Pipeline error: {str(e)}")
213
+ return Image.new('RGB', (width, height), 'white')
214
 
215
  return image
216
  except Exception as e:
 
339
  def on_change_prompt(img: Image.Image | None, prompt: str | None, bg_prompt: str | None = None):
340
  return gr.update(interactive=bool(img and prompt))
341
 
342
+
343
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  def process_prompt(img: Image.Image, prompt: str, bg_prompt: str | None = None,
345
  aspect_ratio: str = "1:1", position: str = "bottom-center",
346
+ scale_percent: float = 100) -> tuple[Image.Image, Image.Image]:
347
  try:
348
+ if img is None or prompt.strip() == "":
349
+ raise gr.Error("Please provide both image and prompt")
350
 
351
+ print(f"Processing with position: {position}, scale: {scale_percent}")
 
 
352
 
353
+ try:
354
+ prompt = translate_to_english(prompt)
355
+ if bg_prompt:
356
+ bg_prompt = translate_to_english(bg_prompt)
357
+ except Exception as e:
358
+ print(f"Translation error (continuing with original text): {str(e)}")
359
+
360
+ results, _ = _process(img, prompt, bg_prompt, aspect_ratio)
361
+
362
+ if bg_prompt:
363
  try:
364
+ combined = combine_with_background(
365
+ foreground=results[2],
366
+ background=results[1],
367
+ position=position,
368
+ scale_percent=scale_percent
369
+ )
370
+ print(f"Combined image created with position: {position}")
371
+ return combined, results[2]
372
  except Exception as e:
373
+ print(f"Combination error: {str(e)}")
374
+ return results[1], results[2]
375
+
376
+ return results[1], results[2]
377
+ except Exception as e:
378
+ print(f"Error in process_prompt: {str(e)}")
379
+ raise gr.Error(str(e))
380
+ finally:
381
+ clear_memory()
382
+
383
  def process_bbox(img: Image.Image, box_input: str) -> tuple[Image.Image, Image.Image]:
384
  try:
385
  if img is None or box_input.strip() == "":
 
419
  return gr.update(interactive=False, variant="secondary")
420
 
421
 
 
422
  # CSS ์ •์˜
423
  css = """
424
  footer {display: none}
 
482
  }
483
  """
484
 
485
+ # UI ๊ตฌ์„ฑ
486
+ # UI ๊ตฌ์„ฑ ๋ถ€๋ถ„์—์„œ process_btn์„ ์œ„๋กœ ์ด๋™ํ•˜๊ณ  position_grid.click ๋ถ€๋ถ„ ์ œ๊ฑฐ
487
+
488
+ # UI ๊ตฌ์„ฑ
489
+ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
490
+ gr.HTML("""
491
+ <div class="main-title">
492
+ <h1>๐ŸŽจGiniGen Canvas</h1>
493
+ <p>AI Integrated Image Creator: Extract objects, generate backgrounds, and adjust ratios and positions to create complete images with AI.</p>
494
+ </div>
495
+ """)
496
+
497
+ with gr.Row():
498
+ with gr.Column(scale=1):
499
+ input_image = gr.Image(
500
+ type="pil",
501
+ label="Upload Image",
502
+ interactive=True
503
+ )
504
+ text_prompt = gr.Textbox(
505
+ label="Object to Extract",
506
+ placeholder="Enter what you want to extract...",
507
+ interactive=True
508
+ )
509
+ with gr.Row():
510
+ bg_prompt = gr.Textbox(
511
+ label="Background Prompt (optional)",
512
+ placeholder="Describe the background...",
513
+ interactive=True,
514
+ scale=3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  )
516
+ aspect_ratio = gr.Dropdown(
517
+ choices=["1:1", "16:9", "9:16", "4:3"],
518
+ value="1:1",
519
+ label="Aspect Ratio",
520
+ interactive=True,
521
+ visible=True,
522
+ scale=1
523
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
 
525
+ with gr.Row(visible=False) as object_controls:
526
+ with gr.Column(scale=1):
527
+ with gr.Row():
 
528
  position = gr.State(value="bottom-center")
529
+ btn_top_left = gr.Button("โ†–")
530
+ btn_top_center = gr.Button("โ†‘")
531
+ btn_top_right = gr.Button("โ†—")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
532
  with gr.Row():
533
+ btn_middle_left = gr.Button("โ†")
534
+ btn_middle_center = gr.Button("โ€ข")
535
+ btn_middle_right = gr.Button("โ†’")
536
+ with gr.Row():
537
+ btn_bottom_left = gr.Button("โ†™")
538
+ btn_bottom_center = gr.Button("โ†“")
539
+ btn_bottom_right = gr.Button("โ†˜")
540
+ with gr.Column(scale=1):
541
+ scale_slider = gr.Slider(
542
+ minimum=10,
543
+ maximum=200,
544
+ value=50,
545
+ step=5,
546
+ label="Object Size (%)"
547
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
548
 
549
+ process_btn = gr.Button(
550
+ "Process",
551
+ variant="primary",
552
+ interactive=False
553
+ )
554
+
555
+ # ๊ฐ ๋ฒ„ํŠผ์— ๋Œ€ํ•œ ํด๋ฆญ ์ด๋ฒคํŠธ ์ฒ˜๋ฆฌ
556
+ def update_position(new_position):
557
+ return new_position
558
+
559
+ btn_top_left.click(fn=lambda: update_position("top-left"), outputs=position)
560
+ btn_top_center.click(fn=lambda: update_position("top-center"), outputs=position)
561
+ btn_top_right.click(fn=lambda: update_position("top-right"), outputs=position)
562
+ btn_middle_left.click(fn=lambda: update_position("middle-left"), outputs=position)
563
+ btn_middle_center.click(fn=lambda: update_position("middle-center"), outputs=position)
564
+ btn_middle_right.click(fn=lambda: update_position("middle-right"), outputs=position)
565
+ btn_bottom_left.click(fn=lambda: update_position("bottom-left"), outputs=position)
566
+ btn_bottom_center.click(fn=lambda: update_position("bottom-center"), outputs=position)
567
+ btn_bottom_right.click(fn=lambda: update_position("bottom-right"), outputs=position)
568
+
569
+ with gr.Column(scale=1):
570
+ with gr.Row():
571
  combined_image = gr.Image(
572
  label="Combined Result",
573
  show_download_button=True,
574
  type="pil",
575
  height=512
576
  )
577
+ with gr.Row():
578
  extracted_image = gr.Image(
579
  label="Extracted Object",
580
  show_download_button=True,
 
582
  height=256
583
  )
584
 
585
+ # Event bindings
586
+ input_image.change(
587
+ fn=update_process_button,
588
+ inputs=[input_image, text_prompt],
589
+ outputs=process_btn,
590
+ queue=False
591
+ )
592
+
593
+ text_prompt.change(
594
+ fn=update_process_button,
595
+ inputs=[input_image, text_prompt],
596
+ outputs=process_btn,
597
+ queue=False
598
+ )
599
+
600
+ def update_controls(bg_prompt):
601
+ """๋ฐฐ๊ฒฝ ํ”„๋กฌํ”„ํŠธ ์ž…๋ ฅ ์—ฌ๋ถ€์— ๋”ฐ๋ผ ์ปจํŠธ๋กค ํ‘œ์‹œ ์—…๋ฐ์ดํŠธ"""
602
+ is_visible = bool(bg_prompt)
603
+ return [
604
+ gr.update(visible=is_visible), # aspect_ratio
605
+ gr.update(visible=is_visible), # object_controls
606
+ ]
607
+
608
+ bg_prompt.change(
609
+ fn=update_controls,
610
+ inputs=bg_prompt,
611
+ outputs=[aspect_ratio, object_controls],
612
+ queue=False
613
+ )
614
+
615
+ process_btn.click(
616
+ fn=process_prompt,
617
+ inputs=[
618
+ input_image,
619
+ text_prompt,
620
+ bg_prompt,
621
+ aspect_ratio,
622
+ position,
623
+ scale_slider
624
+ ],
625
+ outputs=[combined_image, extracted_image],
626
+ queue=True
627
+ )
628
+
629
+
630
+ demo.queue(max_size=5) # ํ ํฌ๊ธฐ ์ œํ•œ
 
 
 
 
 
 
 
 
 
 
 
 
 
631
  demo.launch(
632
  server_name="0.0.0.0",
633
  server_port=7860,
634
  share=False,
635
+ max_threads=2 # ์Šค๋ ˆ๋“œ ์ˆ˜ ์ œํ•œ
636
+ )