aiqtech commited on
Commit
5b55adf
·
verified ·
1 Parent(s): 958fc4c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -39
app.py CHANGED
@@ -37,37 +37,52 @@ def initialize_models():
37
  global segmenter, gd_model, gd_processor, pipe, translator
38
 
39
  try:
40
- # CPU에서 실행되는 번역 모델
41
- model = AutoModelForSeq2SeqLM.from_pretrained(
42
- model_name,
43
- low_cpu_mem_usage=True
 
44
  ).to('cpu')
45
  tokenizer = AutoTokenizer.from_pretrained(model_name)
46
  translator = pipeline("translation", model=model, tokenizer=tokenizer, device=-1)
 
47
 
48
- # GroundingDINO 모델
49
- gd_processor = GroundingDinoProcessor.from_pretrained(gd_model_path)
50
- gd_model = GroundingDinoForObjectDetection.from_pretrained(
51
- gd_model_path,
52
- torch_dtype=torch.float16,
53
- device_map=None # device_map을 None으로 설정
54
  )
 
55
 
56
- # Segmenter
57
- segmenter = BoxSegmenter(device='cpu')
58
 
59
- # FLUX 파이프라인
60
  pipe = FluxPipeline.from_pretrained(
61
  "black-forest-labs/FLUX.1-dev",
62
  torch_dtype=torch.float16,
63
- device_map=None, # device_map을 None으로 설정
64
- low_cpu_mem_usage=True
65
  )
66
- pipe.enable_attention_slicing()
 
67
 
68
  except Exception as e:
69
  print(f"Model initialization error: {str(e)}")
70
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  # GPU 설정
73
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # 명시적으로 cuda:0 지정
@@ -374,26 +389,42 @@ def on_change_bbox(prompts: dict[str, Any] | None):
374
  def on_change_prompt(img: Image.Image | None, prompt: str | None, bg_prompt: str | None = None):
375
  return gr.update(interactive=bool(img and prompt))
376
 
377
-
378
- @spaces.GPU()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
  def process_prompt(img: Image.Image, prompt: str, bg_prompt: str | None = None,
380
  aspect_ratio: str = "1:1", position: str = "bottom-center",
381
  scale_percent: float = 100, text_params: dict | None = None):
382
  try:
383
- # GPU 설정
384
- if torch.cuda.is_available():
385
- device = torch.device('cuda')
386
- # 모델들을 GPU로 이동
387
- gd_model.to(device)
388
- segmenter.to(device)
389
- pipe.to(device)
390
- else:
391
- device = torch.device('cpu')
392
 
393
- # ๋‚˜๋จธ์ง€ ์ฒ˜๋ฆฌ ๋กœ์ง...
 
 
394
 
 
 
 
 
395
  finally:
396
- # GPU 메모리 정리
 
397
  if torch.cuda.is_available():
398
  try:
399
  with torch.cuda.device('cuda'):
@@ -440,13 +471,7 @@ def update_box_button(img, box_input):
440
  except:
441
  return gr.update(interactive=False, variant="secondary")
442
 
443
- def process_image(img: Image.Image, max_size: int = 768) -> Image.Image:
444
- """์ด๋ฏธ์ง€ ํฌ๊ธฐ ์ตœ์ ํ™”"""
445
- if img.width > max_size or img.height > max_size:
446
- ratio = max_size / max(img.width, img.height)
447
- new_size = (int(img.width * ratio), int(img.height * ratio))
448
- return img.resize(new_size, Image.LANCZOS)
449
- return img
450
 
451
  # CSS 정의
452
  css = """
@@ -804,14 +829,15 @@ if __name__ == "__main__":
804
  queue=True
805
  )
806
 
807
- # Gradio 실행 설정 수정
808
  demo.launch(
809
  server_name="0.0.0.0",
810
  server_port=7860,
811
  share=False,
812
- max_threads=2,
813
  enable_queue=True,
814
  cache_examples=False,
815
  show_error=True,
816
- show_tips=False
 
817
  )
 
37
  global segmenter, gd_model, gd_processor, pipe, translator
38
 
39
  try:
40
+ # 번역 모델 - 가벼운 버전 사용
41
+ model = AutoModelForSeq2SeqLM.from_pretrained(
42
+ model_name,
43
+ low_cpu_mem_usage=True,
44
+ torch_dtype=torch.float16
45
  ).to('cpu')
46
  tokenizer = AutoTokenizer.from_pretrained(model_name)
47
  translator = pipeline("translation", model=model, tokenizer=tokenizer, device=-1)
48
+ del model # 명시적 메모리 해제
49
 
50
+ # GroundingDINO - 더 작은 모델 사용
51
+ gd_processor = GroundingDinoProcessor.from_pretrained(
52
+ "IDEA-Research/grounding-dino-base", # ๋” ์ž‘์€ base ๋ชจ๋ธ
53
+ torch_dtype=torch.float16
 
 
54
  )
55
+ gd_model = None # 필요할 때 로드
56
 
57
+ # Segmenter - 기본 설정
58
+ segmenter = None # 필요할 때 로드
59
 
60
+ # FLUX 파이프라인 - 메모리 효율적 설정
61
  pipe = FluxPipeline.from_pretrained(
62
  "black-forest-labs/FLUX.1-dev",
63
  torch_dtype=torch.float16,
64
+ low_cpu_mem_usage=True,
65
+ use_safetensors=True
66
  )
67
+ pipe.enable_attention_slicing(slice_size=1)
68
+ pipe.enable_sequential_cpu_offload()
69
 
70
  except Exception as e:
71
  print(f"Model initialization error: {str(e)}")
72
  raise
73
+
74
+ def load_model_on_demand(model_type: str):
75
+ """ํ•„์š”ํ•  ๋•Œ๋งŒ ๋ชจ๋ธ์„ ๋กœ๋“œํ•˜๋Š” ํ•จ์ˆ˜"""
76
+ global gd_model, segmenter
77
+
78
+ if model_type == "gd" and gd_model is None:
79
+ gd_model = GroundingDinoForObjectDetection.from_pretrained(
80
+ "IDEA-Research/grounding-dino-base",
81
+ torch_dtype=torch.float16,
82
+ low_cpu_mem_usage=True
83
+ )
84
+ elif model_type == "segmenter" and segmenter is None:
85
+ segmenter = BoxSegmenter(device='cpu')
86
 
87
  # GPU ์„ค์ •
88
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # ๋ช…์‹œ์ ์œผ๋กœ cuda:0 ์ง€์ •
 
389
  def on_change_prompt(img: Image.Image | None, prompt: str | None, bg_prompt: str | None = None):
390
  return gr.update(interactive=bool(img and prompt))
391
 
392
+ def process_image(img: Image.Image) -> Image.Image:
393
+ """์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ ์ตœ์ ํ™”"""
394
+ # ์ตœ๋Œ€ ํฌ๊ธฐ ์ œํ•œ
395
+ max_size = 512 # ๋” ์ž‘์€ ํฌ๊ธฐ๋กœ ์ œํ•œ
396
+ if img.width > max_size or img.height > max_size:
397
+ ratio = max_size / max(img.width, img.height)
398
+ new_size = (int(img.width * ratio), int(img.height * ratio))
399
+ img = img.resize(new_size, Image.LANCZOS)
400
+
401
+ # 메모리 효율을 위한 이미지 모드 변환
402
+ if img.mode in ['RGBA', 'LA']:
403
+ background = Image.new('RGB', img.size, (255, 255, 255))
404
+ background.paste(img, mask=img.split()[-1])
405
+ img = background
406
+
407
+ return img
408
+
409
+ @spaces.GPU(duration=15) # 더 짧은 시간 제한
410
  def process_prompt(img: Image.Image, prompt: str, bg_prompt: str | None = None,
411
  aspect_ratio: str = "1:1", position: str = "bottom-center",
412
  scale_percent: float = 100, text_params: dict | None = None):
413
  try:
414
+ # 이미지 전처리
415
+ img = process_image(img)
 
 
 
 
 
 
 
416
 
417
+ # 필요한 모델만 로드
418
+ load_model_on_demand("gd")
419
+ load_model_on_demand("segmenter")
420
 
421
+ with torch.cuda.amp.autocast(): # 메모리 효율을 위한 mixed precision
422
+ # 처리 로직...
423
+ pass
424
+
425
  finally:
426
+ # 메모리 정리
427
+ clear_memory()
428
  if torch.cuda.is_available():
429
  try:
430
  with torch.cuda.device('cuda'):
 
471
  except:
472
  return gr.update(interactive=False, variant="secondary")
473
 
474
+
 
 
 
 
 
 
475
 
476
  # CSS ์ •์˜
477
  css = """
 
829
  queue=True
830
  )
831
 
832
+ demo.queue(max_size=1) # 큐 크기 제한
833
  demo.launch(
834
  server_name="0.0.0.0",
835
  server_port=7860,
836
  share=False,
837
+ max_threads=1, # 스레드 수 제한
838
  enable_queue=True,
839
  cache_examples=False,
840
  show_error=True,
841
+ show_tips=False,
842
+ quiet=True
843
  )