Update app.py
app.py
CHANGED
@@ -23,6 +23,7 @@ from typing import Tuple, Dict, Any  # Tuple import added
 import transformers
 from transformers import pipeline as transformers_pipeline
 from transformers import Pipeline
+import gc  # added at the top of the file
 
 # Initialize global variables
 class GlobalVars:
@@ -81,7 +82,8 @@ torch.backends.cuda.matmul.allow_tf32 = True
 torch.backends.cudnn.benchmark = True
 
 # Environment variable settings
-
+# Environment variable settings
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 os.environ['SPCONV_ALGO'] = 'native'
 os.environ['SPARSE_BACKEND'] = 'native'
 os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
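Note on the new allocator setting: PYTORCH_CUDA_ALLOC_CONF is only read when the CUDA caching allocator initializes, so it takes effect only if it is set before the first CUDA allocation, which is what its placement near the top of app.py relies on. A minimal, standalone sketch of the required ordering (illustrative, not part of this repo):

    import os

    # Set before any CUDA allocation; changing it afterwards has no effect
    # on the already-initialized caching allocator.
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

    import torch

    if torch.cuda.is_available():
        x = torch.zeros(1, device="cuda")    # first allocation initializes the allocator
        print(torch.cuda.memory_reserved())  # cached bytes under the configured policy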
@@ -89,7 +91,6 @@ os.environ['XFORMERS_FORCE_DISABLE_TRITON'] = '1'
 os.environ['XFORMERS_ENABLE_FLASH_ATTENTION'] = '1'
 os.environ['TORCH_CUDA_MEMORY_ALLOCATOR'] = 'native'
 os.environ['PYTORCH_NO_CUDA_MEMORY_CACHING'] = '1'
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 
 # Prevent CUDA initialization
 torch.set_grad_enabled(False)
@@ -207,9 +208,7 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
         return None, None
 
     try:
-
-        torch.cuda.empty_cache()
-        torch.cuda.synchronize()
+        clear_gpu_memory()
 
         if randomize_seed:
             seed = np.random.randint(0, MAX_SEED)
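A note on routing cleanup through clear_gpu_memory() (defined later in this diff): torch.cuda.empty_cache() can only release blocks whose tensors have already been freed on the Python side, so the helper also runs gc.collect() to clear lingering references (for example, cycles or exception tracebacks) before the cache is emptied. A small self-contained sketch of that effect, assuming a CUDA device is available:

    import gc
    import torch

    def report(tag: str) -> None:
        # memory_allocated: bytes held by live tensors; memory_reserved: bytes cached by the allocator
        print(tag, torch.cuda.memory_allocated(), torch.cuda.memory_reserved())

    if torch.cuda.is_available():
        x = torch.empty(4096, 4096, device="cuda")
        report("after allocation")
        del x                      # drop the Python reference to the tensor
        gc.collect()               # collect anything kept alive by reference cycles
        torch.cuda.empty_cache()   # return the now-unused cached blocks to the driver
        torch.cuda.synchronize()
        report("after cleanup")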
@@ -232,88 +231,69 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
         image = image.resize(new_size, Image.LANCZOS)
         print(f"Resized image to: {image.size}")
 
-        [... old lines 235-279 (removed) are not captured in this diff view ...]
-        torch.cuda.synchronize()
-
-        # Move data to the CPU and post-process
-        video = [v.cpu().numpy() if torch.is_tensor(v) else v for v in video]
-        video_geo = [v.cpu().numpy() if torch.is_tensor(v) else v for v in video_geo]
-
-        video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
-        new_trial_id = str(uuid.uuid4())
-        video_path = f"{TMP_DIR}/{new_trial_id}.mp4"
-        os.makedirs(os.path.dirname(video_path), exist_ok=True)
-        imageio.mimsave(video_path, video, fps=15)
-
-        # Save state
-        state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], new_trial_id)
-
-        return state, video_path
-
-    finally:
-        # Cleanup
-        move_to_device(g.trellis_pipeline, 'cpu')
-        torch.cuda.empty_cache()
-        torch.cuda.synchronize()
-
+        with spaces.GPU(), torch.inference_mode():
+            # 3D generation
+            g.trellis_pipeline.to('cuda')
+            outputs = g.trellis_pipeline.run(
+                image,
+                seed=seed,
+                formats=["gaussian", "mesh"],
+                preprocess_image=False,
+                sparse_structure_sampler_params={
+                    "steps": min(ss_sampling_steps, 12),
+                    "cfg_strength": ss_guidance_strength,
+                },
+                slat_sampler_params={
+                    "steps": min(slat_sampling_steps, 12),
+                    "cfg_strength": slat_guidance_strength,
+                },
+            )
+
+            # Video rendering
+            video = render_utils.render_video(
+                outputs['gaussian'][0],
+                num_frames=60,
+                resolution=512
+            )['color']
+
+            video_geo = render_utils.render_video(
+                outputs['mesh'][0],
+                num_frames=60,
+                resolution=512
+            )['normal']
+
+            # Move data to the CPU
+            video = [v.cpu().numpy() if torch.is_tensor(v) else v for v in video]
+            video_geo = [v.cpu().numpy() if torch.is_tensor(v) else v for v in video_geo]
+
+            video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
+            new_trial_id = str(uuid.uuid4())
+            video_path = f"{TMP_DIR}/{new_trial_id}.mp4"
+            os.makedirs(os.path.dirname(video_path), exist_ok=True)
+            imageio.mimsave(video_path, video, fps=15)
+
+            state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], new_trial_id)
+
+            return state, video_path
+
     except Exception as e:
         print(f"Error in image_to_3d: {str(e)}")
-        if hasattr(g.trellis_pipeline, 'to'):
-            move_to_device(g.trellis_pipeline, 'cpu')
-        torch.cuda.empty_cache()
-        torch.cuda.synchronize()
         return None, None
+    finally:
+        if hasattr(g.trellis_pipeline, 'to'):
+            g.trellis_pipeline.to('cpu')
+        clear_gpu_memory()
 
 def clear_gpu_memory():
     """Utility function to clear GPU memory"""
-
-    torch.cuda.
-
-
+    try:
+        if torch.cuda.is_available():
+            with torch.cuda.device('cuda'):
+                torch.cuda.empty_cache()
+                torch.cuda.synchronize()
+        gc.collect()
+    except Exception as e:
+        print(f"Error clearing GPU memory: {e}")
 
 def move_to_device(model, device):
     """Safely move a model to the target device"""
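The restructured image_to_3d keeps the TRELLIS pipeline on the GPU only while it runs: move to CUDA, execute under torch.inference_mode(), then return it to the CPU and clear memory in the finally block. A condensed, generic sketch of that pattern (the on_gpu helper and the Linear stand-in model are illustrative, not part of app.py):

    import gc
    from contextlib import contextmanager

    import torch

    @contextmanager
    def on_gpu(model: torch.nn.Module):
        """Temporarily move a model to CUDA and always return it to the CPU afterwards."""
        try:
            if torch.cuda.is_available():
                model.to("cuda")
            with torch.inference_mode():
                yield model
        finally:
            model.to("cpu")
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    model = torch.nn.Linear(8, 8)
    with on_gpu(model) as m:
        device = next(m.parameters()).device
        out = m(torch.zeros(1, 8, device=device))
    print(out.shape, next(model.parameters()).device)  # weights are back on the CPU here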
@@ -346,26 +326,27 @@ def deactivate_button() -> gr.Button:
 @spaces.GPU
 def text_to_image(prompt: str, height: int, width: int, steps: int, scales: float, seed: int) -> Image.Image:
     try:
-
-
-        torch.cuda.empty_cache()
-
+        clear_gpu_memory()
+
         # Detect Korean and translate
         def contains_korean(text):
            return any(ord('가') <= ord(c) <= ord('힣') for c in text)
 
-        # Preprocess the prompt
         if contains_korean(prompt):
             translated = g.translator(prompt)[0]['translation_text']
             prompt = translated
 
-        # Improve the prompt format
         formatted_prompt = f"wbgmsst, 3D, {prompt}, white background"
 
-
+        # Size limits
+        height = min(height, 512)
+        width = min(width, 512)
+        steps = min(steps, 12)
+
+        with spaces.GPU(), torch.inference_mode():
         generated_image = g.flux_pipe(
             prompt=[formatted_prompt],
-            generator=torch.Generator().manual_seed(int(seed)),
+            generator=torch.Generator('cuda').manual_seed(int(seed)),
             num_inference_steps=int(steps),
             guidance_scale=float(scales),
             height=int(height),
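On the seeding change: torch.Generator('cuda').manual_seed(...) pins the RNG to the device the sampler draws noise on, so a fixed seed reproduces the same outputs without touching the global RNG state. A small sketch of the behaviour in plain PyTorch (no diffusers dependency assumed; falls back to CPU when CUDA is absent):

    import torch

    seed = 42
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Two generators with the same seed on the same device produce identical draws.
    g1 = torch.Generator(device).manual_seed(seed)
    g2 = torch.Generator(device).manual_seed(seed)

    a = torch.randn(4, generator=g1, device=device)
    b = torch.randn(4, generator=g2, device=device)
    print(torch.equal(a, b))  # True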
@@ -375,7 +356,9 @@ def text_to_image(prompt: str, height: int, width: int, steps: int, scales: floa
 
         if generated_image is not None:
             trial_id = str(uuid.uuid4())
-
+            save_path = f"{TMP_DIR}/{trial_id}.png"
+            generated_image.save(save_path)
+            print(f"Saved generated image to: {save_path}")
             return generated_image
         else:
             print("Error: Generated image is None")
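A small follow-up on the new PNG save: unlike the video path earlier in this diff, save_path is written without an os.makedirs guard, so it relies on TMP_DIR already existing. If that is not guaranteed elsewhere, a defensive variant could look like this (sketch only; the "/tmp/craft3d" value is a hypothetical stand-in for the app's TMP_DIR constant):

    import os
    import uuid

    from PIL import Image

    TMP_DIR = "/tmp/craft3d"  # hypothetical stand-in for the app's TMP_DIR

    def save_generated_image(image: Image.Image) -> str:
        """Save an image under TMP_DIR, creating the directory if needed."""
        os.makedirs(TMP_DIR, exist_ok=True)
        path = os.path.join(TMP_DIR, f"{uuid.uuid4()}.png")
        image.save(path)
        return path

    print(save_generated_image(Image.new("RGB", (64, 64), "white")))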
@@ -384,6 +367,8 @@ def text_to_image(prompt: str, height: int, width: int, steps: int, scales: floa
     except Exception as e:
         print(f"Error in image generation: {str(e)}")
         return None
+    finally:
+        clear_gpu_memory()
 
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("""## Craft3D""")
@@ -480,9 +465,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 label="Click an image to use it",
                 show_label=True,
                 elem_id="gallery",
-                columns=
-                rows=
-                height=
+                columns=11,  # 12 per row
+                rows=3,      # 2 rows
+                height=400,  # adjust the height
                 allow_preview=True,
                 object_fit="contain"  # keep the image aspect ratio
             )
|