Spaces:

Mansuba
/

Bangla_text_to_image_app

Running

App Files Files Community

Mansuba committed on Jan 28

Commit

6024488

verified ·

1 Parent(s): d47dd8d

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -54

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import torch
 from transformers import CLIPModel, CLIPProcessor, AutoTokenizer, MarianMTModel, MarianTokenizer
 from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
@@ -11,6 +10,7 @@ import json
 import logging
 from dataclasses import dataclass
 import gc
 # Configure logging
 logging.basicConfig(
@@ -30,6 +30,10 @@ class ModelCache:
     def __init__(self, cache_dir: Path):
         self.cache_dir = cache_dir
         self.cache_dir.mkdir(parents=True, exist_ok=True)
     def load_model(self, model_id: str, load_func: callable, cache_name: str) -> Any:
         try:
@@ -48,18 +52,32 @@ class EnhancedBanglaSDGenerator:
     ):
         self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
         logger.info(f"Using device: {self.device}")
         self.cache = ModelCache(Path(cache_dir))
         self._initialize_models(banglaclip_weights_path)
         self._load_context_data()
     def _initialize_models(self, banglaclip_weights_path: str):
         try:
             # Initialize translation models
             self.bn2en_model_name = "Helsinki-NLP/opus-mt-bn-en"
             self.translator = self.cache.load_model(
                 self.bn2en_model_name,
-                MarianMTModel.from_pretrained,
                 "translator"
             ).to(self.device)
             self.trans_tokenizer = MarianTokenizer.from_pretrained(self.bn2en_model_name)
@@ -71,7 +89,7 @@ class EnhancedBanglaSDGenerator:
             self.processor = CLIPProcessor.from_pretrained(self.clip_model_name)
             self.tokenizer = AutoTokenizer.from_pretrained(self.bangla_text_model)
-            # Initialize Stable Diffusion with optimizations
             self._initialize_stable_diffusion()
         except Exception as e:
@@ -79,45 +97,53 @@ class EnhancedBanglaSDGenerator:
             raise RuntimeError(f"Failed to initialize models: {str(e)}")
     def _initialize_stable_diffusion(self):
-        """Initialize Stable Diffusion pipeline with CPU performance optimizations."""
-        self.pipe = self.cache.load_model(
-            "runwayml/stable-diffusion-v1-5",
-            lambda model_id: StableDiffusionPipeline.from_pretrained(
-                model_id,
-                torch_dtype=torch.float32,
-                safety_checker=None,
-                use_safetensors=True,
-                use_memory_efficient_attention=True,
-                local_files_only=True
-            ),
-            "stable_diffusion"
-        )
-        # Optimize scheduler
-        self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
-            self.pipe.scheduler.config,
-            use_karras_sigmas=True,
-            algorithm_type="dpmsolver++"
-        )
-        # CPU optimizations
-        self.pipe.enable_attention_slicing(slice_size=1)
-        self.pipe.enable_vae_slicing()
-        self.pipe.enable_sequential_cpu_offload()
-        # Component-level optimizations
-        for component in [self.pipe.text_encoder, self.pipe.vae, self.pipe.unet]:
-            if hasattr(component, 'enable_model_cpu_offload'):
-                component.enable_model_cpu_offload()
-        self.pipe = self.pipe.to(self.device)
     def _load_banglaclip_model(self, weights_path: str) -> CLIPModel:
         try:
             if not Path(weights_path).exists():
                 raise FileNotFoundError(f"BanglaCLIP weights not found at {weights_path}")
-            clip_model = CLIPModel.from_pretrained(self.clip_model_name)
             state_dict = torch.load(weights_path, map_location=self.device)
             cleaned_state_dict = {
@@ -152,22 +178,12 @@ class EnhancedBanglaSDGenerator:
         inputs = self.trans_tokenizer(bangla_text, return_tensors="pt", padding=True)
         inputs = {k: v.to(self.device) for k, v in inputs.items()}
-        with torch.no_grad():
             outputs = self.translator.generate(**inputs)
         translated = self.trans_tokenizer.decode(outputs[0], skip_special_tokens=True)
         return translated
-    def _get_text_embedding(self, text: str):
-        """Get text embedding from BanglaCLIP model."""
-        inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True)
-        inputs = {k: v.to(self.device) for k, v in inputs.items()}
-        with torch.no_grad():
-            outputs = self.banglaclip_model.get_text_features(**inputs)
-        return outputs
     def generate_image(
         self,
         bangla_text: str,
@@ -182,16 +198,15 @@ class EnhancedBanglaSDGenerator:
             if config.seed is not None:
                 torch.manual_seed(config.seed)
-            enhanced_prompt = self._enhance_prompt(bangla_text)
-            negative_prompt = self._get_negative_prompt()
-            # Pre-generation optimization
-            torch.set_num_threads(max(4, torch.get_num_threads()))
             gc.collect()
             torch.cuda.empty_cache() if torch.cuda.is_available() else None
-            # Memory-optimized generation
-            with torch.inference_mode():
                 result = self.pipe(
                     prompt=enhanced_prompt,
                     negative_prompt=negative_prompt,
@@ -202,7 +217,7 @@ class EnhancedBanglaSDGenerator:
                     use_memory_efficient_cross_attention=True
                 )
-            # Post-generation cleanup
             gc.collect()
             torch.cuda.empty_cache() if torch.cuda.is_available() else None
@@ -337,5 +352,9 @@ def create_gradio_interface():
     return demo
 if __name__ == "__main__":
     demo = create_gradio_interface()
     demo.queue().launch(share=True)

 import torch
 from transformers import CLIPModel, CLIPProcessor, AutoTokenizer, MarianMTModel, MarianTokenizer
 from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
 import logging
 from dataclasses import dataclass
 import gc
+import os
 # Configure logging
 logging.basicConfig(
     def __init__(self, cache_dir: Path):
+        """Create the cache directory and export two CUDA-related environment variables.
+
+        Args:
+            cache_dir: Directory backing this cache; created (with parents) if missing.
+        """
         self.cache_dir = cache_dir
         self.cache_dir.mkdir(parents=True, exist_ok=True)
+        # Process-wide side effect: both variables are overwritten unconditionally,
+        # replacing any value already present in the environment.
+        # NOTE(review): CUDA_LAUNCH_BLOCKING=1 is normally a debugging switch
+        # (synchronous kernel launches), not a memory setting - confirm it is wanted.
+        # NOTE(review): the generator's __init__ calls setup_memory_management()
+        # (which touches torch.cuda) before constructing ModelCache - confirm these
+        # values are still picked up when set this late.
+        os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
+        os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
     def load_model(self, model_id: str, load_func: callable, cache_name: str) -> Any:
         try:
     ):
         self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
         logger.info(f"Using device: {self.device}")
+        # Fraction of GPU memory this process may use (applied only when CUDA is available)
+        self.memory_split = 0.5  # Use 50% of available VRAM
+        self.setup_memory_management()
         self.cache = ModelCache(Path(cache_dir))
         self._initialize_models(banglaclip_weights_path)
         self._load_context_data()
+    def setup_memory_management(self):
+        """Cap GPU memory for this process and size PyTorch's CPU thread pools.
+
+        When CUDA is available, limits this process to ``self.memory_split`` of
+        the default device's memory. The intra-op and inter-op thread counts are
+        both set to ``min(8, os.cpu_count() or 4)``.
+
+        A failure to set the inter-op thread count is logged and ignored, so the
+        method can safely be called more than once per process.
+        """
+        if torch.cuda.is_available():
+            torch.cuda.set_per_process_memory_fraction(self.memory_split)
+        # os.cpu_count() may return None, hence the fallback of 4.
+        num_threads = min(8, os.cpu_count() or 4)
+        torch.set_num_threads(num_threads)
+        try:
+            # PyTorch allows this call only once, and only before any inter-op
+            # parallel work has started; otherwise it raises RuntimeError. Treat
+            # that as non-fatal so a second generator instance can still be built.
+            torch.set_num_interop_threads(num_threads)
+        except RuntimeError as e:
+            logger.warning(f"Could not set inter-op thread count: {str(e)}")
     def _initialize_models(self, banglaclip_weights_path: str):
         try:
             # Initialize translation models
             self.bn2en_model_name = "Helsinki-NLP/opus-mt-bn-en"
             self.translator = self.cache.load_model(
                 self.bn2en_model_name,
+                lambda x: MarianMTModel.from_pretrained(x, low_cpu_mem_usage=True),
                 "translator"
             ).to(self.device)
             self.trans_tokenizer = MarianTokenizer.from_pretrained(self.bn2en_model_name)
             self.processor = CLIPProcessor.from_pretrained(self.clip_model_name)
             self.tokenizer = AutoTokenizer.from_pretrained(self.bangla_text_model)
+            # Initialize Stable Diffusion
             self._initialize_stable_diffusion()
         except Exception as e:
             raise RuntimeError(f"Failed to initialize models: {str(e)}")
     def _initialize_stable_diffusion(self):
+        """Load the Stable Diffusion v1.5 pipeline and apply memory optimizations.
+
+        The pipeline is obtained through ``self.cache.load_model`` in float32 with
+        the safety checker disabled, its scheduler is replaced by a DPM-Solver++
+        multistep scheduler with Karras sigmas, and attention/VAE slicing are
+        enabled.
+
+        Device placement uses exactly one strategy:
+          * CUDA device: model-level CPU offload, which moves sub-models to the
+            GPU on demand. The pipeline is not moved with ``.to()`` afterwards,
+            because that would undo the offload.
+          * any other device: the whole pipeline is moved to ``self.device``.
+
+        Raises:
+            Exception: any error from loading or configuring the pipeline is
+                logged and re-raised unchanged.
+        """
+        try:
+            self.pipe = self.cache.load_model(
+                "runwayml/stable-diffusion-v1-5",
+                lambda model_id: StableDiffusionPipeline.from_pretrained(
+                    model_id,
+                    torch_dtype=torch.float32,
+                    safety_checker=None,
+                    use_safetensors=True,
+                    low_cpu_mem_usage=True,
+                ),
+                "stable_diffusion"
+            )
+            # Optimize scheduler for speed
+            self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
+                self.pipe.scheduler.config,
+                use_karras_sigmas=True,
+                algorithm_type="dpmsolver++",
+                solver_order=2
+            )
+            # Memory optimizations
+            self.pipe.enable_attention_slicing(slice_size=1)
+            self.pipe.enable_vae_slicing()
+            # Sequential and model-level CPU offload are alternatives; enabling
+            # both and then calling .to(device) conflicts. Key the choice on the
+            # selected device, not merely on CUDA being present.
+            if torch.device(self.device).type == "cuda":
+                torch.cuda.empty_cache()
+                self.pipe.enable_model_cpu_offload()
+            else:
+                self.pipe = self.pipe.to(self.device)
+        except Exception as e:
+            logger.error(f"Error initializing Stable Diffusion: {str(e)}")
+            raise
     def _load_banglaclip_model(self, weights_path: str) -> CLIPModel:
         try:
             if not Path(weights_path).exists():
                 raise FileNotFoundError(f"BanglaCLIP weights not found at {weights_path}")
+            clip_model = CLIPModel.from_pretrained(
+                self.clip_model_name,
+                low_cpu_mem_usage=True
+            )
             state_dict = torch.load(weights_path, map_location=self.device)
             cleaned_state_dict = {
         inputs = self.trans_tokenizer(bangla_text, return_tensors="pt", padding=True)
         inputs = {k: v.to(self.device) for k, v in inputs.items()}
+        with torch.no_grad(), torch.cpu.amp.autocast():
             outputs = self.translator.generate(**inputs)
         translated = self.trans_tokenizer.decode(outputs[0], skip_special_tokens=True)
         return translated
     def generate_image(
         self,
         bangla_text: str,
             if config.seed is not None:
                 torch.manual_seed(config.seed)
+            # Clear memory before generation
             gc.collect()
             torch.cuda.empty_cache() if torch.cuda.is_available() else None
+            enhanced_prompt = self._enhance_prompt(bangla_text)
+            negative_prompt = self._get_negative_prompt()
+            # Use mixed precision for faster generation
+            with torch.inference_mode(), torch.cpu.amp.autocast():
                 result = self.pipe(
                     prompt=enhanced_prompt,
                     negative_prompt=negative_prompt,
                     use_memory_efficient_cross_attention=True
                 )
+            # Clear memory after generation
             gc.collect()
             torch.cuda.empty_cache() if torch.cuda.is_available() else None
     return demo
 if __name__ == "__main__":
+    # Script entry point: export two CUDA-related environment variables, then
+    # build the Gradio interface and launch it with request queuing enabled.
+    # The same two variables are also written by ModelCache.__init__.
+    # NOTE(review): torch is imported at module top, before these assignments run -
+    # confirm the settings are still honored when set at this point.
+    # NOTE(review): CUDA_LAUNCH_BLOCKING=1 is normally a debugging switch
+    # (synchronous kernel launches), not a performance setting - confirm intent.
+    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
+    os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
     demo = create_gradio_interface()
     demo.queue().launch(share=True)