Spaces:

prithivMLmods
/

FLUX-REALISM

Running on Zero

App Files Files Community

prithivMLmods committed on Mar 6

Commit

7d0e511

verified ·

1 Parent(s): b449e17

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -15

app.py CHANGED Viewed

@@ -23,7 +23,7 @@ subprocess.run(
 )
 # -------------------------------
-# FLUX.1 IMAGE GENERATION SETUP
 # -------------------------------
 MAX_SEED = np.iinfo(np.int32).max
@@ -38,14 +38,21 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
         seed = random.randint(0, MAX_SEED)
     return seed
 from diffusers import DiffusionPipeline
 base_model = "black-forest-labs/FLUX.1-dev"
-pipe = DiffusionPipeline.from_pretrained(base_model, torch_dtype=torch.bfloat16)
 lora_repo = "strangerzonehf/Flux-Super-Realism-LoRA"
 trigger_word = "Super Realism"  # Leave blank if no trigger word is needed.
 pipe.load_lora_weights(lora_repo)
-pipe.enable_model_cpu_offload()  # Enable CPU offload to manage GPU memory efficiently
 # Define style prompts for Flux.1
 style_list = [
@@ -90,15 +97,18 @@ def generate_image_flux(
     positive_prompt = apply_style(style_name, prompt)
     if trigger_word:
         positive_prompt = f"{trigger_word} {positive_prompt}"
-    images = pipe(
-        prompt=positive_prompt,
-        width=width,
-        height=height,
-        guidance_scale=guidance_scale,
-        num_inference_steps=28,
-        num_images_per_prompt=1,
-        output_type="pil",
-    ).images
     image_paths = [save_image(img) for img in images]
     return image_paths, seed
@@ -111,7 +121,7 @@ smol_processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM2-2.2B-Inst
 smol_model = AutoModelForImageTextToText.from_pretrained(
     "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
     _attn_implementation="flash_attention_2",
-    torch_dtype=torch.bfloat16
 ).to("cuda:0")
 # -------------------------------
@@ -270,9 +280,9 @@ def generate(
         return_dict=True,
         return_tensors="pt",
     )
-    # Explicitly cast pixel values to bfloat16 to match model weights.
     if "pixel_values" in inputs:
-        inputs["pixel_values"] = inputs["pixel_values"].to(torch.bfloat16)
     inputs = inputs.to(smol_model.device)
     streamer = TextIteratorStreamer(smol_processor, skip_prompt=True, skip_special_tokens=True)

 )
 # -------------------------------
+# CONFIGURATION & UTILITY FUNCTIONS
 # -------------------------------
 MAX_SEED = np.iinfo(np.int32).max
         seed = random.randint(0, MAX_SEED)
     return seed
+# Determine preferred torch dtype based on GPU support.
+bf16_supported = torch.cuda.is_bf16_supported()
+preferred_dtype = torch.bfloat16 if bf16_supported else torch.float16
+# -------------------------------
+# FLUX.1 IMAGE GENERATION SETUP
+# -------------------------------
 from diffusers import DiffusionPipeline
 base_model = "black-forest-labs/FLUX.1-dev"
+pipe = DiffusionPipeline.from_pretrained(base_model, torch_dtype=preferred_dtype)
 lora_repo = "strangerzonehf/Flux-Super-Realism-LoRA"
 trigger_word = "Super Realism"  # Leave blank if no trigger word is needed.
 pipe.load_lora_weights(lora_repo)
+pipe.to("cuda")
 # Define style prompts for Flux.1
 style_list = [
     positive_prompt = apply_style(style_name, prompt)
     if trigger_word:
         positive_prompt = f"{trigger_word} {positive_prompt}"
+    # Wrap the diffusion call in no_grad to avoid unnecessary gradient state.
+    with torch.no_grad():
+        images = pipe(
+            prompt=positive_prompt,
+            width=width,
+            height=height,
+            guidance_scale=guidance_scale,
+            num_inference_steps=28,
+            num_images_per_prompt=1,
+            output_type="pil",
+        ).images
+        torch.cuda.synchronize()  # Ensure all CUDA operations have completed
     image_paths = [save_image(img) for img in images]
     return image_paths, seed
 smol_model = AutoModelForImageTextToText.from_pretrained(
     "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
     _attn_implementation="flash_attention_2",
+    torch_dtype=preferred_dtype
 ).to("cuda:0")
 # -------------------------------
         return_dict=True,
         return_tensors="pt",
     )
+    # Explicitly cast pixel values to the preferred dtype to match model weights.
     if "pixel_values" in inputs:
+        inputs["pixel_values"] = inputs["pixel_values"].to(preferred_dtype)
     inputs = inputs.to(smol_model.device)
     streamer = TextIteratorStreamer(smol_processor, skip_prompt=True, skip_special_tokens=True)