Spaces:

gokaygokay
/

FLUX.1-dev-with-Captioner

Running on Zero

App Files Community

gokaygokay committed on Oct 28, 2024

Commit

3880b98

1 Parent(s): 44d2484

delete

Browse files

Files changed (2) hide show

app.py +26 -2
stable_diffusion_model.py +0 -0

app.py CHANGED Viewed

@@ -3,10 +3,11 @@ import numpy as np
 import random
 import spaces
 import torch
-from diffusers import  DiffusionPipeline, FlowMatchEulerDiscreteScheduler, AutoencoderTiny, AutoencoderKL
 from transformers import CLIPTextModel, CLIPTokenizer,T5EncoderModel, T5TokenizerFast
 from live_preview_helpers import calculate_shift, retrieve_timesteps, flux_pipe_call_that_returns_an_iterable_of_images
 from huggingface_hub import hf_hub_download
 import os
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
@@ -23,13 +24,36 @@ MAX_IMAGE_SIZE = 2048
 pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(pipe)
 # Load and fuse LoRA BEFORE quantizing
 print('Loading and fusing lora, please wait...')
 lora_path = hf_hub_download("gokaygokay/Flux-Game-Assets-LoRA-v2", "game_asst.safetensors")
 pipe.load_lora_weights(lora_path)
 pipe.fuse_lora(lora_scale=0.125)
 pipe.unload_lora_weights()
-pipe.transformer.to(device, dtype=torch.bfloat16)
 @spaces.GPU(duration=75)
 def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024, guidance_scale=3.5, num_inference_steps=28, progress=gr.Progress(track_tqdm=True)):

@@ -3,10 +3,11 @@ import numpy as np
 import random
 import spaces
 import torch
+from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler, AutoencoderTiny, AutoencoderKL
 from transformers import CLIPTextModel, CLIPTokenizer,T5EncoderModel, T5TokenizerFast
 from live_preview_helpers import calculate_shift, retrieve_timesteps, flux_pipe_call_that_returns_an_iterable_of_images
 from huggingface_hub import hf_hub_download
+from optimum.quanto import freeze, qfloat8, quantize
 import os
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
@@ -23,13 +24,36 @@ MAX_IMAGE_SIZE = 2048
 pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(pipe)
+# Load base model first (before quantization)
+pipe = DiffusionPipeline.from_pretrained(
+    "black-forest-labs/FLUX.1-dev",
+    torch_dtype=dtype,
+    vae=taef1,
+    token=huggingface_token
+)
 # Load and fuse LoRA BEFORE quantizing
 print('Loading and fusing lora, please wait...')
 lora_path = hf_hub_download("gokaygokay/Flux-Game-Assets-LoRA-v2", "game_asst.safetensors")
 pipe.load_lora_weights(lora_path)
 pipe.fuse_lora(lora_scale=0.125)
 pipe.unload_lora_weights()
+# Quantize the transformer
+print("Quantizing transformer")
+quantize(pipe.transformer, weights=qfloat8)
+freeze(pipe.transformer)
+pipe.transformer.to(device)
+# Quantize T5 encoder
+print("Quantizing T5")
+quantize(pipe.text_encoder_2, weights=qfloat8)
+freeze(pipe.text_encoder_2)
+pipe.text_encoder_2.to(device)
+# Move other components to device
+pipe.text_encoder.to(device, dtype=dtype)
+torch.cuda.empty_cache()
 @spaces.GPU(duration=75)
 def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024, guidance_scale=3.5, num_inference_steps=28, progress=gr.Progress(track_tqdm=True)):

stable_diffusion_model.py ADDED Viewed

The diff for this file is too large to render. See raw diff