Spaces:

gokaygokay
/

FLUX.1-dev-with-Captioner

Running on Zero

App Files Files Community

gokaygokay committed on Oct 28, 2024

Commit

692642f

1 Parent(s): 792870e

aa

Browse files

Files changed (1) hide show

app.py +22 -24

app.py CHANGED Viewed

@@ -19,8 +19,6 @@ from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
 import gradio as gr
 import shutil
 import tempfile
-from functools import partial
-from optimum.quanto import quantize, qfloat8, freeze
 from flux_8bit_lora import FluxPipeline
 from src.utils.train_util import instantiate_from_config
@@ -74,22 +72,21 @@ else:
 device = torch.device('cuda')
-base_model = "black-forest-labs/FLUX.1-dev"
-pipe = FluxPipeline.from_pretrained(base_model, torch_dtype=torch.bfloat16, token=huggingface_token).to(device)
 print('Loading and fusing lora, please wait...')
-pipe.load_lora_weights(hf_hub_download("gokaygokay/Flux-Game-Assets-LoRA-v2", "game_asst.safetensors"))
 # We need this scaling because SimpleTuner fixes the alpha to 16, might be fixed later in diffusers
 # See https://github.com/huggingface/diffusers/issues/9134
-pipe.fuse_lora(lora_scale=1.)
-pipe.unload_lora_weights()
-print('Quantizing, please wait...')
-quantize(pipe.transformer, qfloat8)
-freeze(pipe.transformer)
-print('Model quantized!')
-pipe.enable_model_cpu_offload()
 # Load 3D generation models
 config_path = 'configs/instant-mesh-large.yaml'
@@ -153,15 +150,16 @@ ts_cutoff = 2
 @spaces.GPU
 def generate_flux_image(prompt, height, width, steps, scales, seed):
-    return pipe(
-        prompt=prompt,
-        width=int(height),
-        height=int(width),
-        num_inference_steps=int(steps),
-        generator=torch.Generator().manual_seed(int(seed)),
-        guidance_scale=float(scales),
-        timestep_to_start_cfg=ts_cutoff,
-    ).images[0]
 @spaces.GPU
@@ -270,4 +268,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import shutil
 import tempfile
 from flux_8bit_lora import FluxPipeline
 from src.utils.train_util import instantiate_from_config
 device = torch.device('cuda')
+# Load Flux pipeline
+flux_pipe = FluxPipeline.from_pretrained(
+    "Freepik/flux.1-lite-8B-alpha",
+    torch_dtype=torch.bfloat16
+)
+flux_pipe.load_lora_weights(hf_hub_download("gokaygokay/Flux-Game-Assets-LoRA-v2", "game_asst.safetensors"))
+flux_pipe.fuse_lora(lora_scale=1)
+flux_pipe.to(device="cuda", dtype=torch.bfloat16)
 print('Loading and fusing lora, please wait...')
+flux_pipe.load_lora_weights(hf_hub_download("gokaygokay/Flux-Game-Assets-LoRA-v2", "game_asst.safetensors"))
 # We need this scaling because SimpleTuner fixes the alpha to 16, might be fixed later in diffusers
 # See https://github.com/huggingface/diffusers/issues/9134
+flux_pipe.fuse_lora(lora_scale=1.)
+flux_pipe.unload_lora_weights()
 # Load 3D generation models
 config_path = 'configs/instant-mesh-large.yaml'
 @spaces.GPU
 def generate_flux_image(prompt, height, width, steps, scales, seed):
+    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16), timer("Flux inference"):
+        return flux_pipe(
+            prompt=[prompt],
+            generator=torch.Generator().manual_seed(int(seed)),
+            num_inference_steps=int(steps),
+            guidance_scale=float(scales),
+            height=int(height),
+            width=int(width),
+            max_sequence_length=256
+        ).images[0]
 @spaces.GPU
     )
 if __name__ == "__main__":
+    demo.launch()