gokaygokay committed on
Commit
692642f
·
1 Parent(s): 792870e
Files changed (1) hide show
  1. app.py +22 -24
app.py CHANGED
@@ -19,8 +19,6 @@ from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
19
  import gradio as gr
20
  import shutil
21
  import tempfile
22
- from functools import partial
23
- from optimum.quanto import quantize, qfloat8, freeze
24
  from flux_8bit_lora import FluxPipeline
25
 
26
  from src.utils.train_util import instantiate_from_config
@@ -74,22 +72,21 @@ else:
74
 
75
  device = torch.device('cuda')
76
 
77
- base_model = "black-forest-labs/FLUX.1-dev"
78
- pipe = FluxPipeline.from_pretrained(base_model, torch_dtype=torch.bfloat16, token=huggingface_token).to(device)
 
 
 
 
 
 
79
 
80
  print('Loading and fusing lora, please wait...')
81
- pipe.load_lora_weights(hf_hub_download("gokaygokay/Flux-Game-Assets-LoRA-v2", "game_asst.safetensors"))
82
  # We need this scaling because SimpleTuner fixes the alpha to 16, might be fixed later in diffusers
83
  # See https://github.com/huggingface/diffusers/issues/9134
84
- pipe.fuse_lora(lora_scale=1.)
85
- pipe.unload_lora_weights()
86
-
87
- print('Quantizing, please wait...')
88
- quantize(pipe.transformer, qfloat8)
89
- freeze(pipe.transformer)
90
- print('Model quantized!')
91
- pipe.enable_model_cpu_offload()
92
-
93
 
94
  # Load 3D generation models
95
  config_path = 'configs/instant-mesh-large.yaml'
@@ -153,15 +150,16 @@ ts_cutoff = 2
153
 
154
  @spaces.GPU
155
  def generate_flux_image(prompt, height, width, steps, scales, seed):
156
- return pipe(
157
- prompt=prompt,
158
- width=int(height),
159
- height=int(width),
160
- num_inference_steps=int(steps),
161
- generator=torch.Generator().manual_seed(int(seed)),
162
- guidance_scale=float(scales),
163
- timestep_to_start_cfg=ts_cutoff,
164
- ).images[0]
 
165
 
166
 
167
  @spaces.GPU
@@ -270,4 +268,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
270
  )
271
 
272
  if __name__ == "__main__":
273
- demo.launch()
 
19
  import gradio as gr
20
  import shutil
21
  import tempfile
 
 
22
  from flux_8bit_lora import FluxPipeline
23
 
24
  from src.utils.train_util import instantiate_from_config
 
72
 
73
  device = torch.device('cuda')
74
 
75
# Load the Flux pipeline and bake the game-assets LoRA into its weights.
#
# The LoRA is loaded and fused exactly once. The previous revision ran
# load_lora_weights() + fuse_lora() twice in a row (once before and once after
# moving the pipeline to the GPU), which repeats the hub lookup and merges the
# same adapter into the weights a second time.
print('Loading and fusing lora, please wait...')
flux_pipe = FluxPipeline.from_pretrained(
    "Freepik/flux.1-lite-8B-alpha",
    torch_dtype=torch.bfloat16,
)
flux_pipe.load_lora_weights(
    hf_hub_download("gokaygokay/Flux-Game-Assets-LoRA-v2", "game_asst.safetensors")
)
# We need this scaling because SimpleTuner fixes the alpha to 16, might be fixed later in diffusers
# See https://github.com/huggingface/diffusers/issues/9134
flux_pipe.fuse_lora(lora_scale=1.)
# The adapter has been merged into the base weights, so the separate LoRA
# layers are dropped before inference.
flux_pipe.unload_lora_weights()
# Move the pipeline to the GPU last, after the LoRA has been fused and unloaded.
flux_pipe.to(device="cuda", dtype=torch.bfloat16)
 
 
 
 
 
 
 
90
 
91
  # Load 3D generation models
92
  config_path = 'configs/instant-mesh-large.yaml'
 
150
 
151
  @spaces.GPU
152
  def generate_flux_image(prompt, height, width, steps, scales, seed):
153
+ with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16), timer("Flux inference"):
154
+ return flux_pipe(
155
+ prompt=[prompt],
156
+ generator=torch.Generator().manual_seed(int(seed)),
157
+ num_inference_steps=int(steps),
158
+ guidance_scale=float(scales),
159
+ height=int(height),
160
+ width=int(width),
161
+ max_sequence_length=256
162
+ ).images[0]
163
 
164
 
165
  @spaces.GPU
 
268
  )
269
 
270
  # Launch the Gradio app only when this file is executed as a script, not when it is imported.
  if __name__ == "__main__":
271
+ demo.launch()