DonImages committed
Commit de8d57e · verified · 1 Parent(s): bcabf70

Update app.py

Files changed (1): app.py (+25 -3)
app.py CHANGED
@@ -3,6 +3,8 @@ from diffusers import StableDiffusion3Pipeline
 from huggingface_hub import login
 import os
 import gradio as gr
+from diffusers import BitsAndBytesConfig
+from diffusers import SD3Transformer2DModel
 
 # Retrieve the token from the environment variable
 token = os.getenv("HF_TOKEN")  # Hugging Face token from the secret
@@ -11,9 +13,29 @@ if token:
 else:
     raise ValueError("Hugging Face token not found. Please set it as a repository secret in the Space settings.")
 
-# Load the Stable Diffusion 3.5 model
+# Define quantization configuration (4-bit quantization)
+quant_config = BitsAndBytesConfig(
+    load_in_4bit=True,  # Enable 4-bit quantization
+    bnb_4bit_quant_type="nf4",  # Choose the quantization type (nf4 is often used for high-quality quantization)
+    bnb_4bit_compute_dtype=torch.bfloat16  # Use bfloat16 for computation
+)
+
+# Load the Stable Diffusion 3.5 model with quantization
 model_id = "stabilityai/stable-diffusion-3.5-large"
-pipe = StableDiffusion3Pipeline.from_pretrained(model_id)  # Removed torch_dtype argument
+model = SD3Transformer2DModel.from_pretrained(
+    model_id,
+    subfolder="transformer",
+    quantization_config=quant_config,
+    torch_dtype=torch.bfloat16  # Ensure the model uses bfloat16 dtype for computation
+)
+
+# Load the pipeline with the quantized model
+pipe = StableDiffusion3Pipeline.from_pretrained(
+    model_id,
+    transformer=model,
+    torch_dtype=torch.bfloat16  # Ensuring the pipeline uses bfloat16
+)
+
 pipe.to("cpu")  # Ensuring it runs on CPU
 
 # Define the path to the LoRA model
@@ -41,4 +63,4 @@ def generate_image(prompt):
 
 # Gradio interface
 iface = gr.Interface(fn=generate_image, inputs="text", outputs="image")
-iface.launch()
+iface.launch()
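
Note on the new hunk: the added lines reference torch.bfloat16, but no `import torch` appears in the visible diff, so it is presumably already imported near the top of app.py (line 1 or 2, outside the hunk context). A minimal, self-contained sketch of the loading path this commit sets up, assuming that `import torch` exists and that the optional bitsandbytes package is installed (which diffusers' BitsAndBytesConfig requires), would look roughly like:

# Sketch only: consolidated loading code after this commit (assumes
# `import torch` at the top of app.py and bitsandbytes installed).
import torch
from diffusers import BitsAndBytesConfig, SD3Transformer2DModel, StableDiffusion3Pipeline

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # 4-bit weights via bitsandbytes
    bnb_4bit_quant_type="nf4",              # NormalFloat4 quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # compute in bfloat16
)

model_id = "stabilityai/stable-diffusion-3.5-large"
transformer = SD3Transformer2DModel.from_pretrained(
    model_id,
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
)
pipe = StableDiffusion3Pipeline.from_pretrained(
    model_id,
    transformer=transformer,
    torch_dtype=torch.bfloat16,
)

One caveat: bitsandbytes 4-bit quantization targets CUDA GPUs, so the subsequent pipe.to("cpu") call is likely to fail or be unsupported on a CPU-only Space; if no GPU is available, loading the transformer without quantization_config (as the pre-commit code did) is the safer route.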