Update app.py
app.py (changed)
@@ -3,8 +3,7 @@ from diffusers import StableDiffusion3Pipeline
 from huggingface_hub import login
 import os
 import gradio as gr
-from diffusers import BitsAndBytesConfig
-from diffusers import SD3Transformer2DModel
+from transformers import pipeline as transformers_pipeline
 
 # Retrieve the token from the environment variable
 token = os.getenv("HF_TOKEN")  # Hugging Face token from the secret
@@ -13,30 +12,14 @@ if token:
 else:
     raise ValueError("Hugging Face token not found. Please set it as a repository secret in the Space settings.")
 
-#
-quant_config = BitsAndBytesConfig(
-    load_in_4bit=True,  # Enable 4-bit quantization
-    bnb_4bit_quant_type="nf4",  # Choose the quantization type (nf4 is often used for high-quality quantization)
-    bnb_4bit_compute_dtype=torch.bfloat16  # Use bfloat16 for computation (works well with CPUs)
-)
-
-# Load the Stable Diffusion 3.5 model with quantization
+# Load the Stable Diffusion 3.5 model with quantization enabled for CPU
 model_id = "stabilityai/stable-diffusion-3.5-large"
-model = SD3Transformer2DModel.from_pretrained(
-    model_id,
-    subfolder="transformer",
-    quantization_config=quant_config,
-    torch_dtype=torch.bfloat16  # Ensure the model uses bfloat16 dtype for computation
-)
+pipe = StableDiffusion3Pipeline.from_pretrained(model_id, torch_dtype=torch.float16)  # Use float16 for less memory usage
 
-#
-pipe = StableDiffusion3Pipeline.from_pretrained(
-    model_id,
-    transformer=model,
-    torch_dtype=torch.bfloat16  # Ensuring the pipeline uses bfloat16
-)
+# Perform quantization on the model to reduce the memory footprint
+pipe.unet = torch.quantization.quantize_dynamic(pipe.unet, {torch.nn.Linear}, dtype=torch.qint8)
 
-pipe.to("cpu")  #
+pipe.to("cpu")  # Ensure it runs on CPU
 
 # Define the path to the LoRA model
 lora_model_path = "./lora_model.pth"  # Assuming the file is saved locally
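
A review note on the new CPU path: the removed bitsandbytes 4-bit setup generally requires a CUDA GPU, which is presumably why it was dropped for a CPU-only Space. However, `StableDiffusion3Pipeline` has no `unet` attribute (SD3 replaced the UNet with a diffusion transformer, exposed as `pipe.transformer`), so the added `pipe.unet = ...` line will raise an `AttributeError`. PyTorch's dynamic quantization also expects float32 weights, so quantizing a model loaded in float16 is likely to fail on CPU. A minimal sketch of what the commit appears to intend, under those assumptions (same `model_id`, with the `HF_TOKEN` login earlier in app.py already done):

import torch
from diffusers import StableDiffusion3Pipeline

model_id = "stabilityai/stable-diffusion-3.5-large"

# Load in float32: torch's dynamic quantization operates on float32 Linear layers.
pipe = StableDiffusion3Pipeline.from_pretrained(model_id, torch_dtype=torch.float32)
pipe.to("cpu")  # Ensure it runs on CPU

# SD3's denoiser is `pipe.transformer`, not `pipe.unet`. Dynamic quantization swaps
# its Linear layers for int8 versions that quantize activations on the fly.
pipe.transformer = torch.quantization.quantize_dynamic(
    pipe.transformer, {torch.nn.Linear}, dtype=torch.qint8
)

This is a sketch rather than the committed code; whether the int8 transformer actually fits in memory and runs acceptably fast on a CPU Space would need to be measured.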