Update app.py
app.py CHANGED
@@ -3,6 +3,9 @@ from diffusers import StableDiffusion3Pipeline
 from huggingface_hub import login
 import os
 import gradio as gr
+import torch  # needed for the torch.bfloat16 references below
+from diffusers import BitsAndBytesConfig
+from diffusers import SD3Transformer2DModel
 
 # Retrieve the token from the environment variable
 token = os.getenv("HF_TOKEN")  # Hugging Face token from the secret
@@ -11,9 +14,29 @@ if token:
 else:
     raise ValueError("Hugging Face token not found. Please set it as a repository secret in the Space settings.")
 
-#
+# Define quantization configuration (4-bit quantization)
+quant_config = BitsAndBytesConfig(
+    load_in_4bit=True,  # Enable 4-bit quantization
+    bnb_4bit_quant_type="nf4",  # NF4 is a common choice for high-quality 4-bit quantization
+    bnb_4bit_compute_dtype=torch.bfloat16  # Use bfloat16 for computation
+)
+
+# Load the Stable Diffusion 3.5 transformer with quantization
 model_id = "stabilityai/stable-diffusion-3.5-large"
-
+model = SD3Transformer2DModel.from_pretrained(
+    model_id,
+    subfolder="transformer",
+    quantization_config=quant_config,
+    torch_dtype=torch.bfloat16  # Ensure the model uses bfloat16 for computation
+)
+
+# Load the pipeline with the quantized transformer
+pipe = StableDiffusion3Pipeline.from_pretrained(
+    model_id,
+    transformer=model,
+    torch_dtype=torch.bfloat16  # Ensure the pipeline uses bfloat16 as well
+)
+
 pipe.to("cpu")  # Ensuring it runs on CPU
 
 # Define the path to the LoRA model
@@ -41,4 +64,4 @@ def generate_image(prompt):
 
 # Gradio interface
 iface = gr.Interface(fn=generate_image, inputs="text", outputs="image")
-iface.launch()
+iface.launch()