Commit 8a11110 (verified) by 1inkusFace · Parent(s): 19f9fef

Update app.py

Files changed (1): app.py (+9 -15)
app.py CHANGED

@@ -15,9 +15,7 @@ from models.transformer_sd3 import SD3Transformer2DModel
 #from diffusers import StableDiffusion3Pipeline
 from transformers import CLIPTextModelWithProjection, T5EncoderModel
 from transformers import CLIPTokenizer, T5TokenizerFast
-#from diffusers import SD3Transformer2DModel, AutoencoderKL
 from diffusers import AutoencoderKL
-#from models.transformer_sd3 import SD3Transformer2DModel
 from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
 
 from image_gen_aux import UpscaleWithModel
@@ -59,7 +57,7 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 torch_dtype = torch.bfloat16
 
 transformer = SD3Transformer2DModel.from_pretrained(
-    model_path, subfolder="transformer", torch_dtype=torch.bfloat16
+    model_path, subfolder="transformer" #, torch_dtype=torch.bfloat16
 )
 
 vaeX=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-fp32", safety_checker=None, use_safetensors=True, low_cpu_mem_usage=False, subfolder='vae', torch_dtype=torch.float32, token=True)
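Note on this hunk: with torch_dtype commented out, from_pretrained loads the transformer at the checkpoint's stored precision, and the cast to bfloat16 now happens once, pipeline-wide, in the pipe.to(...) call further down. A minimal sketch of the two patterns this trades between (model_path as in the script; the remark about peak memory is an assumption about the motivation, not something the commit states):

    import torch
    from models.transformer_sd3 import SD3Transformer2DModel

    # Cast-at-load: each weight is converted to bf16 as it is read.
    transformer = SD3Transformer2DModel.from_pretrained(
        model_path, subfolder="transformer", torch_dtype=torch.bfloat16
    )

    # Cast-later (what this commit switches to): load at stored precision,
    # then convert in one pass; peak memory can be higher until the cast runs.
    transformer = SD3Transformer2DModel.from_pretrained(
        model_path, subfolder="transformer"
    )
    transformer = transformer.to(dtype=torch.bfloat16)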
@@ -74,21 +72,19 @@ pipe = StableDiffusion3Pipeline.from_pretrained(
     #tokenizer=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer", token=True),
     #tokenizer_2=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer_2", token=True),
     tokenizer_3=T5TokenizerFast.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", use_fast=True, subfolder="tokenizer_3", token=True),
-    torch_dtype=torch.bfloat16,
+    #torch_dtype=torch.bfloat16,
     transformer=transformer,
     vae=None
     #use_safetensors=False,
 )
 
-#pipe.to(device=device, dtype=torch.bfloat16)
+pipe.to(device=device, dtype=torch.bfloat16)
 
-pipe.to(device)
+#pipe.to(device)
 pipe.vae=vaeX.to(device)
 text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
 text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
 text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
-
-
 
 upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))
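Net effect of this hunk: the pipeline is built without a dtype, then moved and cast in one shot with pipe.to(device=device, dtype=torch.bfloat16), while the float32 VAE is attached only afterwards and so escapes the bf16 cast. Running the transformer in bf16 but decoding with an fp32 VAE is a common precision split for SD3-class models; whether that is the intent here is an inference, not something the commit says. A sketch of how one might verify the resulting layout (the print lines are hypothetical debugging aids, not part of app.py):

    pipe.to(device=device, dtype=torch.bfloat16)  # casts the modules registered on the pipeline
    pipe.vae = vaeX.to(device)                    # assigned after the cast, so it stays float32

    print(pipe.transformer.dtype)  # expected: torch.bfloat16
    print(pipe.vae.dtype)          # expected: torch.float32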
 
@@ -120,11 +116,9 @@ def infer(
     image_encoder_path=None,
     progress=gr.Progress(track_tqdm=True),
 ):
-
     pipe.text_encoder=text_encoder
     pipe.text_encoder_2=text_encoder_2
     pipe.text_encoder_3=text_encoder_3
-
     pipe.init_ipadapter(
         ip_adapter_path=ipadapter_path,
         image_encoder_path=image_encoder_path,
@@ -140,25 +134,25 @@ def infer(
     sd_image_a = Image.open(latent_file.name).convert('RGB')
     print("-- using image file and loading ip-adapter --")
     #sd_image_a.resize((height,width), Image.LANCZOS)
-    sd_image_a.resize((384,384), Image.LANCZOS)
+    sd_image_a.resize((width,height), Image.LANCZOS)
     if latent_file_2 is not None: # Check if a latent file is provided
         sd_image_b = Image.open(latent_file_2.name).convert('RGB')
-        sd_image_b.resize((384,384), Image.LANCZOS)
+        sd_image_b.resize((width,height), Image.LANCZOS)
     else:
         sd_image_b = None
     if latent_file_3 is not None: # Check if a latent file is provided
         sd_image_c = Image.open(latent_file_3.name).convert('RGB')
-        sd_image_c.resize((384,384), Image.LANCZOS)
+        sd_image_c.resize((width,height), Image.LANCZOS)
     else:
         sd_image_c = None
     if latent_file_4 is not None: # Check if a latent file is provided
         sd_image_d = Image.open(latent_file_4.name).convert('RGB')
-        sd_image_d.resize((384,384), Image.LANCZOS)
+        sd_image_d.resize((width,height), Image.LANCZOS)
     else:
         sd_image_d = None
     if latent_file_5 is not None: # Check if a latent file is provided
         sd_image_e = Image.open(latent_file_5.name).convert('RGB')
-        sd_image_e.resize((384,384), Image.LANCZOS)
+        sd_image_e.resize((width,height), Image.LANCZOS)
     else:
         sd_image_e = None
     print('-- generating image --')
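One caveat on this last hunk: PIL's Image.resize() returns a new image rather than resizing in place, so both the old (384,384) calls and the new (width,height) calls discard their result, and the images reach the pipeline at their original size. If the resize is meant to take effect, the return value has to be rebound; a minimal sketch:

    from PIL import Image

    sd_image_a = Image.open(latent_file.name).convert('RGB')
    # Image.resize() is not in-place: bind the result, or the call is a no-op.
    sd_image_a = sd_image_a.resize((width, height), Image.LANCZOS)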
 