1inkusFace committed
Commit ff7f5af · verified · 1 Parent(s): 8f9907e

Update app.py

Files changed (1):
  1. app.py +226 -393

app.py CHANGED
@@ -1,40 +1,35 @@
- import spaces
- import os

- # os.putenv('PYTORCH_NVML_BASED_CUDA_CHECK','1')
- # os.putenv('TORCH_LINALG_PREFER_CUSOLVER','1')
- alloc_conf_parts = [
-     'expandable_segments:True',
-     'pinned_use_background_threads:True' # Specific to pinned memory.
- ]
- # os.environ['PYTORCH_CUDA_ALLOC_CONF'] = ','.join(alloc_conf_parts)
- # os.environ["SAFETENSORS_FAST_GPU"] = "1"
- # os.putenv('HF_HUB_ENABLE_HF_TRANSFER','1')

  import gradio as gr
  import numpy as np
  import random

- import torch
- from diffusers import StableDiffusion3Pipeline
- from transformers import CLIPTextModelWithProjection, T5EncoderModel
- from transformers import CLIPTokenizer, T5TokenizerFast

- import re
- import paramiko
- import urllib
- import time
- from image_gen_aux import UpscaleWithModel
- from huggingface_hub import hf_hub_download
- import datetime
- #import cyper

- from diffusers import AutoencoderKL
- #from models.transformer_sd3 import SD3Transformer2DModel
- #from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline

- from PIL import Image

  torch.backends.cuda.matmul.allow_tf32 = False
  torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
  torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
@@ -45,424 +40,262 @@ torch.backends.cuda.preferred_blas_library="cublas"
  torch.backends.cuda.preferred_linalg_library="cusolver"
  torch.set_float32_matmul_precision("highest")

- hftoken = os.getenv("HF_AUTH_TOKEN")
-
- # code = r'''
- import torch
- import paramiko
- import os

- FTP_HOST = 'noahcohn.com'
- FTP_USER = 'ford442'
- FTP_PASS = os.getenv("FTP_PASS")
- FTP_DIR = 'img.noahcohn.com/stablediff/'

- def upload_to_ftp(filename):
      try:
-         transport = paramiko.Transport((FTP_HOST, 22))
-         destination_path=FTP_DIR+filename
-         transport.connect(username = FTP_USER, password = FTP_PASS)
-         sftp = paramiko.SFTPClient.from_transport(transport)
-         sftp.put(filename, destination_path)
-         sftp.close()
-         transport.close()
-         print(f"Uploaded {filename} to FTP server")
      except Exception as e:
-         print(f"FTP upload error: {e}")
- # '''

- # pyx = cyper.inline(code, fast_indexing=True, directives=dict(boundscheck=False, wraparound=False, language_level=3))

  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
- #vae=AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16", use_safetensors=True, subfolder='vae',token=True)
- #vaeX=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-fp32", safety_checker=None, use_safetensors=True, subfolder='vae', low_cpu_mem_usage=False, torch_dtype=torch.float32, token=True)
-
- pipe = StableDiffusion3Pipeline.from_single_file(
-     "https://huggingface.co/1inkus/sd35-large-UltraReal-bf16-DDUF/blob/main/sd3-bf16-large.dduf",
-     #vae=None,
-     #tokenizer_3=T5TokenizerFast.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=False, use_fast=True, subfolder="tokenizer_3"),
-     use_safetensors=True,
-     config = 'ford442/stable-diffusion-3.5-large-bf16',
-     #dtype=torch.bfloat16,
-     #devive_map='auto',
- ) #.to(device=device)

- ### pipe = StableDiffusion3Pipeline.from_pretrained(
- #"stabilityai # stable-diffusion-3.5-large",
- #"ford442/stable-diffusion-3.5-large-bf16",
- ### "ford442/stable-diffusion-3.5-large-fp32",
- #vae=None,
- #vae=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-fp32", use_safetensors=True, subfolder='vae',token=True),
- #scheduler = FlowMatchHeunDiscreteScheduler.from_pretrained('ford442/stable-diffusion-3.5-large-bf16', subfolder='scheduler',token=True),
- # text_encoder=None, #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True),
- # text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True),
- # text_encoder_2=None, #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True),
- # text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True),
- # text_encoder_3=None, #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
- # text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
- #tokenizer=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer", token=True),
- #tokenizer_2=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer_2", token=True),
- #tokenizer_3=T5TokenizerFast.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=False, use_fast=True, subfolder="tokenizer_3", token=True),
- ### tokenizer_3=T5TokenizerFast.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=False, use_fast=True, subfolder="tokenizer_3", token=True),
- #torch_dtype=torch.bfloat16,
- #use_safetensors=False,
- ###)
- #text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(torch.device("cuda:0")) #, dtype=torch.bfloat16)
- #text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(torch.device("cuda:0")) #, dtype=torch.bfloat16)
- #text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-fp32", subfolder='text_encoder_3',token=True).to(torch.device("cuda:0")) #, dtype=torch.bfloat16)
-
- #pipe.load_lora_weights("ford442/sdxl-vae-bf16", weight_name="LoRA/UltraReal.safetensors")

- #pipe.to(device=device) #, dtype=torch.bfloat16)
- #pipe.vae=vaeX #.to('cpu')
-
- pipe.to(device,torch.bfloat16)
- upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device('cuda'))

  MAX_SEED = np.iinfo(np.int32).max
-
  MAX_IMAGE_SIZE = 4096

- @spaces.GPU(duration=40)
- def infer_30(
-     prompt,
-     negative_prompt_1,
-     negative_prompt_2,
-     negative_prompt_3,
-     width,
-     height,
-     guidance_scale,
-     num_inference_steps,
-     progress=gr.Progress(track_tqdm=True),
- ):
-     #pipe.text_encoder=text_encoder #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
-     #pipe.text_encoder_2=text_encoder_2 #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
-     #pipe.text_encoder_3=text_encoder_3 #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
      seed = random.randint(0, MAX_SEED)
-     generator = torch.Generator(device='cuda').manual_seed(seed)
      print('-- generating image --')
      sd_image = pipe(
-         prompt=prompt,
-         prompt_2=prompt,
-         prompt_3=prompt,
-         negative_prompt=negative_prompt_1,
-         negative_prompt_2=negative_prompt_2,
-         negative_prompt_3=negative_prompt_3,
-         guidance_scale=guidance_scale,
-         num_inference_steps=num_inference_steps,
-         width=width,
-         height=height,
-         # cross_attention_kwargs={"scale": 0.75},
-         generator=generator,
-         max_sequence_length=512
      ).images[0]
      print('-- got image --')
-     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-     sd35_path = f"sd35ll32_{timestamp}.png"
-     sd_image.save(sd35_path,optimize=False,compress_level=0)
-     #pyx.upload_to_ftp(sd35_path)
-     upload_to_ftp(sd35_path)
-     # pipe.unet.to('cpu')
-     #upscaler_2.to(torch.device('cuda'))
      with torch.no_grad():
-         upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
      print('-- got upscaled image --')
-     downscale2 = upscale2.resize((upscale2.width // 4, upscale2.height // 4),Image.LANCZOS)
-     upscale_path = f"sd35ll_upscale_{timestamp}.png"
-     downscale2.save(upscale_path,optimize=False,compress_level=0)
-     #pyx.upload_to_ftp(upscale_path)
-     upload_to_ftp(upscale_path)
-     return sd_image, prompt

  @spaces.GPU(duration=70)
- def infer_60(
-     prompt,
-     negative_prompt_1,
-     negative_prompt_2,
-     negative_prompt_3,
-     width,
-     height,
-     guidance_scale,
-     num_inference_steps,
-     progress=gr.Progress(track_tqdm=True),
- ):
-     #pipe.text_encoder=text_encoder #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
-     #pipe.text_encoder_2=text_encoder_2 #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
-     #pipe.text_encoder_3=text_encoder_3 #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
      seed = random.randint(0, MAX_SEED)
-     generator = torch.Generator(device='cuda').manual_seed(seed)
      print('-- generating image --')
      sd_image = pipe(
-         prompt=prompt,
-         prompt_2=prompt,
-         prompt_3=prompt,
-         negative_prompt=negative_prompt_1,
-         negative_prompt_2=negative_prompt_2,
-         negative_prompt_3=negative_prompt_3,
-         guidance_scale=guidance_scale,
-         num_inference_steps=num_inference_steps,
-         width=width,
-         height=height,
-         # cross_attention_kwargs={"scale": 0.75},
-         generator=generator,
-         max_sequence_length=512
      ).images[0]
      print('-- got image --')
-     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-     sd35_path = f"sd35ll32_{timestamp}.png"
-     sd_image.save(sd35_path,optimize=False,compress_level=0)
-     #pyx.upload_to_ftp(sd35_path)
-     upload_to_ftp(sd35_path)
-     # pipe.unet.to('cpu')
-     #upscaler_2.to(torch.device('cuda'))
      with torch.no_grad():
-         upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
      print('-- got upscaled image --')
-     downscale2 = upscale2.resize((upscale2.width // 4, upscale2.height // 4),Image.LANCZOS)
-     upscale_path = f"sd35ll_upscale_{timestamp}.png"
-     downscale2.save(upscale_path,optimize=False,compress_level=0)
-     #pyx.upload_to_ftp(upscale_path)
-     upload_to_ftp(upscale_path)
-     return sd_image, prompt

- @spaces.GPU(duration=100)
- def infer_90(
-     prompt,
-     negative_prompt_1,
-     negative_prompt_2,
-     negative_prompt_3,
-     width,
-     height,
-     guidance_scale,
-     num_inference_steps,
-     progress=gr.Progress(track_tqdm=True),
- ):
-     #pipe.text_encoder=text_encoder #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
-     #pipe.text_encoder_2=text_encoder_2 #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
-     #pipe.text_encoder_3=text_encoder_3 #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
      seed = random.randint(0, MAX_SEED)
-     generator = torch.Generator(device='cuda').manual_seed(seed)
      print('-- generating image --')
      sd_image = pipe(
-         prompt=prompt,
-         prompt_2=prompt,
-         prompt_3=prompt,
-         negative_prompt=negative_prompt_1,
-         negative_prompt_2=negative_prompt_2,
-         negative_prompt_3=negative_prompt_3,
-         guidance_scale=guidance_scale,
-         num_inference_steps=num_inference_steps,
-         width=width,
-         height=height,
-         # cross_attention_kwargs={"scale": 0.75},
-         generator=generator,
-         max_sequence_length=512
      ).images[0]
      print('-- got image --')
-     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-     sd35_path = f"sd35ll32_{timestamp}.png"
-     sd_image.save(sd35_path,optimize=False,compress_level=0)
-     #pyx.upload_to_ftp(sd35_path)
-     upload_to_ftp(sd35_path)
-     # pipe.unet.to('cpu')
-     #upscaler_2.to(torch.device('cuda'))
      with torch.no_grad():
-         upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
      print('-- got upscaled image --')
-     downscale2 = upscale2.resize((upscale2.width // 4, upscale2.height // 4),Image.LANCZOS)
-     upscale_path = f"sd35ll_upscale_{timestamp}.png"
-     downscale2.save(upscale_path,optimize=False,compress_level=0)
-     #pyx.upload_to_ftp(upscale_path)
-     upload_to_ftp(upscale_path)
-     return sd_image, prompt

- @spaces.GPU(duration=110)
- def infer_100(
-     prompt,
-     negative_prompt_1,
-     negative_prompt_2,
-     negative_prompt_3,
-     width,
-     height,
-     guidance_scale,
-     num_inference_steps,
-     progress=gr.Progress(track_tqdm=True),
- ):
-     #pipe.text_encoder=text_encoder #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
-     #pipe.text_encoder_2=text_encoder_2 #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
-     #pipe.text_encoder_3=text_encoder_3 #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
-     seed = random.randint(0, MAX_SEED)
-     generator = torch.Generator(device='cuda').manual_seed(seed)
-     print('-- generating image --')
-     sd_image = pipe(
-         prompt=prompt,
-         prompt_2=prompt,
-         prompt_3=prompt,
-         negative_prompt=negative_prompt_1,
-         negative_prompt_2=negative_prompt_2,
-         negative_prompt_3=negative_prompt_3,
-         guidance_scale=guidance_scale,
-         num_inference_steps=num_inference_steps,
-         width=width,
-         height=height,
-         # cross_attention_kwargs={"scale": 0.75},
-         generator=generator,
-         max_sequence_length=512
-     ).images[0]
-     print('-- got image --')
-     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-     sd35_path = f"sd35ll32_{timestamp}.png"
-     sd_image.save(sd35_path,optimize=False,compress_level=0)
-     #pyx.upload_to_ftp(sd35_path)
-     upload_to_ftp(sd35_path)
-     # pipe.unet.to('cpu')
-     #upscaler_2.to(torch.device('cuda'))
-     with torch.no_grad():
-         upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
-     print('-- got upscaled image --')
-     downscale2 = upscale2.resize((upscale2.width // 4, upscale2.height // 4),Image.LANCZOS)
-     upscale_path = f"sd35ll_upscale_{timestamp}.png"
-     downscale2.save(upscale_path,optimize=False,compress_level=0)
-     #pyx.upload_to_ftp(upscale_path)
-     upload_to_ftp(upscale_path)
-     return sd_image, prompt

  css = """
  #col-container {margin: 0 auto;max-width: 640px;}
  body{background-color: blue;}
  """
-
- with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
      with gr.Column(elem_id="col-container"):
-         gr.Markdown(" # StableDiffusion 3.5 Large with UltraReal lora")
-         expanded_prompt_output = gr.Textbox(label="Prompt", lines=1) # Add this line
          with gr.Row():
              prompt = gr.Text(
-                 label="Prompt",
-                 show_label=False,
-                 max_lines=1,
-                 placeholder="Enter your prompt",
-                 container=False,
              )
-             run_button_30 = gr.Button("Run 30", scale=0, variant="primary")
-             run_button_60 = gr.Button("Run 60", scale=0, variant="primary")
-             run_button_90 = gr.Button("Run 90", scale=0, variant="primary")
-             run_button_100 = gr.Button("Run 100", scale=0, variant="primary")
-         result = gr.Image(label="Result", show_label=False)
          with gr.Accordion("Advanced Settings", open=True):
-             negative_prompt_1 = gr.Text(
-                 label="Negative prompt 1",
-                 max_lines=1,
-                 placeholder="Enter a negative prompt",
-                 visible=True,
-                 value="bad anatomy, poorly drawn hands, distorted face, blurry, out of frame, low resolution, grainy, pixelated, disfigured, mutated, extra limbs, bad composition"
-             )
-             negative_prompt_2 = gr.Text(
-                 label="Negative prompt 2",
-                 max_lines=1,
-                 placeholder="Enter a second negative prompt",
-                 visible=True,
-                 value="unrealistic, cartoon, anime, sketch, painting, drawing, illustration, graphic, digital art, render, 3d, blurry, deformed, disfigured, poorly drawn, bad anatomy, mutated, extra limbs, ugly, out of frame, bad composition, low resolution, grainy, pixelated, noisy, oversaturated, undersaturated, (worst quality, low quality:1.3), (bad hands, missing fingers:1.2)"
-             )
-             negative_prompt_3 = gr.Text(
-                 label="Negative prompt 3",
-                 max_lines=1,
-                 placeholder="Enter a third negative prompt",
-                 visible=True,
-                 value="(worst quality, low quality:1.3), (bad anatomy, bad hands, missing fingers, extra digit, fewer digits:1.2), (blurry:1.1), cropped, watermark, text, signature, logo, jpeg artifacts, (ugly, deformed, disfigured:1.2), (poorly drawn:1.2), mutated, extra limbs, (bad proportions, gross proportions:1.2), (malformed limbs, missing arms, missing legs, extra arms, extra legs:1.2), (fused fingers, too many fingers, long neck:1.2), (unnatural body, unnatural pose:1.1), out of frame, (bad composition, poorly composed:1.1), (oversaturated, undersaturated:1.1), (grainy, pixelated:1.1), (low resolution, noisy:1.1), (unrealistic, distorted:1.1), (extra fingers, mutated hands, poorly drawn hands, bad hands:1.3), (missing fingers:1.3)"
-             )
-             num_iterations = gr.Number(
-                 value=1000,
-                 label="Number of Iterations")
          with gr.Row():
-             width = gr.Slider(
-                 label="Width",
-                 minimum=256,
-                 maximum=MAX_IMAGE_SIZE,
-                 step=32,
-                 value=768,
-             )
-             height = gr.Slider(
-                 label="Height",
-                 minimum=256,
-                 maximum=MAX_IMAGE_SIZE,
-                 step=32,
-                 value=768,
-             )
-             guidance_scale = gr.Slider(
-                 label="Guidance scale",
-                 minimum=0.0,
-                 maximum=30.0,
-                 step=0.1,
-                 value=4.2,
-             )
-             num_inference_steps = gr.Slider(
-                 label="Number of inference steps",
-                 minimum=1,
-                 maximum=500,
-                 step=1,
-                 value=50,
-             )
-         gr.on(
-             triggers=[run_button_30.click, prompt.submit],
-             fn=infer_30,
-             inputs=[
-                 prompt,
-                 negative_prompt_1,
-                 negative_prompt_2,
-                 negative_prompt_3,
-                 width,
-                 height,
-                 guidance_scale,
-                 num_inference_steps,
-             ],
-             outputs=[result, expanded_prompt_output],
-         )
-         gr.on(
-             triggers=[run_button_60.click, prompt.submit],
-             fn=infer_60,
-             inputs=[
-                 prompt,
-                 negative_prompt_1,
-                 negative_prompt_2,
-                 negative_prompt_3,
-                 width,
-                 height,
-                 guidance_scale,
-                 num_inference_steps,
-             ],
-             outputs=[result, expanded_prompt_output],
          )
-         gr.on(
-             triggers=[run_button_90.click, prompt.submit],
-             fn=infer_90,
-             inputs=[
-                 prompt,
-                 negative_prompt_1,
-                 negative_prompt_2,
-                 negative_prompt_3,
-                 width,
-                 height,
-                 guidance_scale,
-                 num_inference_steps,
-             ],
-             outputs=[result, expanded_prompt_output],
          )
-         gr.on(
-             triggers=[run_button_100.click, prompt.submit],
-             fn=infer_100,
-             inputs=[
-                 prompt,
-                 negative_prompt_1,
-                 negative_prompt_2,
-                 negative_prompt_3,
-                 width,
-                 height,
-                 guidance_scale,
-                 num_inference_steps,
-             ],
-             outputs=[result, expanded_prompt_output],
          )

  if __name__ == "__main__":
      demo.launch()

+ import subprocess
+ subprocess.run(['sh', './spaces.sh'])

+ import os
+ # Environment variable setup
+ os.environ['PYTORCH_NVML_BASED_CUDA_CHECK'] = '1'
+ os.environ['TORCH_LINALG_PREFER_CUSOLVER'] = '1'
+ os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True,pinned_use_background_threads:True'
+ os.environ["SAFETENSORS_FAST_GPU"] = "1"
+ os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = '1'

+ import spaces
  import gradio as gr
  import numpy as np
  import random
+ import datetime
+ import threading
+ import io
+ import json  # for parsing the GCS service-account key below

+ # --- New GCS Imports ---
+ from google.oauth2 import service_account
+ from google.cloud import storage

+ import torch

+ @spaces.GPU(required=True)
+ def install_flashattn():
+     subprocess.run(['sh', './flashattn.sh'])

+ #install_flashattn()

+ # Torch performance settings
  torch.backends.cuda.matmul.allow_tf32 = False
  torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
  torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False

  torch.backends.cuda.preferred_linalg_library="cusolver"
  torch.set_float32_matmul_precision("highest")

+ from diffusers import StableDiffusion3Pipeline, SD3Transformer2DModel, AutoencoderKL
+ from PIL import Image
+ from image_gen_aux import UpscaleWithModel

+ # --- GCS Configuration ---
+ # Make sure to set these secrets in your Hugging Face Space settings
+ GCS_BUCKET_NAME = os.getenv("GCS_BUCKET_NAME")
+ GCS_SA_KEY = os.getenv("GCS_SA_KEY") # The full JSON key content as a string

+ # Initialize GCS client if credentials are available
+ gcs_client = None
+ if GCS_SA_KEY and GCS_BUCKET_NAME:
      try:
+         credentials_info = json.loads(GCS_SA_KEY)  # parse the JSON key; json.loads does not execute the secret, unlike eval
+         credentials = service_account.Credentials.from_service_account_info(credentials_info)
+         gcs_client = storage.Client(credentials=credentials)
+         print("✅ GCS Client initialized successfully.")
      except Exception as e:
+         print(f"❌ Failed to initialize GCS client: {e}")

+ def upload_to_gcs(image_object, filename):
+     if not gcs_client:
+         print("⚠️ GCS client not initialized. Skipping upload.")
+         return
+     try:
+         print(f"--> Starting GCS upload for {filename}...")
+         bucket = gcs_client.bucket(GCS_BUCKET_NAME)
+         blob = bucket.blob(f"stablediff/{filename}")
+         img_byte_arr = io.BytesIO()
+         image_object.save(img_byte_arr, format='PNG', optimize=False, compress_level=0)
+         img_byte_arr = img_byte_arr.getvalue()
+         blob.upload_from_string(img_byte_arr, content_type='image/png')
+         print(f"✅ Successfully uploaded {filename} to GCS.")
+     except Exception as e:
+         print(f"❌ An error occurred during GCS upload: {e}")

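As a sanity check outside the Space, the upload path can be exercised directly with a throwaway image. A minimal sketch, not part of the commit, assuming GCS_BUCKET_NAME and GCS_SA_KEY are set in the environment so gcs_client initializes above:

# Hypothetical smoke test for upload_to_gcs (illustration only, not in the commit).
# Relies on the module-level gcs_client and the PIL Image import above.
test_img = Image.new("RGB", (64, 64), color="blue")  # throwaway 64x64 image
upload_to_gcs(test_img, "smoke_test.png")            # should land at stablediff/smoke_test.png in the bucket
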
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

+ def load_model():
+     pipe = StableDiffusion3Pipeline.from_pretrained(
+         "ford442/stable-diffusion-3.5-large-bf16",
+         trust_remote_code=True,
+         transformer=None, # Load transformer separately
+         use_safetensors=True
+     )
+     ll_transformer = SD3Transformer2DModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='transformer').to(device, dtype=torch.bfloat16)
+     pipe.transformer = ll_transformer
+     pipe.load_lora_weights("ford442/sdxl-vae-bf16", weight_name="LoRA/UltraReal.safetensors")
+     pipe.to(device=device, dtype=torch.bfloat16)
+     upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(device)
+     return pipe, upscaler_2

+ pipe, upscaler_2 = load_model()

  MAX_SEED = np.iinfo(np.int32).max
 
  MAX_IMAGE_SIZE = 4096

+ @spaces.GPU(duration=45)
+ def generate_images_30(prompt, neg_prompt_1, neg_prompt_2, neg_prompt_3, width, height, guidance, steps, progress=gr.Progress(track_tqdm=True)):
      seed = random.randint(0, MAX_SEED)
+     generator = torch.Generator(device=device).manual_seed(seed)
      print('-- generating image --')
+     torch.cuda.empty_cache()
+     torch.cuda.reset_peak_memory_stats()
      sd_image = pipe(
+         prompt=prompt, prompt_2=prompt, prompt_3=prompt,
+         negative_prompt=neg_prompt_1, negative_prompt_2=neg_prompt_2, negative_prompt_3=neg_prompt_3,
+         guidance_scale=guidance, num_inference_steps=steps,
+         width=width, height=height, generator=generator,
+         max_sequence_length=384
      ).images[0]
      print('-- got image --')
+     torch.cuda.empty_cache()
+     torch.cuda.reset_peak_memory_stats()
      with torch.no_grad():
+         upscale = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
+         upscale2 = upscaler_2(upscale, tiling=True, tile_width=256, tile_height=256)
      print('-- got upscaled image --')
+     downscaled_upscale = upscale2.resize((upscale2.width // 16, upscale2.height // 16), Image.LANCZOS)
+     return sd_image, downscaled_upscale, prompt

  @spaces.GPU(duration=70)
+ def generate_images_60(prompt, neg_prompt_1, neg_prompt_2, neg_prompt_3, width, height, guidance, steps, progress=gr.Progress(track_tqdm=True)):
      seed = random.randint(0, MAX_SEED)
+     generator = torch.Generator(device=device).manual_seed(seed)
      print('-- generating image --')
+     torch.cuda.empty_cache()
+     torch.cuda.reset_peak_memory_stats()
      sd_image = pipe(
+         prompt=prompt, prompt_2=prompt, prompt_3=prompt,
+         negative_prompt=neg_prompt_1, negative_prompt_2=neg_prompt_2, negative_prompt_3=neg_prompt_3,
+         guidance_scale=guidance, num_inference_steps=steps,
+         width=width, height=height, generator=generator,
+         max_sequence_length=384
      ).images[0]
      print('-- got image --')
+     torch.cuda.empty_cache()
+     torch.cuda.reset_peak_memory_stats()
      with torch.no_grad():
+         upscale = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
+         upscale2 = upscaler_2(upscale, tiling=True, tile_width=256, tile_height=256)
      print('-- got upscaled image --')
+     downscaled_upscale = upscale2.resize((upscale2.width // 16, upscale2.height // 16), Image.LANCZOS)
+     return sd_image, downscaled_upscale, prompt

+ @spaces.GPU(duration=120)
+ def generate_images_110(prompt, neg_prompt_1, neg_prompt_2, neg_prompt_3, width, height, guidance, steps, progress=gr.Progress(track_tqdm=True)):
      seed = random.randint(0, MAX_SEED)
+     generator = torch.Generator(device=device).manual_seed(seed)
      print('-- generating image --')
+     torch.cuda.empty_cache()
+     torch.cuda.reset_peak_memory_stats()
      sd_image = pipe(
+         prompt=prompt, prompt_2=prompt, prompt_3=prompt,
+         negative_prompt=neg_prompt_1, negative_prompt_2=neg_prompt_2, negative_prompt_3=neg_prompt_3,
+         guidance_scale=guidance, num_inference_steps=steps,
+         width=width, height=height, generator=generator,
+         max_sequence_length=384
      ).images[0]
      print('-- got image --')
+     torch.cuda.empty_cache()
+     torch.cuda.reset_peak_memory_stats()
      with torch.no_grad():
+         upscale = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
+         upscale2 = upscaler_2(upscale, tiling=True, tile_width=256, tile_height=256)
      print('-- got upscaled image --')
+     downscaled_upscale = upscale2.resize((upscale2.width // 16, upscale2.height // 16), Image.LANCZOS)
+     return sd_image, downscaled_upscale, prompt

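ClearRealityV1 is distributed as a 4x upscaler, so the two passes in each generator scale every side by 16 and the final // 16 LANCZOS resize returns the image to its original resolution with the recovered detail baked in. The size bookkeeping, as a sketch (the 4x factor is the model's advertised scale, an assumption the code itself never checks):

# Size arithmetic behind the double upscale (assumes a 4x model such as ClearRealityV1).
w, h = 1024, 1024        # pipeline output at the default slider values
w, h = w * 4, h * 4      # first upscaler pass   -> 4096 x 4096
w, h = w * 4, h * 4      # second upscaler pass  -> 16384 x 16384
w, h = w // 16, h // 16  # LANCZOS downscale     -> 1024 x 1024 again
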
+ def run_inference_and_upload_30(prompt, neg_prompt_1, neg_prompt_2, neg_prompt_3, width, height, guidance, steps, save_consent, progress=gr.Progress(track_tqdm=True)):
+     sd_image, upscaled_image, expanded_prompt = generate_images_30(prompt, neg_prompt_1, neg_prompt_2, neg_prompt_3, width, height, guidance, steps, progress)
+     if save_consent:
+         print("✅ User consented to save. Preparing uploads...")
+         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+         sd_filename = f"sd35ll_{timestamp}.png"
+         upscale_filename = f"sd35ll_upscale_{timestamp}.png"
+         sd_thread = threading.Thread(target=upload_to_gcs, args=(sd_image, sd_filename))
+         upscale_thread = threading.Thread(target=upload_to_gcs, args=(upscaled_image, upscale_filename))
+         sd_thread.start()
+         upscale_thread.start()
+     else:
+         print("ℹ️ User did not consent to save. Skipping upload.")
+     return sd_image, expanded_prompt
+
+ def run_inference_and_upload_60(prompt, neg_prompt_1, neg_prompt_2, neg_prompt_3, width, height, guidance, steps, save_consent, progress=gr.Progress(track_tqdm=True)):
+     sd_image, upscaled_image, expanded_prompt = generate_images_60(prompt, neg_prompt_1, neg_prompt_2, neg_prompt_3, width, height, guidance, steps, progress)
+     if save_consent:
+         print("✅ User consented to save. Preparing uploads...")
+         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+         sd_filename = f"sd35ll_{timestamp}.png"
+         upscale_filename = f"sd35ll_upscale_{timestamp}.png"
+         sd_thread = threading.Thread(target=upload_to_gcs, args=(sd_image, sd_filename))
+         upscale_thread = threading.Thread(target=upload_to_gcs, args=(upscaled_image, upscale_filename))
+         sd_thread.start()
+         upscale_thread.start()
+     else:
+         print("ℹ️ User did not consent to save. Skipping upload.")
+     return sd_image, expanded_prompt
+
+ def run_inference_and_upload_110(prompt, neg_prompt_1, neg_prompt_2, neg_prompt_3, width, height, guidance, steps, save_consent, progress=gr.Progress(track_tqdm=True)):
+     sd_image, upscaled_image, expanded_prompt = generate_images_110(prompt, neg_prompt_1, neg_prompt_2, neg_prompt_3, width, height, guidance, steps, progress)
+     if save_consent:
+         print("✅ User consented to save. Preparing uploads...")
+         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+         sd_filename = f"sd35ll_{timestamp}.png"
+         upscale_filename = f"sd35ll_upscale_{timestamp}.png"
+         sd_thread = threading.Thread(target=upload_to_gcs, args=(sd_image, sd_filename))
+         upscale_thread = threading.Thread(target=upload_to_gcs, args=(upscaled_image, upscale_filename))
+         sd_thread.start()
+         upscale_thread.start()
+     else:
+         print("ℹ️ User did not consent to save. Skipping upload.")
+     return sd_image, expanded_prompt

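The three wrappers differ only in the generator they dispatch to, so they could be produced by a single factory; a hedged sketch of that refactor, not part of the commit:

# Hypothetical deduplication of the run_inference_and_upload_* wrappers.
def make_runner(generate_fn):
    def runner(prompt, n1, n2, n3, width, height, guidance, steps, save_consent,
               progress=gr.Progress(track_tqdm=True)):
        sd_image, upscaled_image, expanded_prompt = generate_fn(
            prompt, n1, n2, n3, width, height, guidance, steps, progress)
        if save_consent:
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            threading.Thread(target=upload_to_gcs,
                             args=(sd_image, f"sd35ll_{timestamp}.png")).start()
            threading.Thread(target=upload_to_gcs,
                             args=(upscaled_image, f"sd35ll_upscale_{timestamp}.png")).start()
        return sd_image, expanded_prompt
    return runner

# e.g. run_inference_and_upload_30 = make_runner(generate_images_30)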
 
  css = """
  #col-container {margin: 0 auto;max-width: 640px;}
  body{background-color: blue;}
  """
+ with gr.Blocks(theme=gr.themes.Origin(), css=css) as demo:
      with gr.Column(elem_id="col-container"):
+         gr.Markdown(" # StableDiffusion 3.5 Large with UltraReal lora test")
+         expanded_prompt_output = gr.Textbox(label="Prompt", lines=1)
          with gr.Row():
              prompt = gr.Text(
+                 label="Prompt", show_label=False, max_lines=1,
+                 placeholder="Enter your prompt", container=False,
              )
+             run_button_30 = gr.Button("Run30", scale=0, variant="primary")
+             run_button_60 = gr.Button("Run60", scale=0, variant="primary")
+             run_button_110 = gr.Button("Run110", scale=0, variant="primary")
+         result = gr.Image(label="Result", show_label=False, type="pil")
+         save_consent_checkbox = gr.Checkbox(
+             label="✅ Anonymously upload result to a public gallery",
+             value=True,  # Default to uploading; uncheck to keep the image private
+             info="Check this box to help us by contributing your image."
+         )
          with gr.Accordion("Advanced Settings", open=True):
+             negative_prompt_1 = gr.Text(label="Negative prompt 1", max_lines=1, placeholder="Enter a negative prompt", value="bad anatomy, poorly drawn hands, distorted face, blurry, out of frame, low resolution, grainy, pixelated, disfigured, mutated, extra limbs, bad composition")
+             negative_prompt_2 = gr.Text(label="Negative prompt 2", max_lines=1, placeholder="Enter a second negative prompt", value="unrealistic, cartoon, anime, sketch, painting, drawing, illustration, graphic, digital art, render, 3d, blurry, deformed, disfigured, poorly drawn, bad anatomy, mutated, extra limbs, ugly, out of frame, bad composition, low resolution, grainy, pixelated, noisy, oversaturated, undersaturated, (worst quality, low quality:1.3), (bad hands, missing fingers:1.2)")
+             negative_prompt_3 = gr.Text(label="Negative prompt 3", max_lines=1, placeholder="Enter a third negative prompt", value="(worst quality, low quality:1.3), (bad anatomy, bad hands, missing fingers, extra digit, fewer digits:1.2), (blurry:1.1), cropped, watermark, text, signature, logo, jpeg artifacts, (ugly, deformed, disfigured:1.2), (poorly drawn:1.2), mutated, extra limbs, (bad proportions, gross proportions:1.2), (malformed limbs, missing arms, missing legs, extra arms, extra legs:1.2), (fused fingers, too many fingers, long neck:1.2), (unnatural body, unnatural pose:1.1), out of frame, (bad composition, poorly composed:1.1), (oversaturated, undersaturated:1.1), (grainy, pixelated:1.1), (low resolution, noisy:1.1), (unrealistic, distorted:1.1), (extra fingers, mutated hands, poorly drawn hands, bad hands:1.3), (missing fingers:1.3)")
              with gr.Row():
+                 width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
+                 height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
+             with gr.Row():
+                 guidance_scale = gr.Slider(label="Guidance scale", minimum=0.0, maximum=30.0, step=0.1, value=4.2)
+                 num_inference_steps = gr.Slider(label="Inference steps", minimum=1, maximum=150, step=1, value=60)
+
+         run_button_30.click(
+             fn=run_inference_and_upload_30,
+             inputs=[
+                 prompt,
+                 negative_prompt_1,
+                 negative_prompt_2,
+                 negative_prompt_3,
+                 width,
+                 height,
+                 guidance_scale,
+                 num_inference_steps,
+                 save_consent_checkbox # Pass the checkbox value
+             ],
+             outputs=[result, expanded_prompt_output],
          )
+
+         run_button_60.click(
+             fn=run_inference_and_upload_60,
+             inputs=[
+                 prompt,
+                 negative_prompt_1,
+                 negative_prompt_2,
+                 negative_prompt_3,
+                 width,
+                 height,
+                 guidance_scale,
+                 num_inference_steps,
+                 save_consent_checkbox # Pass the checkbox value
+             ],
+             outputs=[result, expanded_prompt_output],
          )
+
+         run_button_110.click(
+             fn=run_inference_and_upload_110,
+             inputs=[
+                 prompt,
+                 negative_prompt_1,
+                 negative_prompt_2,
+                 negative_prompt_3,
+                 width,
+                 height,
+                 guidance_scale,
+                 num_inference_steps,
+                 save_consent_checkbox # Pass the checkbox value
+             ],
+             outputs=[result, expanded_prompt_output],
          )

  if __name__ == "__main__":
      demo.launch()